author | Florian Schmaus <flow@gentoo.org> | 2022-07-01 20:20:49 +0200
---|---|---
committer | Florian Schmaus <flow@gentoo.org> | 2022-07-01 20:26:39 +0200
commit | a16128cfac11b2f6462bbbc993cced2636abb312
tree | 19b04c0f36afd341ba8a0ffbedf91ada49a332f1
Xen 4.16.2-pre-patchset-0
Signed-off-by: Florian Schmaus <flow@gentoo.org>
35 files changed, 3613 insertions, 0 deletions
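Each file below is a `git format-patch`-style patch (note the `From <hash> Mon Sep 17 00:00:00 2001` headers), numbered so the series applies in order on top of the Xen 4.16 stable line; patch 0001 bumps `XEN_EXTRAVERSION` to `.2-pre`. As a rough, hypothetical sketch of consuming the queue by hand — the base tag and paths here are assumptions, not part of this commit:

```sh
# Hypothetical usage sketch, not part of the patchset itself.
# Assumes the series is based on the RELEASE-4.16.1 tag; the ebuild that
# consumes this repository may use a different base or mechanism.
git clone https://xenbits.xen.org/git-http/xen.git
cd xen
git checkout RELEASE-4.16.1
git am ../xen-upstream-patches/*.patch   # numbered patches apply in order
make -C xen xenversion                   # should now report 4.16.2-pre
```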
diff --git a/0001-update-Xen-version-to-4.16.2-pre.patch b/0001-update-Xen-version-to-4.16.2-pre.patch new file mode 100644 index 0000000..30411de --- /dev/null +++ b/0001-update-Xen-version-to-4.16.2-pre.patch @@ -0,0 +1,25 @@ +From 5be9edb482ab20cf3e7acb05b511465294d1e19b Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 13:55:17 +0200 +Subject: [PATCH 01/32] update Xen version to 4.16.2-pre + +--- + xen/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/Makefile b/xen/Makefile +index 8abc71cf73aa..90a29782dbf4 100644 +--- a/xen/Makefile ++++ b/xen/Makefile +@@ -2,7 +2,7 @@ + # All other places this is stored (eg. compile.h) should be autogenerated. + export XEN_VERSION = 4 + export XEN_SUBVERSION = 16 +-export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION) ++export XEN_EXTRAVERSION ?= .2-pre$(XEN_VENDORVERSION) + export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) + -include xen-version + +-- +2.35.1 + diff --git a/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch b/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch new file mode 100644 index 0000000..fc6c2e1 --- /dev/null +++ b/0002-x86-irq-skip-unmap_domain_pirq-XSM-during-destructio.patch @@ -0,0 +1,50 @@ +From b58fb6e81bd55b6bd946abc3070770f7994c9ef9 Mon Sep 17 00:00:00 2001 +From: Jason Andryuk <jandryuk@gmail.com> +Date: Tue, 7 Jun 2022 13:55:39 +0200 +Subject: [PATCH 02/32] x86/irq: skip unmap_domain_pirq XSM during destruction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +xsm_unmap_domain_irq was seen denying unmap_domain_pirq when called from +complete_domain_destroy as an RCU callback. The source context was an +unexpected, random domain. Since this is a xen-internal operation, +going through the XSM hook is inappropriate. + +Check d->is_dying and skip the XSM hook when set since this is a cleanup +operation for a domain being destroyed. + +Suggested-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jason Andryuk <jandryuk@gmail.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 2e6f95a942d1927a53f077c301db0b799c54c05a +master date: 2022-04-08 14:51:52 +0200 +--- + xen/arch/x86/irq.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 67cbf6b979dc..47b86af5dce9 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2342,8 +2342,14 @@ int unmap_domain_pirq(struct domain *d, int pirq) + nr = msi_desc->msi.nvec; + } + +- ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, +- msi_desc ? msi_desc->dev : NULL); ++ /* ++ * When called by complete_domain_destroy via RCU, current is a random ++ * domain. Skip the XSM check since this is a Xen-initiated action. ++ */ ++ if ( !d->is_dying ) ++ ret = xsm_unmap_domain_irq(XSM_HOOK, d, irq, ++ msi_desc ? 
msi_desc->dev : NULL); ++ + if ( ret ) + goto done; + +-- +2.35.1 + diff --git a/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch b/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch new file mode 100644 index 0000000..905993b --- /dev/null +++ b/0003-xen-fix-XEN_DOMCTL_gdbsx_guestmemio-crash.patch @@ -0,0 +1,63 @@ +From 6c6bbfdff9374ef41f84c4ebed7b8a7a40767ef6 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 7 Jun 2022 13:56:54 +0200 +Subject: [PATCH 03/32] xen: fix XEN_DOMCTL_gdbsx_guestmemio crash + +A hypervisor built without CONFIG_GDBSX will crash in case the +XEN_DOMCTL_gdbsx_guestmemio domctl is being called, as the call will +end up in iommu_do_domctl() with d == NULL: + + (XEN) CPU: 6 + (XEN) RIP: e008:[<ffff82d040269984>] iommu_do_domctl+0x4/0x30 + (XEN) RFLAGS: 0000000000010202 CONTEXT: hypervisor (d0v0) + (XEN) rax: 00000000000003e8 rbx: ffff830856277ef8 rcx: ffff830856277fff + ... + (XEN) Xen call trace: + (XEN) [<ffff82d040269984>] R iommu_do_domctl+0x4/0x30 + (XEN) [<ffff82d04035cd5f>] S arch_do_domctl+0x7f/0x2330 + (XEN) [<ffff82d040239e46>] S do_domctl+0xe56/0x1930 + (XEN) [<ffff82d040238ff0>] S do_domctl+0/0x1930 + (XEN) [<ffff82d0402f8c59>] S pv_hypercall+0x99/0x110 + (XEN) [<ffff82d0402f5161>] S arch/x86/pv/domain.c#_toggle_guest_pt+0x11/0x90 + (XEN) [<ffff82d040366288>] S lstar_enter+0x128/0x130 + (XEN) + (XEN) Pagetable walk from 0000000000000144: + (XEN) L4[0x000] = 0000000000000000 ffffffffffffffff + (XEN) + (XEN) **************************************** + (XEN) Panic on CPU 6: + (XEN) FATAL PAGE FAULT + (XEN) [error_code=0000] + (XEN) Faulting linear address: 0000000000000144 + (XEN) **************************************** + +It used to be permitted to pass DOMID_IDLE to dbg_rw_mem(), which is why the +special case skipping the domid checks exists. Now that it is only permitted +to pass proper domids, remove the special case, making 'd' always valid. + +Reported-by: Cheyenne Wills <cheyenne.wills@gmail.com> +Fixes: e726a82ca0dc ("xen: make gdbsx support configurable") +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: f00daf1fb3213a9b0335d9dcd90fe9cb5c02b7a9 +master date: 2022-04-19 17:07:08 +0100 +--- + xen/common/domctl.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/xen/common/domctl.c b/xen/common/domctl.c +index 271862ae587f..419e4070f59d 100644 +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -304,7 +304,6 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + if ( op->domain == DOMID_INVALID ) + { + case XEN_DOMCTL_createdomain: +- case XEN_DOMCTL_gdbsx_guestmemio: + d = NULL; + break; + } +-- +2.35.1 + diff --git a/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch b/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch new file mode 100644 index 0000000..c566888 --- /dev/null +++ b/0004-VT-d-refuse-to-use-IOMMU-with-reserved-CAP.ND-value.patch @@ -0,0 +1,49 @@ +From b378ee56c7e0bb5eeb35dcc55b3d29e5f50eb566 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 13:58:16 +0200 +Subject: [PATCH 04/32] VT-d: refuse to use IOMMU with reserved CAP.ND value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The field taking the value 7 (resulting in 18-bit DIDs when using the +calculation in cap_ndoms(), when the DID fields are only 16 bits wide) +is reserved. 
Instead of misbehaving in case we would encounter such an +IOMMU, refuse to use it. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Kevin Tian <kevin.tian@intel.com> +master commit: a1545fbf45c689aff39ce76a6eaa609d32ef72a7 +master date: 2022-04-20 10:54:26 +0200 +--- + xen/drivers/passthrough/vtd/iommu.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c +index 93dd8aa643aa..8975c1de61bc 100644 +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -1279,8 +1279,11 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) + + quirk_iommu_caps(iommu); + ++ nr_dom = cap_ndoms(iommu->cap); ++ + if ( cap_fault_reg_offset(iommu->cap) + + cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || ++ ((nr_dom - 1) >> 16) /* I.e. cap.nd > 6 */ || + ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) + { + printk(XENLOG_ERR VTDPREFIX "IOMMU: unsupported\n"); +@@ -1305,7 +1308,6 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) + vtd_ops.sync_cache = sync_cache; + + /* allocate domain id bitmap */ +- nr_dom = cap_ndoms(iommu->cap); + iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); + if ( !iommu->domid_bitmap ) + return -ENOMEM; +-- +2.35.1 + diff --git a/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch b/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch new file mode 100644 index 0000000..6410aaa --- /dev/null +++ b/0005-x86-mm-avoid-inadvertently-degrading-a-TLB-flush-to-.patch @@ -0,0 +1,116 @@ +From 7c003ab4a398ff4ddd54d15d4158cffb463134cc Mon Sep 17 00:00:00 2001 +From: David Vrabel <dvrabel@amazon.co.uk> +Date: Tue, 7 Jun 2022 13:59:31 +0200 +Subject: [PATCH 05/32] x86/mm: avoid inadvertently degrading a TLB flush to + local only +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +If the direct map is incorrectly modified with interrupts disabled, +the required TLB flushes are degraded to flushing the local CPU only. + +This could lead to very hard to diagnose problems as different CPUs will +end up with different views of memory. Although, no such issues have yet +been identified. + +Change the check in the flush_area() macro to look at system_state +instead. This defers the switch from local to all later in the boot +(see xen/arch/x86/setup.c:__start_xen()). This is fine because +additional PCPUs are not brought up until after the system state is +SYS_STATE_smp_boot. + +Signed-off-by: David Vrabel <dvrabel@amazon.co.uk> +Reviewed-by: Jan Beulich <jbeulich@suse.com> + +x86/flushtlb: remove flush_area check on system state + +Booting with Shadow Stacks leads to the following assert on a debug +hypervisor: + +Assertion 'local_irq_is_enabled()' failed at arch/x86/smp.c:265 +----[ Xen-4.17.0-10.24-d x86_64 debug=y Not tainted ]---- +CPU: 0 +RIP: e008:[<ffff82d040345300>] flush_area_mask+0x40/0x13e +[...] 
+Xen call trace: + [<ffff82d040345300>] R flush_area_mask+0x40/0x13e + [<ffff82d040338a40>] F modify_xen_mappings+0xc5/0x958 + [<ffff82d0404474f9>] F arch/x86/alternative.c#_alternative_instructions+0xb7/0xb9 + [<ffff82d0404476cc>] F alternative_branches+0xf/0x12 + [<ffff82d04044e37d>] F __start_xen+0x1ef4/0x2776 + [<ffff82d040203344>] F __high_start+0x94/0xa0 + +This is due to SYS_STATE_smp_boot being set before calling +alternative_branches(), and the flush in modify_xen_mappings() then +using flush_area_all() with interrupts disabled. Note that +alternative_branches() is called before APs are started, so the flush +must be a local one (and indeed the cpumask passed to +flush_area_mask() just contains one CPU). + +Take the opportunity to simplify a bit the logic and make flush_area() +an alias of flush_area_all() in mm.c, taking into account that +cpu_online_map just contains the BSP before APs are started. This +requires widening the assert in flush_area_mask() to allow being +called with interrupts disabled as long as it's strictly a local only +flush. + +The overall result is that a conditional can be removed from +flush_area(). + +While there also introduce an ASSERT to check that a vCPU state flush +is not issued for the local CPU only. + +Fixes: 78e072bc37 ('x86/mm: avoid inadvertently degrading a TLB flush to local only') +Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 78e072bc375043e81691a59454e09f0b38241ddd +master date: 2022-04-20 10:55:01 +0200 +master commit: 9f735ee4903f1b9f1966bb4ba5b5616b03ae08b5 +master date: 2022-05-25 11:09:46 +0200 +--- + xen/arch/x86/mm.c | 10 ++-------- + xen/arch/x86/smp.c | 5 ++++- + 2 files changed, 6 insertions(+), 9 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 4d799032dc82..e222d9aa98ee 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -5051,14 +5051,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) + #define l1f_to_lNf(f) (((f) & _PAGE_PRESENT) ? ((f) | _PAGE_PSE) : (f)) + #define lNf_to_l1f(f) (((f) & _PAGE_PRESENT) ? ((f) & ~_PAGE_PSE) : (f)) + +-/* +- * map_pages_to_xen() can be called with interrupts disabled during +- * early bootstrap. In this case it is safe to use flush_area_local() +- * and avoid locking because only the local CPU is online. +- */ +-#define flush_area(v,f) (!local_irq_is_enabled() ? \ +- flush_area_local((const void *)v, f) : \ +- flush_area_all((const void *)v, f)) ++/* flush_area_all() can be used prior to any other CPU being online. */ ++#define flush_area(v, f) flush_area_all((const void *)(v), f) + + #define L3T_INIT(page) (page) = ZERO_BLOCK_PTR + +diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c +index eef0f9c6cbf4..3556ec116608 100644 +--- a/xen/arch/x86/smp.c ++++ b/xen/arch/x86/smp.c +@@ -262,7 +262,10 @@ void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) + { + unsigned int cpu = smp_processor_id(); + +- ASSERT(local_irq_is_enabled()); ++ /* Local flushes can be performed with interrupts disabled. */ ++ ASSERT(local_irq_is_enabled() || cpumask_subset(mask, cpumask_of(cpu))); ++ /* Exclude use of FLUSH_VCPU_STATE for the local CPU. 
*/ ++ ASSERT(!cpumask_test_cpu(cpu, mask) || !(flags & FLUSH_VCPU_STATE)); + + if ( (flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK)) && + cpumask_test_cpu(cpu, mask) ) +-- +2.35.1 + diff --git a/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch b/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch new file mode 100644 index 0000000..6489cba --- /dev/null +++ b/0006-xen-build-Fix-dependency-for-the-MAP-rule.patch @@ -0,0 +1,29 @@ +From 4bb8c34ba4241c2bf7845cd8b80c17530dbfb085 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 7 Jun 2022 14:00:09 +0200 +Subject: [PATCH 06/32] xen/build: Fix dependency for the MAP rule + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Jan Beulich <jbeulich@suse.com> +master commit: e1e72198213b80b7a82bdc90f96ed05ae4f53e20 +master date: 2022-04-20 19:10:59 +0100 +--- + xen/Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/Makefile b/xen/Makefile +index 90a29782dbf4..ce4eca3ee4d7 100644 +--- a/xen/Makefile ++++ b/xen/Makefile +@@ -507,7 +507,7 @@ cscope: + cscope -k -b -q + + .PHONY: _MAP +-_MAP: ++_MAP: $(TARGET) + $(NM) -n $(TARGET)-syms | grep -v '\(compiled\)\|\(\.o$$\)\|\( [aUw] \)\|\(\.\.ng$$\)\|\(LASH[RL]DI\)' > System.map + + %.o %.i %.s: %.c FORCE +-- +2.35.1 + diff --git a/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch b/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch new file mode 100644 index 0000000..2f02fcc --- /dev/null +++ b/0007-tools-libs-evtchn-don-t-set-errno-to-negative-values.patch @@ -0,0 +1,74 @@ +From 13a29f3756bc4cab96c59f46c3875b483553fb8f Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 7 Jun 2022 14:00:31 +0200 +Subject: [PATCH 07/32] tools/libs/evtchn: don't set errno to negative values + +Setting errno to a negative value makes no sense. 
+ +Fixes: 6b6500b3cbaa ("tools/libs/evtchn: Add support for restricting a handle") +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 60245b71c1cd001686fa7b7a26869cbcb80d074c +master date: 2022-04-22 20:39:34 +0100 +--- + tools/libs/evtchn/freebsd.c | 2 +- + tools/libs/evtchn/minios.c | 2 +- + tools/libs/evtchn/netbsd.c | 2 +- + tools/libs/evtchn/solaris.c | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tools/libs/evtchn/freebsd.c b/tools/libs/evtchn/freebsd.c +index 7427ab240860..fa17a0f8dbb5 100644 +--- a/tools/libs/evtchn/freebsd.c ++++ b/tools/libs/evtchn/freebsd.c +@@ -58,7 +58,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) + + int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) + { +- errno = -EOPNOTSUPP; ++ errno = EOPNOTSUPP; + + return -1; + } +diff --git a/tools/libs/evtchn/minios.c b/tools/libs/evtchn/minios.c +index e5dfdc5ef52e..c0bd5429eea2 100644 +--- a/tools/libs/evtchn/minios.c ++++ b/tools/libs/evtchn/minios.c +@@ -97,7 +97,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) + + int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) + { +- errno = -EOPNOTSUPP; ++ errno = EOPNOTSUPP; + + return -1; + } +diff --git a/tools/libs/evtchn/netbsd.c b/tools/libs/evtchn/netbsd.c +index 1cebc21ffce0..56409513bc23 100644 +--- a/tools/libs/evtchn/netbsd.c ++++ b/tools/libs/evtchn/netbsd.c +@@ -53,7 +53,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) + + int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) + { +- errno = -EOPNOTSUPP; ++ errno = EOPNOTSUPP; + + return -1; + } +diff --git a/tools/libs/evtchn/solaris.c b/tools/libs/evtchn/solaris.c +index df9579df1778..beaa7721425f 100644 +--- a/tools/libs/evtchn/solaris.c ++++ b/tools/libs/evtchn/solaris.c +@@ -53,7 +53,7 @@ int osdep_evtchn_close(xenevtchn_handle *xce) + + int osdep_evtchn_restrict(xenevtchn_handle *xce, domid_t domid) + { +- errno = -EOPNOTSUPP; ++ errno = EOPNOTSUPP; + return -1; + } + +-- +2.35.1 + diff --git a/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch b/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch new file mode 100644 index 0000000..acd7955 --- /dev/null +++ b/0008-tools-libs-ctrl-don-t-set-errno-to-a-negative-value.patch @@ -0,0 +1,36 @@ +From ba62afdbc31a8cfe897191efd25ed4449d9acd94 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 7 Jun 2022 14:01:03 +0200 +Subject: [PATCH 08/32] tools/libs/ctrl: don't set errno to a negative value + +The claimed reason for setting errno to -1 is wrong. On x86 +xc_domain_pod_target() will set errno to a sane value in the error +case. + +Fixes: ff1745d5882b ("tools: libxl: do not set the PoD target on ARM") +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: a0fb7e0e73483ed042d5ca34861a891a51ad337b +master date: 2022-04-22 20:39:34 +0100 +--- + tools/libs/ctrl/xc_domain.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c +index b155d6afd2ef..9d675c8f21e1 100644 +--- a/tools/libs/ctrl/xc_domain.c ++++ b/tools/libs/ctrl/xc_domain.c +@@ -1297,9 +1297,7 @@ int xc_domain_get_pod_target(xc_interface *xch, + uint64_t *pod_cache_pages, + uint64_t *pod_entries) + { +- /* On x86 (above) xc_domain_pod_target will incorrectly return -1 +- * with errno==-1 on error. Do the same for least surprise. 
*/ +- errno = -1; ++ errno = EOPNOTSUPP; + return -1; + } + #endif +-- +2.35.1 + diff --git a/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch b/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch new file mode 100644 index 0000000..41eb1f1 --- /dev/null +++ b/0009-tools-libs-guest-don-t-set-errno-to-a-negative-value.patch @@ -0,0 +1,32 @@ +From a2cf30eec08db5df974a9e8bb7366fee8fc7fcd9 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 7 Jun 2022 14:01:27 +0200 +Subject: [PATCH 09/32] tools/libs/guest: don't set errno to a negative value + +Setting errno to a negative error value makes no sense. + +Fixes: cb99a64029c9 ("libxc: arm: allow passing a device tree blob to the guest") +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 438e96ab479495a932391a22e219ee62fa8c4f47 +master date: 2022-04-22 20:39:34 +0100 +--- + tools/libs/guest/xg_dom_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c +index 2e4c1330ea6b..65975a75da37 100644 +--- a/tools/libs/guest/xg_dom_core.c ++++ b/tools/libs/guest/xg_dom_core.c +@@ -856,7 +856,7 @@ int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename) + return -1; + return 0; + #else +- errno = -EINVAL; ++ errno = EINVAL; + return -1; + #endif + } +-- +2.35.1 + diff --git a/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch b/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch new file mode 100644 index 0000000..a83e1cc --- /dev/null +++ b/0010-tools-libs-light-don-t-set-errno-to-a-negative-value.patch @@ -0,0 +1,32 @@ +From 15391de8e2bb6153eadd483154c53044ab53d98d Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 7 Jun 2022 14:01:44 +0200 +Subject: [PATCH 10/32] tools/libs/light: don't set errno to a negative value + +Setting errno to a negative value makes no sense. + +Fixes: e78e8b9bb649 ("libxl: Add interface for querying hypervisor about PCI topology") +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 2419a159fb943c24a6f2439604b9fdb1478fcd08 +master date: 2022-04-22 20:39:34 +0100 +--- + tools/libs/light/libxl_linux.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/libs/light/libxl_linux.c b/tools/libs/light/libxl_linux.c +index 8d62dfd255cb..27f2bce71837 100644 +--- a/tools/libs/light/libxl_linux.c ++++ b/tools/libs/light/libxl_linux.c +@@ -288,7 +288,7 @@ int libxl__pci_topology_init(libxl__gc *gc, + if (i == num_devs) { + LOG(ERROR, "Too many devices"); + err = ERROR_FAIL; +- errno = -ENOSPC; ++ errno = ENOSPC; + goto out; + } + +-- +2.35.1 + diff --git a/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch b/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch new file mode 100644 index 0000000..b62ae9b --- /dev/null +++ b/0011-xen-iommu-cleanup-iommu-related-domctl-handling.patch @@ -0,0 +1,112 @@ +From a6c32abd144ec6443c6a433b5a2ac00e2615aa86 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 7 Jun 2022 14:02:08 +0200 +Subject: [PATCH 11/32] xen/iommu: cleanup iommu related domctl handling + +Today iommu_do_domctl() is being called from arch_do_domctl() in the +"default:" case of a switch statement. This has led already to crashes +due to unvalidated parameters. 
+ +Fix that by moving the call of iommu_do_domctl() to the main switch +statement of do_domctl(). + +Signed-off-by: Juergen Gross <jgross@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> # Arm +master commit: 9cd7e31b3f584e97a138a770cfb031a91a867936 +master date: 2022-04-26 10:23:58 +0200 +--- + xen/arch/arm/domctl.c | 11 +---------- + xen/arch/x86/domctl.c | 2 +- + xen/common/domctl.c | 7 +++++++ + xen/include/xen/iommu.h | 12 +++++++++--- + 4 files changed, 18 insertions(+), 14 deletions(-) + +diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c +index 6245af6d0bab..1baf25c3d98b 100644 +--- a/xen/arch/arm/domctl.c ++++ b/xen/arch/arm/domctl.c +@@ -176,16 +176,7 @@ long arch_do_domctl(struct xen_domctl *domctl, struct domain *d, + return rc; + } + default: +- { +- int rc; +- +- rc = subarch_do_domctl(domctl, d, u_domctl); +- +- if ( rc == -ENOSYS ) +- rc = iommu_do_domctl(domctl, d, u_domctl); +- +- return rc; +- } ++ return subarch_do_domctl(domctl, d, u_domctl); + } + } + +diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c +index 7d102e0647ec..0fa51f2ebd10 100644 +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -1380,7 +1380,7 @@ long arch_do_domctl( + break; + + default: +- ret = iommu_do_domctl(domctl, d, u_domctl); ++ ret = -ENOSYS; + break; + } + +diff --git a/xen/common/domctl.c b/xen/common/domctl.c +index 419e4070f59d..65d2a4588b71 100644 +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c +@@ -870,6 +870,13 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) + copyback = 1; + break; + ++ case XEN_DOMCTL_assign_device: ++ case XEN_DOMCTL_test_assign_device: ++ case XEN_DOMCTL_deassign_device: ++ case XEN_DOMCTL_get_device_group: ++ ret = iommu_do_domctl(op, d, u_domctl); ++ break; ++ + default: + ret = arch_do_domctl(op, d, u_domctl); + break; +diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h +index 92b2d23f0ba2..861579562e8a 100644 +--- a/xen/include/xen/iommu.h ++++ b/xen/include/xen/iommu.h +@@ -342,8 +342,17 @@ struct domain_iommu { + /* Does the IOMMU pagetable need to be kept synchronized with the P2M */ + #ifdef CONFIG_HAS_PASSTHROUGH + #define need_iommu_pt_sync(d) (dom_iommu(d)->need_sync) ++ ++int iommu_do_domctl(struct xen_domctl *domctl, struct domain *d, ++ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl); + #else + #define need_iommu_pt_sync(d) ({ (void)(d); false; }) ++ ++static inline int iommu_do_domctl(struct xen_domctl *domctl, struct domain *d, ++ XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl) ++{ ++ return -ENOSYS; ++} + #endif + + int __must_check iommu_suspend(void); +@@ -357,9 +366,6 @@ int iommu_do_pci_domctl(struct xen_domctl *, struct domain *d, + XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); + #endif + +-int iommu_do_domctl(struct xen_domctl *, struct domain *d, +- XEN_GUEST_HANDLE_PARAM(xen_domctl_t)); +- + void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev); + + /* +-- +2.35.1 + diff --git a/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch b/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch new file mode 100644 index 0000000..ff26651 --- /dev/null +++ b/0012-IOMMU-make-domctl-handler-tolerate-NULL-domain.patch @@ -0,0 +1,36 @@ +From 4cf9a7c7bdb9d544fbac81105bbc1059ba3dd932 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 14:02:30 +0200 +Subject: [PATCH 12/32] IOMMU: make domctl handler tolerate NULL domain + +Besides the reporter's issue of hitting a 
NULL deref when !CONFIG_GDBSX, +XEN_DOMCTL_test_assign_device can legitimately end up having NULL passed +here, when the domctl was passed DOMID_INVALID. + +Fixes: 71e617a6b8f6 ("use is_iommu_enabled() where appropriate...") +Reported-by: Cheyenne Wills <cheyenne.wills@gmail.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Paul Durrant <paul@xen.org> +Reviewed-by: Juergen Gross <jgross@suse.com> +master commit: fa4d84e6dd3c3bfd23a525b75a5483d4ce15adbb +master date: 2022-04-26 10:25:54 +0200 +--- + xen/drivers/passthrough/iommu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c +index caaba62c8865..287f63fc736f 100644 +--- a/xen/drivers/passthrough/iommu.c ++++ b/xen/drivers/passthrough/iommu.c +@@ -535,7 +535,7 @@ int iommu_do_domctl( + { + int ret = -ENODEV; + +- if ( !is_iommu_enabled(d) ) ++ if ( !(d ? is_iommu_enabled(d) : iommu_enabled) ) + return -EOPNOTSUPP; + + #ifdef CONFIG_HAS_PCI +-- +2.35.1 + diff --git a/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch b/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch new file mode 100644 index 0000000..efadef6 --- /dev/null +++ b/0013-IOMMU-x86-disallow-device-assignment-to-PoD-guests.patch @@ -0,0 +1,229 @@ +From 838f6c211f7f05f107e1acdfb0977ab61ec0bf2e Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 14:03:20 +0200 +Subject: [PATCH 13/32] IOMMU/x86: disallow device assignment to PoD guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +While it is okay for IOMMU page tables to be set up for guests starting +in PoD mode, actual device assignment may only occur once all PoD +entries have been removed from the P2M. So far this was enforced only +for boot-time assignment, and only in the tool stack. + +Also use the new function to replace p2m_pod_entry_count(): Its unlocked +access to p2m->pod.entry_count wasn't really okay (irrespective of the +result being stale by the time the caller gets to see it). Nor was the +use of that function in line with the immediately preceding comment: A +PoD guest isn't just one with a non-zero entry count, but also one with +a non-empty cache (e.g. prior to actually launching the guest). + +To allow the tool stack to see a consistent snapshot of PoD state, move +the tail of XENMEM_{get,set}_pod_target handling into a function, adding +proper locking there. + +In libxl take the liberty to use the new local variable r also for a +pre-existing call into libxc. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: ad4312d764e8b40a1e45b64aac6d840a60c59f13 +master date: 2022-05-02 08:48:02 +0200 +--- + xen/arch/x86/mm.c | 6 +--- + xen/arch/x86/mm/p2m-pod.c | 43 ++++++++++++++++++++++++++++- + xen/common/vm_event.c | 2 +- + xen/drivers/passthrough/x86/iommu.c | 3 +- + xen/include/asm-x86/p2m.h | 21 +++++++------- + 5 files changed, 57 insertions(+), 18 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index e222d9aa98ee..4ee2de11051d 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -4777,7 +4777,6 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) + { + xen_pod_target_t target; + struct domain *d; +- struct p2m_domain *p2m; + + if ( copy_from_guest(&target, arg, 1) ) + return -EFAULT; +@@ -4812,10 +4811,7 @@ long arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) + } + else if ( rc >= 0 ) + { +- p2m = p2m_get_hostp2m(d); +- target.tot_pages = domain_tot_pages(d); +- target.pod_cache_pages = p2m->pod.count; +- target.pod_entries = p2m->pod.entry_count; ++ p2m_pod_get_mem_target(d, &target); + + if ( __copy_to_guest(arg, &target, 1) ) + { +diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c +index d8d1a0ce7ed7..a3c9d8a97423 100644 +--- a/xen/arch/x86/mm/p2m-pod.c ++++ b/xen/arch/x86/mm/p2m-pod.c +@@ -20,6 +20,7 @@ + */ + + #include <xen/event.h> ++#include <xen/iocap.h> + #include <xen/ioreq.h> + #include <xen/mm.h> + #include <xen/sched.h> +@@ -362,7 +363,10 @@ p2m_pod_set_mem_target(struct domain *d, unsigned long target) + + ASSERT( pod_target >= p2m->pod.count ); + +- ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/); ++ if ( has_arch_pdevs(d) || cache_flush_permitted(d) ) ++ ret = -ENOTEMPTY; ++ else ++ ret = p2m_pod_set_cache_target(p2m, pod_target, 1/*preemptible*/); + + out: + pod_unlock(p2m); +@@ -370,6 +374,23 @@ out: + return ret; + } + ++void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target) ++{ ++ struct p2m_domain *p2m = p2m_get_hostp2m(d); ++ ++ ASSERT(is_hvm_domain(d)); ++ ++ pod_lock(p2m); ++ lock_page_alloc(p2m); ++ ++ target->tot_pages = domain_tot_pages(d); ++ target->pod_cache_pages = p2m->pod.count; ++ target->pod_entries = p2m->pod.entry_count; ++ ++ unlock_page_alloc(p2m); ++ pod_unlock(p2m); ++} ++ + int p2m_pod_empty_cache(struct domain *d) + { + struct p2m_domain *p2m = p2m_get_hostp2m(d); +@@ -1387,6 +1408,9 @@ guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn, + if ( !paging_mode_translate(d) ) + return -EINVAL; + ++ if ( has_arch_pdevs(d) || cache_flush_permitted(d) ) ++ return -ENOTEMPTY; ++ + do { + rc = mark_populate_on_demand(d, gfn, chunk_order); + +@@ -1408,3 +1432,20 @@ void p2m_pod_init(struct p2m_domain *p2m) + for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i ) + p2m->pod.mrp.list[i] = gfn_x(INVALID_GFN); + } ++ ++bool p2m_pod_active(const struct domain *d) ++{ ++ struct p2m_domain *p2m; ++ bool res; ++ ++ if ( !is_hvm_domain(d) ) ++ return false; ++ ++ p2m = p2m_get_hostp2m(d); ++ ++ pod_lock(p2m); ++ res = p2m->pod.entry_count | p2m->pod.count; ++ pod_unlock(p2m); ++ ++ return res; ++} +diff --git a/xen/common/vm_event.c b/xen/common/vm_event.c +index 70ab3ba406ff..21d2f0edf727 100644 +--- a/xen/common/vm_event.c ++++ b/xen/common/vm_event.c +@@ -639,7 +639,7 @@ int vm_event_domctl(struct domain *d, struct xen_domctl_vm_event_op *vec) + + rc = -EXDEV; + /* Disallow paging in a PoD guest */ +- if ( 
p2m_pod_entry_count(p2m_get_hostp2m(d)) ) ++ if ( p2m_pod_active(d) ) + break; + + /* domain_pause() not required here, see XSA-99 */ +diff --git a/xen/drivers/passthrough/x86/iommu.c b/xen/drivers/passthrough/x86/iommu.c +index a36a6bd4b249..dc9936e16930 100644 +--- a/xen/drivers/passthrough/x86/iommu.c ++++ b/xen/drivers/passthrough/x86/iommu.c +@@ -502,11 +502,12 @@ bool arch_iommu_use_permitted(const struct domain *d) + { + /* + * Prevent device assign if mem paging, mem sharing or log-dirty +- * have been enabled for this domain. ++ * have been enabled for this domain, or if PoD is still in active use. + */ + return d == dom_io || + (likely(!mem_sharing_enabled(d)) && + likely(!mem_paging_enabled(d)) && ++ likely(!p2m_pod_active(d)) && + likely(!p2m_get_hostp2m(d)->global_logdirty)); + } + +diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h +index 357a8087481e..f2af7a746ced 100644 +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -661,6 +661,12 @@ int p2m_pod_empty_cache(struct domain *d); + * domain matches target */ + int p2m_pod_set_mem_target(struct domain *d, unsigned long target); + ++/* Obtain a consistent snapshot of PoD related domain state. */ ++void p2m_pod_get_mem_target(const struct domain *d, xen_pod_target_t *target); ++ ++/* Check whether PoD is (still) active in a domain. */ ++bool p2m_pod_active(const struct domain *d); ++ + /* Scan pod cache when offline/broken page triggered */ + int + p2m_pod_offline_or_broken_hit(struct page_info *p); +@@ -669,11 +675,6 @@ p2m_pod_offline_or_broken_hit(struct page_info *p); + void + p2m_pod_offline_or_broken_replace(struct page_info *p); + +-static inline long p2m_pod_entry_count(const struct p2m_domain *p2m) +-{ +- return p2m->pod.entry_count; +-} +- + void p2m_pod_init(struct p2m_domain *p2m); + + #else +@@ -689,6 +690,11 @@ static inline int p2m_pod_empty_cache(struct domain *d) + return 0; + } + ++static inline bool p2m_pod_active(const struct domain *d) ++{ ++ return false; ++} ++ + static inline int p2m_pod_offline_or_broken_hit(struct page_info *p) + { + return 0; +@@ -699,11 +705,6 @@ static inline void p2m_pod_offline_or_broken_replace(struct page_info *p) + ASSERT_UNREACHABLE(); + } + +-static inline long p2m_pod_entry_count(const struct p2m_domain *p2m) +-{ +- return 0; +-} +- + static inline void p2m_pod_init(struct p2m_domain *p2m) {} + + #endif +-- +2.35.1 + diff --git a/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch b/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch new file mode 100644 index 0000000..09f56f5 --- /dev/null +++ b/0014-x86-msr-handle-reads-to-MSR_P5_MC_-ADDR-TYPE.patch @@ -0,0 +1,121 @@ +From 9ebe2ba83644ec6cd33a93c68dab5f551adcbea0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 7 Jun 2022 14:04:16 +0200 +Subject: [PATCH 14/32] x86/msr: handle reads to MSR_P5_MC_{ADDR,TYPE} +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Windows Server 2019 Essentials will unconditionally attempt to read +P5_MC_ADDR MSR at boot and throw a BSOD if injected a #GP. + +Fix this by mapping MSR_P5_MC_{ADDR,TYPE} to +MSR_IA32_MCi_{ADDR,STATUS}, as reported also done by hardware in Intel +SDM "Mapping of the Pentium Processor Machine-Check Errors to the +Machine-Check Architecture" section. 
+ +Reported-by: Steffen Einsle <einsle@phptrix.de> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: ce59e472b581e4923f6892172dde62b88c8aa8b7 +master date: 2022-05-02 08:49:12 +0200 +--- + xen/arch/x86/cpu/mcheck/mce.h | 6 ++++++ + xen/arch/x86/cpu/mcheck/mce_intel.c | 19 +++++++++++++++++++ + xen/arch/x86/cpu/mcheck/vmce.c | 2 ++ + xen/arch/x86/msr.c | 2 ++ + xen/include/asm-x86/msr-index.h | 3 +++ + 5 files changed, 32 insertions(+) + +diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h +index 195362691904..192315ecfa3d 100644 +--- a/xen/arch/x86/cpu/mcheck/mce.h ++++ b/xen/arch/x86/cpu/mcheck/mce.h +@@ -169,6 +169,12 @@ static inline int mce_vendor_bank_msr(const struct vcpu *v, uint32_t msr) + if (msr >= MSR_IA32_MC0_CTL2 && + msr < MSR_IA32_MCx_CTL2(v->arch.vmce.mcg_cap & MCG_CAP_COUNT) ) + return 1; ++ fallthrough; ++ ++ case X86_VENDOR_CENTAUR: ++ case X86_VENDOR_SHANGHAI: ++ if (msr == MSR_P5_MC_ADDR || msr == MSR_P5_MC_TYPE) ++ return 1; + break; + + case X86_VENDOR_AMD: +diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c +index bb9f3a3ff795..d364e9bf5ad1 100644 +--- a/xen/arch/x86/cpu/mcheck/mce_intel.c ++++ b/xen/arch/x86/cpu/mcheck/mce_intel.c +@@ -1001,8 +1001,27 @@ int vmce_intel_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + + int vmce_intel_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + { ++ const struct cpuid_policy *cp = v->domain->arch.cpuid; + unsigned int bank = msr - MSR_IA32_MC0_CTL2; + ++ switch ( msr ) ++ { ++ case MSR_P5_MC_ADDR: ++ /* ++ * Bank 0 is used for the 'bank 0 quirk' on older processors. ++ * See vcpu_fill_mc_msrs() for reference. ++ */ ++ *val = v->arch.vmce.bank[1].mci_addr; ++ return 1; ++ ++ case MSR_P5_MC_TYPE: ++ *val = v->arch.vmce.bank[1].mci_status; ++ return 1; ++ } ++ ++ if ( !(cp->x86_vendor & X86_VENDOR_INTEL) ) ++ return 0; ++ + if ( bank < GUEST_MC_BANK_NUM ) + { + *val = v->arch.vmce.bank[bank].mci_ctl2; +diff --git a/xen/arch/x86/cpu/mcheck/vmce.c b/xen/arch/x86/cpu/mcheck/vmce.c +index eb6434a3ba20..0899df58bcbf 100644 +--- a/xen/arch/x86/cpu/mcheck/vmce.c ++++ b/xen/arch/x86/cpu/mcheck/vmce.c +@@ -150,6 +150,8 @@ static int bank_mce_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + default: + switch ( boot_cpu_data.x86_vendor ) + { ++ case X86_VENDOR_CENTAUR: ++ case X86_VENDOR_SHANGHAI: + case X86_VENDOR_INTEL: + ret = vmce_intel_rdmsr(v, msr, val); + break; +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index aaedb2c31287..da305c7aa4c9 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -282,6 +282,8 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t *val) + *val = msrs->misc_features_enables.raw; + break; + ++ case MSR_P5_MC_ADDR: ++ case MSR_P5_MC_TYPE: + case MSR_IA32_MCG_CAP ... MSR_IA32_MCG_CTL: /* 0x179 -> 0x17b */ + case MSR_IA32_MCx_CTL2(0) ... MSR_IA32_MCx_CTL2(31): /* 0x280 -> 0x29f */ + case MSR_IA32_MCx_CTL(0) ... MSR_IA32_MCx_MISC(31): /* 0x400 -> 0x47f */ +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 3e038db618ff..31964b88af7a 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -15,6 +15,9 @@ + * abbreviated name. Exceptions will be considered on a case-by-case basis. 
+ */ + ++#define MSR_P5_MC_ADDR 0 ++#define MSR_P5_MC_TYPE 0x00000001 ++ + #define MSR_APIC_BASE 0x0000001b + #define APIC_BASE_BSP (_AC(1, ULL) << 8) + #define APIC_BASE_EXTD (_AC(1, ULL) << 10) +-- +2.35.1 + diff --git a/0015-kconfig-detect-LD-implementation.patch b/0015-kconfig-detect-LD-implementation.patch new file mode 100644 index 0000000..f2fc24a --- /dev/null +++ b/0015-kconfig-detect-LD-implementation.patch @@ -0,0 +1,46 @@ +From 3754bd128d1a6b3d5864d1a3ee5d27b67d35387a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 7 Jun 2022 14:05:06 +0200 +Subject: [PATCH 15/32] kconfig: detect LD implementation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Detect GNU and LLVM ld implementations. This is required for further +patches that will introduce diverging behaviour depending on the +linker implementation in use. + +Note that LLVM ld returns "compatible with GNU linkers" as part of the +version string, so be on the safe side and use '^' to only match at +the start of the line in case LLVM ever decides to change the text to +use "compatible with GNU ld" instead. + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Michal Orzel <michal.orzel@arm.com> +Acked-by: Julien Grall <jgrall@amazon.com> +master commit: c70c4b624f85f7d4e28c70a804a0a3f20d73092b +master date: 2022-05-02 08:50:39 +0200 +--- + xen/Kconfig | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xen/Kconfig b/xen/Kconfig +index bcbd2758e5d3..0c89afd50fcf 100644 +--- a/xen/Kconfig ++++ b/xen/Kconfig +@@ -23,6 +23,12 @@ config CLANG_VERSION + int + default $(shell,$(BASEDIR)/scripts/clang-version.sh $(CC)) + ++config LD_IS_GNU ++ def_bool $(success,$(LD) --version | head -n 1 | grep -q "^GNU ld") ++ ++config LD_IS_LLVM ++ def_bool $(success,$(LD) --version | head -n 1 | grep -q "^LLD") ++ + # -fvisibility=hidden reduces -fpic cost, if it's available + config CC_HAS_VISIBILITY_ATTRIBUTE + def_bool $(cc-option,-fvisibility=hidden) +-- +2.35.1 + diff --git a/0016-linker-lld-do-not-generate-quoted-section-names.patch b/0016-linker-lld-do-not-generate-quoted-section-names.patch new file mode 100644 index 0000000..a42083e --- /dev/null +++ b/0016-linker-lld-do-not-generate-quoted-section-names.patch @@ -0,0 +1,54 @@ +From 88b653f73928117461dc250acd1e830a47a14c2b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 7 Jun 2022 14:05:24 +0200 +Subject: [PATCH 16/32] linker/lld: do not generate quoted section names +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +LLVM LD doesn't strip the quotes from the section names, and so the +resulting binary ends up with section names like: + + [ 1] ".text" PROGBITS ffff82d040200000 00008000 + 000000000018cbc1 0000000000000000 AX 0 0 4096 + +This confuses some tools (like gdb) and prevents proper parsing of the +binary. + +The issue has already been reported and is being fixed in LLD. In +order to workaround this issue and keep the GNU ld support define +different DECL_SECTION macros depending on the used ld +implementation. + +Drop the quotes from the definitions of the debug sections in +DECL_DEBUG{2}, as those quotes are not required for GNU ld either. 
+ +Fixes: 6254920587c3 ('x86: quote section names when defining them in linker script') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 702c9a800eb3ecd4b8595998d37a769d470c5bb0 +master date: 2022-05-02 08:51:45 +0200 +--- + xen/arch/x86/xen.lds.S | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S +index 4c58f3209c3d..bc9b9651b192 100644 +--- a/xen/arch/x86/xen.lds.S ++++ b/xen/arch/x86/xen.lds.S +@@ -18,7 +18,11 @@ ENTRY(efi_start) + #else /* !EFI */ + + #define FORMAT "elf64-x86-64" +-#define DECL_SECTION(x) #x : AT(ADDR(#x) - __XEN_VIRT_START) ++#ifdef CONFIG_LD_IS_GNU ++# define DECL_SECTION(x) x : AT(ADDR(#x) - __XEN_VIRT_START) ++#else ++# define DECL_SECTION(x) x : AT(ADDR(x) - __XEN_VIRT_START) ++#endif + + ENTRY(start_pa) + +-- +2.35.1 + diff --git a/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch b/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch new file mode 100644 index 0000000..d226e97 --- /dev/null +++ b/0017-xen-io-Fix-race-between-sending-an-I-O-and-domain-sh.patch @@ -0,0 +1,142 @@ +From 982a314bd3000a16c3128afadb36a8ff41029adc Mon Sep 17 00:00:00 2001 +From: Julien Grall <jgrall@amazon.com> +Date: Tue, 7 Jun 2022 14:06:11 +0200 +Subject: [PATCH 17/32] xen: io: Fix race between sending an I/O and domain + shutdown + +Xen provides hypercalls to shutdown (SCHEDOP_shutdown{,_code}) and +resume a domain (XEN_DOMCTL_resumedomain). They can be used for checkpointing, +where the expectation is the domain should continue as if nothing happened +afterwards. + +hvmemul_do_io() and handle_pio() will act differently if the return +code of hvm_send_ioreq() (resp. hvmemul_do_pio_buffer()) is X86EMUL_RETRY. + +In this case, the I/O state will be reset to STATE_IOREQ_NONE (i.e +no I/O is pending) and/or the PC will not be advanced. + +If the shutdown request happens right after the I/O was sent to the +IOREQ, then emulation code will end up re-executing the instruction +and therefore forwarding the same I/O again (at least when reading an IO port). + +This would be a problem if the access has a side-effect. A dumb example +is a device implementing a counter which is incremented by one for every +access. When running shutdown/resume in a loop, the value read by the +OS may not be the old value + 1. + +Add an extra boolean in the structure hvm_vcpu_io to indicate whether +the I/O was suspended. This is then used in place of checking the domain +is shutting down in hvmemul_do_io() and handle_pio() as they should +act on suspend (i.e. vcpu_start_shutdown_deferral() returns false) rather +than shutdown. 
+ +Signed-off-by: Julien Grall <jgrall@amazon.com> +Reviewed-by: Paul Durrant <paul@xen.org> +master commit: b7e0d8978810b534725e94a321736496928f00a5 +master date: 2022-05-06 17:16:22 +0100 +--- + xen/arch/arm/ioreq.c | 3 ++- + xen/arch/x86/hvm/emulate.c | 3 ++- + xen/arch/x86/hvm/io.c | 7 ++++--- + xen/common/ioreq.c | 4 ++++ + xen/include/xen/sched.h | 5 +++++ + 5 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/xen/arch/arm/ioreq.c b/xen/arch/arm/ioreq.c +index 308650b40051..fbccef212bf1 100644 +--- a/xen/arch/arm/ioreq.c ++++ b/xen/arch/arm/ioreq.c +@@ -80,9 +80,10 @@ enum io_state try_fwd_ioserv(struct cpu_user_regs *regs, + return IO_ABORT; + + vio->req = p; ++ vio->suspended = false; + + rc = ioreq_send(s, &p, 0); +- if ( rc != IO_RETRY || v->domain->is_shutting_down ) ++ if ( rc != IO_RETRY || vio->suspended ) + vio->req.state = STATE_IOREQ_NONE; + else if ( !ioreq_needs_completion(&vio->req) ) + rc = IO_HANDLED; +diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c +index 76a2ccfafe23..7da348b5d486 100644 +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -239,6 +239,7 @@ static int hvmemul_do_io( + ASSERT(p.count); + + vio->req = p; ++ vio->suspended = false; + + rc = hvm_io_intercept(&p); + +@@ -334,7 +335,7 @@ static int hvmemul_do_io( + else + { + rc = ioreq_send(s, &p, 0); +- if ( rc != X86EMUL_RETRY || currd->is_shutting_down ) ++ if ( rc != X86EMUL_RETRY || vio->suspended ) + vio->req.state = STATE_IOREQ_NONE; + else if ( !ioreq_needs_completion(&vio->req) ) + rc = X86EMUL_OKAY; +diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c +index 93f1d1503fa6..80915f27e488 100644 +--- a/xen/arch/x86/hvm/io.c ++++ b/xen/arch/x86/hvm/io.c +@@ -138,10 +138,11 @@ bool handle_pio(uint16_t port, unsigned int size, int dir) + + case X86EMUL_RETRY: + /* +- * We should not advance RIP/EIP if the domain is shutting down or +- * if X86EMUL_RETRY has been returned by an internal handler. ++ * We should not advance RIP/EIP if the vio was suspended (e.g. ++ * because the domain is shutting down) or if X86EMUL_RETRY has ++ * been returned by an internal handler. + */ +- if ( curr->domain->is_shutting_down || !vcpu_ioreq_pending(curr) ) ++ if ( vio->suspended || !vcpu_ioreq_pending(curr) ) + return false; + break; + +diff --git a/xen/common/ioreq.c b/xen/common/ioreq.c +index d732dc045df9..42414b750bef 100644 +--- a/xen/common/ioreq.c ++++ b/xen/common/ioreq.c +@@ -1256,6 +1256,7 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p, + struct vcpu *curr = current; + struct domain *d = curr->domain; + struct ioreq_vcpu *sv; ++ struct vcpu_io *vio = &curr->io; + + ASSERT(s); + +@@ -1263,7 +1264,10 @@ int ioreq_send(struct ioreq_server *s, ioreq_t *proto_p, + return ioreq_send_buffered(s, proto_p); + + if ( unlikely(!vcpu_start_shutdown_deferral(curr)) ) ++ { ++ vio->suspended = true; + return IOREQ_STATUS_RETRY; ++ } + + list_for_each_entry ( sv, + &s->ioreq_vcpu_list, +diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h +index 28146ee404e6..9671062360ac 100644 +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h +@@ -159,6 +159,11 @@ enum vio_completion { + struct vcpu_io { + /* I/O request in flight to device model. */ + enum vio_completion completion; ++ /* ++ * Indicate whether the I/O was not handled because the domain ++ * is about to be paused. 
++ */ ++ bool suspended; + ioreq_t req; + }; + +-- +2.35.1 + diff --git a/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch b/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch new file mode 100644 index 0000000..87a0873 --- /dev/null +++ b/0018-build-suppress-GNU-ld-warning-about-RWX-load-segment.patch @@ -0,0 +1,35 @@ +From 4890031d224262a6cf43d3bef1af4a16c13db306 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 14:06:51 +0200 +Subject: [PATCH 18/32] build: suppress GNU ld warning about RWX load segments + +We cannot really avoid such and we're also not really at risk because of +them, as we control page table permissions ourselves rather than relying +on a loader of some sort. Present GNU ld master started warning about +such, and hence 2.39 is anticipated to have this warning. + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Julien Grall <jgrall@amazon.com> +master commit: 68f5aac012b9ae36ce9b65d9ca9cc9f232191ad3 +master date: 2022-05-18 11:17:19 +0200 +--- + xen/Makefile | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xen/Makefile b/xen/Makefile +index ce4eca3ee4d7..4d9abe704628 100644 +--- a/xen/Makefile ++++ b/xen/Makefile +@@ -260,6 +260,8 @@ endif + + AFLAGS += -D__ASSEMBLY__ + ++LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments ++ + CFLAGS += $(CFLAGS-y) + # allow extra CFLAGS externally via EXTRA_CFLAGS_XEN_CORE + CFLAGS += $(EXTRA_CFLAGS_XEN_CORE) +-- +2.35.1 + diff --git a/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch b/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch new file mode 100644 index 0000000..75e9f7e --- /dev/null +++ b/0019-build-silence-GNU-ld-warning-about-executable-stacks.patch @@ -0,0 +1,35 @@ +From 1bc669a568a9f4bdab9e9ddb95823ba370dc0baf Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 14:07:11 +0200 +Subject: [PATCH 19/32] build: silence GNU ld warning about executable stacks + +While for C files the compiler is supposed to arrange for emitting +respective information, for assembly sources we're responsible ourselves. +Present GNU ld master started warning about such, and hence 2.39 is +anticipated to have this warning. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Julien Grall <jgrall@amazon.com> +master commit: 62d22296a95d259c934ca2f39ac511d729cfbb68 +master date: 2022-05-18 11:18:45 +0200 +--- + xen/Makefile | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xen/Makefile b/xen/Makefile +index 4d9abe704628..971028eda240 100644 +--- a/xen/Makefile ++++ b/xen/Makefile +@@ -260,6 +260,8 @@ endif + + AFLAGS += -D__ASSEMBLY__ + ++$(call cc-option-add,AFLAGS,CC,-Wa$(comma)--noexecstack) ++ + LDFLAGS-$(call ld-option,--warn-rwx-segments) += --no-warn-rwx-segments + + CFLAGS += $(CFLAGS-y) +-- +2.35.1 + diff --git a/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch b/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch new file mode 100644 index 0000000..b83be9a --- /dev/null +++ b/0020-ns16550-use-poll-mode-if-INTERRUPT_LINE-is-0xff.patch @@ -0,0 +1,50 @@ +From f1be0b62a03b90a40a03e21f965e4cbb89809bb1 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= + <marmarek@invisiblethingslab.com> +Date: Tue, 7 Jun 2022 14:07:34 +0200 +Subject: [PATCH 20/32] ns16550: use poll mode if INTERRUPT_LINE is 0xff +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Intel LPSS has INTERRUPT_LINE set to 0xff by default, that is declared +by the PCI Local Bus Specification Revision 3.0 (from 2004) as +"unknown"/"no connection". Fallback to poll mode in this case. +The 0xff handling is x86-specific, the surrounding code is guarded with +CONFIG_X86 anyway. + +Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 6a2ea1a2370a0c8a0210accac0ae62e68c185134 +master date: 2022-05-20 12:19:45 +0200 +--- + xen/drivers/char/ns16550.c | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/xen/drivers/char/ns16550.c b/xen/drivers/char/ns16550.c +index 30596d60d4ed..2d2bd2a02469 100644 +--- a/xen/drivers/char/ns16550.c ++++ b/xen/drivers/char/ns16550.c +@@ -1221,6 +1221,19 @@ pci_uart_config(struct ns16550 *uart, bool_t skip_amt, unsigned int idx) + pci_conf_read8(PCI_SBDF(0, b, d, f), + PCI_INTERRUPT_LINE) : 0; + ++#ifdef CONFIG_X86 ++ /* ++ * PCI Local Bus Specification Revision 3.0 defines 0xff value ++ * as special only for X86. ++ */ ++ if ( uart->irq == 0xff ) ++ uart->irq = 0; ++#endif ++ if ( !uart->irq ) ++ printk(XENLOG_INFO ++ "ns16550: %pp: no legacy IRQ, using poll mode\n", ++ &PCI_SBDF(0, b, d, f)); ++ + return 0; + } + } +-- +2.35.1 + diff --git a/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch b/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch new file mode 100644 index 0000000..1264578 --- /dev/null +++ b/0021-PCI-don-t-allow-pci-phantom-to-mark-real-devices-as-.patch @@ -0,0 +1,56 @@ +From 8e11ec8fbf6f933f8854f4bc54226653316903f2 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 7 Jun 2022 14:08:06 +0200 +Subject: [PATCH 21/32] PCI: don't allow "pci-phantom=" to mark real devices as + phantom functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +IOMMU code mapping / unmapping devices and interrupts will misbehave if +a wrong command line option declared a function "phantom" when there's a +real device at that position. Warn about this and adjust the specified +stride (in the worst case ignoring the option altogether). 
+ +Requested-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 444b555dc9e09fa3ce90f066e0c88dec9b47f422 +master date: 2022-05-20 12:20:35 +0200 +--- + xen/drivers/passthrough/pci.c | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c +index 395958698e6a..e0491c908f10 100644 +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -382,7 +382,24 @@ static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn) + phantom_devs[i].slot == PCI_SLOT(devfn) && + phantom_devs[i].stride > PCI_FUNC(devfn) ) + { +- pdev->phantom_stride = phantom_devs[i].stride; ++ pci_sbdf_t sbdf = pdev->sbdf; ++ unsigned int stride = phantom_devs[i].stride; ++ ++ while ( (sbdf.fn += stride) > PCI_FUNC(devfn) ) ++ { ++ if ( pci_conf_read16(sbdf, PCI_VENDOR_ID) == 0xffff && ++ pci_conf_read16(sbdf, PCI_DEVICE_ID) == 0xffff ) ++ continue; ++ stride <<= 1; ++ printk(XENLOG_WARNING ++ "%pp looks to be a real device; bumping %04x:%02x:%02x stride to %u\n", ++ &sbdf, phantom_devs[i].seg, ++ phantom_devs[i].bus, phantom_devs[i].slot, ++ stride); ++ sbdf = pdev->sbdf; ++ } ++ if ( PCI_FUNC(stride) ) ++ pdev->phantom_stride = stride; + break; + } + } +-- +2.35.1 + diff --git a/0022-x86-pv-Clean-up-_get_page_type.patch b/0022-x86-pv-Clean-up-_get_page_type.patch new file mode 100644 index 0000000..a6008b0 --- /dev/null +++ b/0022-x86-pv-Clean-up-_get_page_type.patch @@ -0,0 +1,180 @@ +From b152dfbc3ad71a788996440b18174d995c3bffc9 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 9 Jun 2022 15:27:19 +0200 +Subject: [PATCH 22/32] x86/pv: Clean up _get_page_type() + +Various fixes for clarity, ahead of making complicated changes. + + * Split the overflow check out of the if/else chain for type handling, as + it's somewhat unrelated. + * Comment the main if/else chain to explain what is going on. Adjust one + ASSERT() and state the bit layout for validate-locked and partial states. + * Correct the comment about TLB flushing, as it's backwards. The problem + case is when writeable mappings are retained to a page becoming read-only, + as it allows the guest to bypass Xen's safety checks for updates. + * Reduce the scope of 'y'. It is an artefact of the cmpxchg loop and not + valid for use by subsequent logic. Switch to using ACCESS_ONCE() to treat + all reads as explicitly volatile. The only thing preventing the validated + wait-loop being infinite is the compiler barrier hidden in cpu_relax(). + * Replace one page_get_owner(page) with the already-calculated 'd' already in + scope. + +No functional change. + +This is part of XSA-401 / CVE-2022-26362. 
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> +master commit: 9186e96b199e4f7e52e033b238f9fe869afb69c7 +master date: 2022-06-09 14:20:36 +0200 +--- + xen/arch/x86/mm.c | 72 +++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 61 insertions(+), 11 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 4ee2de11051d..79ad7fdd2b82 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -2906,16 +2906,17 @@ static int _put_page_type(struct page_info *page, unsigned int flags, + static int _get_page_type(struct page_info *page, unsigned long type, + bool preemptible) + { +- unsigned long nx, x, y = page->u.inuse.type_info; ++ unsigned long nx, x; + int rc = 0; + + ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); + ASSERT(!in_irq()); + +- for ( ; ; ) ++ for ( unsigned long y = ACCESS_ONCE(page->u.inuse.type_info); ; ) + { + x = y; + nx = x + 1; ++ + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + gdprintk(XENLOG_WARNING, +@@ -2923,8 +2924,15 @@ static int _get_page_type(struct page_info *page, unsigned long type, + mfn_x(page_to_mfn(page))); + return -EINVAL; + } +- else if ( unlikely((x & PGT_count_mask) == 0) ) ++ ++ if ( unlikely((x & PGT_count_mask) == 0) ) + { ++ /* ++ * Typeref 0 -> 1. ++ * ++ * Type changes are permitted when the typeref is 0. If the type ++ * actually changes, the page needs re-validating. ++ */ + struct domain *d = page_get_owner(page); + + if ( d && shadow_mode_enabled(d) ) +@@ -2935,8 +2943,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, + { + /* + * On type change we check to flush stale TLB entries. It is +- * vital that no other CPUs are left with mappings of a frame +- * which is about to become writeable to the guest. ++ * vital that no other CPUs are left with writeable mappings ++ * to a frame which is intending to become pgtable/segdesc. + */ + cpumask_t *mask = this_cpu(scratch_cpumask); + +@@ -2948,7 +2956,7 @@ static int _get_page_type(struct page_info *page, unsigned long type, + + if ( unlikely(!cpumask_empty(mask)) && + /* Shadow mode: track only writable pages. */ +- (!shadow_mode_enabled(page_get_owner(page)) || ++ (!shadow_mode_enabled(d) || + ((nx & PGT_type_mask) == PGT_writable_page)) ) + { + perfc_incr(need_flush_tlb_flush); +@@ -2979,7 +2987,14 @@ static int _get_page_type(struct page_info *page, unsigned long type, + } + else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) + { +- /* Don't log failure if it could be a recursive-mapping attempt. */ ++ /* ++ * else, we're trying to take a new reference, of the wrong type. ++ * ++ * This (being able to prohibit use of the wrong type) is what the ++ * typeref system exists for, but skip printing the failure if it ++ * looks like a recursive mapping, as subsequent logic might ++ * ultimately permit the attempt. ++ */ + if ( ((x & PGT_type_mask) == PGT_l2_page_table) && + (type == PGT_l1_page_table) ) + return -EINVAL; +@@ -2998,18 +3013,46 @@ static int _get_page_type(struct page_info *page, unsigned long type, + } + else if ( unlikely(!(x & PGT_validated)) ) + { ++ /* ++ * else, the count is non-zero, and we're grabbing the right type; ++ * but the page hasn't been validated yet. ++ * ++ * The page is in one of two states (depending on PGT_partial), ++ * and should have exactly one reference. 
++             */
++            ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1));
++
+             if ( !(x & PGT_partial) )
+             {
+-                /* Someone else is updating validation of this page. Wait... */
++                /*
++                 * The page has been left in the "validate locked" state
++                 * (i.e. PGT_[type] | 1) which means that a concurrent caller
++                 * of _get_page_type() is in the middle of validation.
++                 *
++                 * Spin waiting for the concurrent user to complete (partial
++                 * or fully validated), then restart our attempt to acquire a
++                 * type reference.
++                 */
+                 do {
+                     if ( preemptible && hypercall_preempt_check() )
+                         return -EINTR;
+                     cpu_relax();
+-                } while ( (y = page->u.inuse.type_info) == x );
++                } while ( (y = ACCESS_ONCE(page->u.inuse.type_info)) == x );
+                 continue;
+             }
+-            /* Type ref count was left at 1 when PGT_partial got set. */
+-            ASSERT((x & PGT_count_mask) == 1);
++
++            /*
++             * The page has been left in the "partial" state
++             * (i.e., PGT_[type] | PGT_partial | 1).
++             *
++             * Rather than bumping the type count, we need to try to grab the
++             * validation lock; if we succeed, we need to validate the page,
++             * then drop the general ref associated with the PGT_partial bit.
++             *
++             * We grab the validation lock by setting nx to (PGT_[type] | 1)
++             * (i.e., non-zero type count, neither PGT_validated nor
++             * PGT_partial set).
++             */
+             nx = x & ~PGT_partial;
+         }
+ 
+@@ -3058,6 +3101,13 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+     }
+ 
+  out:
++    /*
++     * Did we drop the PGT_partial bit when acquiring the typeref?  If so,
++     * drop the general reference that went along with it.
++     *
++     * N.B. validate_page() may have have re-set PGT_partial, not reflected in
++     * nx, but will have taken an extra ref when doing so.
++     */
+     if ( (x & PGT_partial) && !(nx & PGT_partial) )
+         put_page(page);
+ 
+--
+2.35.1
+
diff --git a/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch b/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch
new file mode 100644
index 0000000..2f4b734
--- /dev/null
+++ b/0023-x86-pv-Fix-ABAC-cmpxchg-race-in-_get_page_type.patch
@@ -0,0 +1,201 @@
+From 8dab3f79b122e69cbcdebca72cdc14f004ee2193 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 9 Jun 2022 15:27:37 +0200
+Subject: [PATCH 23/32] x86/pv: Fix ABAC cmpxchg() race in _get_page_type()
+
+_get_page_type() suffers from a race condition where it incorrectly assumes
+that because 'x' was read and a subsequent cmpxchg() succeeds, the type
+cannot have changed in-between.  Consider:
+
+CPU A:
+  1. Creates an L2e referencing pg
+     `-> _get_page_type(pg, PGT_l1_page_table), sees count 0, type PGT_writable_page
+  2. Issues flush_tlb_mask()
+CPU B:
+  3. Creates a writeable mapping of pg
+     `-> _get_page_type(pg, PGT_writable_page), count increases to 1
+  4. Writes into new mapping, creating a TLB entry for pg
+  5. Removes the writeable mapping of pg
+     `-> _put_page_type(pg), count goes back down to 0
+CPU A:
+  6. Issues cmpxchg(), setting count 1, type PGT_l1_page_table
+
+CPU B now has a writeable mapping to pg, which Xen believes is a pagetable and
+suitably protected (i.e. read-only).  The TLB flush in step 2 must be deferred
+until after the guest is prohibited from creating new writeable mappings,
+which is after step 6.
+
+Defer all safety actions until after the cmpxchg() has successfully taken the
+intended typeref, because that is what prevents concurrent users from using
+the old type.
+
+Also remove the early validation for writeable and shared pages.
This removes +race conditions where one half of a parallel mapping attempt can return +successfully before: + * The IOMMU pagetables are in sync with the new page type + * Writeable mappings to shared pages have been torn down + +This is part of XSA-401 / CVE-2022-26362. + +Reported-by: Jann Horn <jannh@google.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> +master commit: 8cc5036bc385112a82f1faff27a0970e6440dfed +master date: 2022-06-09 14:21:04 +0200 +--- + xen/arch/x86/mm.c | 116 ++++++++++++++++++++++++++-------------------- + 1 file changed, 67 insertions(+), 49 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 79ad7fdd2b82..c6429b0f749a 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -2933,56 +2933,12 @@ static int _get_page_type(struct page_info *page, unsigned long type, + * Type changes are permitted when the typeref is 0. If the type + * actually changes, the page needs re-validating. + */ +- struct domain *d = page_get_owner(page); +- +- if ( d && shadow_mode_enabled(d) ) +- shadow_prepare_page_type_change(d, page, type); + + ASSERT(!(x & PGT_pae_xen_l2)); + if ( (x & PGT_type_mask) != type ) + { +- /* +- * On type change we check to flush stale TLB entries. It is +- * vital that no other CPUs are left with writeable mappings +- * to a frame which is intending to become pgtable/segdesc. +- */ +- cpumask_t *mask = this_cpu(scratch_cpumask); +- +- BUG_ON(in_irq()); +- cpumask_copy(mask, d->dirty_cpumask); +- +- /* Don't flush if the timestamp is old enough */ +- tlbflush_filter(mask, page->tlbflush_timestamp); +- +- if ( unlikely(!cpumask_empty(mask)) && +- /* Shadow mode: track only writable pages. */ +- (!shadow_mode_enabled(d) || +- ((nx & PGT_type_mask) == PGT_writable_page)) ) +- { +- perfc_incr(need_flush_tlb_flush); +- /* +- * If page was a page table make sure the flush is +- * performed using an IPI in order to avoid changing the +- * type of a page table page under the feet of +- * spurious_page_fault(). +- */ +- flush_mask(mask, +- (x & PGT_type_mask) && +- (x & PGT_type_mask) <= PGT_root_page_table +- ? FLUSH_TLB | FLUSH_FORCE_IPI +- : FLUSH_TLB); +- } +- +- /* We lose existing type and validity. */ + nx &= ~(PGT_type_mask | PGT_validated); + nx |= type; +- +- /* +- * No special validation needed for writable pages. +- * Page tables and GDT/LDT need to be scanned for validity. +- */ +- if ( type == PGT_writable_page || type == PGT_shared_page ) +- nx |= PGT_validated; + } + } + else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) ) +@@ -3063,6 +3019,56 @@ static int _get_page_type(struct page_info *page, unsigned long type, + return -EINTR; + } + ++ /* ++ * One typeref has been taken and is now globally visible. ++ * ++ * The page is either in the "validate locked" state (PGT_[type] | 1) or ++ * fully validated (PGT_[type] | PGT_validated | >0). ++ */ ++ ++ if ( unlikely((x & PGT_count_mask) == 0) ) ++ { ++ struct domain *d = page_get_owner(page); ++ ++ if ( d && shadow_mode_enabled(d) ) ++ shadow_prepare_page_type_change(d, page, type); ++ ++ if ( (x & PGT_type_mask) != type ) ++ { ++ /* ++ * On type change we check to flush stale TLB entries. It is ++ * vital that no other CPUs are left with writeable mappings ++ * to a frame which is intending to become pgtable/segdesc. 
++ */ ++ cpumask_t *mask = this_cpu(scratch_cpumask); ++ ++ BUG_ON(in_irq()); ++ cpumask_copy(mask, d->dirty_cpumask); ++ ++ /* Don't flush if the timestamp is old enough */ ++ tlbflush_filter(mask, page->tlbflush_timestamp); ++ ++ if ( unlikely(!cpumask_empty(mask)) && ++ /* Shadow mode: track only writable pages. */ ++ (!shadow_mode_enabled(d) || ++ ((nx & PGT_type_mask) == PGT_writable_page)) ) ++ { ++ perfc_incr(need_flush_tlb_flush); ++ /* ++ * If page was a page table make sure the flush is ++ * performed using an IPI in order to avoid changing the ++ * type of a page table page under the feet of ++ * spurious_page_fault(). ++ */ ++ flush_mask(mask, ++ (x & PGT_type_mask) && ++ (x & PGT_type_mask) <= PGT_root_page_table ++ ? FLUSH_TLB | FLUSH_FORCE_IPI ++ : FLUSH_TLB); ++ } ++ } ++ } ++ + if ( unlikely(((x & PGT_type_mask) == PGT_writable_page) != + (type == PGT_writable_page)) ) + { +@@ -3091,13 +3097,25 @@ static int _get_page_type(struct page_info *page, unsigned long type, + + if ( unlikely(!(nx & PGT_validated)) ) + { +- if ( !(x & PGT_partial) ) ++ /* ++ * No special validation needed for writable or shared pages. Page ++ * tables and GDT/LDT need to have their contents audited. ++ * ++ * per validate_page(), non-atomic updates are fine here. ++ */ ++ if ( type == PGT_writable_page || type == PGT_shared_page ) ++ page->u.inuse.type_info |= PGT_validated; ++ else + { +- page->nr_validated_ptes = 0; +- page->partial_flags = 0; +- page->linear_pt_count = 0; ++ if ( !(x & PGT_partial) ) ++ { ++ page->nr_validated_ptes = 0; ++ page->partial_flags = 0; ++ page->linear_pt_count = 0; ++ } ++ ++ rc = validate_page(page, type, preemptible); + } +- rc = validate_page(page, type, preemptible); + } + + out: +-- +2.35.1 + diff --git a/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch b/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch new file mode 100644 index 0000000..c8c2dda --- /dev/null +++ b/0024-x86-page-Introduce-_PAGE_-constants-for-memory-types.patch @@ -0,0 +1,53 @@ +From 9cfd796ae05421ded8e4f70b2c55352491cfa841 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 9 Jun 2022 15:27:53 +0200 +Subject: [PATCH 24/32] x86/page: Introduce _PAGE_* constants for memory types + +... rather than opencoding the PAT/PCD/PWT attributes in __PAGE_HYPERVISOR_* +constants. These are going to be needed by forthcoming logic. + +No functional change. + +This is part of XSA-402. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 1be8707c75bf4ba68447c74e1618b521dd432499 +master date: 2022-06-09 14:21:38 +0200 +--- + xen/include/asm-x86/page.h | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h +index 1d080cffbe84..2e542050f65a 100644 +--- a/xen/include/asm-x86/page.h ++++ b/xen/include/asm-x86/page.h +@@ -331,6 +331,14 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); + + #define PAGE_CACHE_ATTRS (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT) + ++/* Memory types, encoded under Xen's choice of MSR_PAT. */ ++#define _PAGE_WB ( 0) ++#define _PAGE_WT ( _PAGE_PWT) ++#define _PAGE_UCM ( _PAGE_PCD ) ++#define _PAGE_UC ( _PAGE_PCD | _PAGE_PWT) ++#define _PAGE_WC (_PAGE_PAT ) ++#define _PAGE_WP (_PAGE_PAT | _PAGE_PWT) ++ + /* + * Debug option: Ensure that granted mappings are not implicitly unmapped. 
+ * WARNING: This will need to be disabled to run OSes that use the spare PTE +@@ -349,8 +357,8 @@ void efi_update_l4_pgtable(unsigned int l4idx, l4_pgentry_t); + #define __PAGE_HYPERVISOR_RX (_PAGE_PRESENT | _PAGE_ACCESSED) + #define __PAGE_HYPERVISOR (__PAGE_HYPERVISOR_RX | \ + _PAGE_DIRTY | _PAGE_RW) +-#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_PCD) +-#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_HYPERVISOR_UCMINUS (__PAGE_HYPERVISOR | _PAGE_UCM) ++#define __PAGE_HYPERVISOR_UC (__PAGE_HYPERVISOR | _PAGE_UC) + #define __PAGE_HYPERVISOR_SHSTK (__PAGE_HYPERVISOR_RO | _PAGE_DIRTY) + + #define MAP_SMALL_PAGES _PAGE_AVAIL0 /* don't use superpages mappings */ +-- +2.35.1 + diff --git a/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch b/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch new file mode 100644 index 0000000..582fc74 --- /dev/null +++ b/0025-x86-Don-t-change-the-cacheability-of-the-directmap.patch @@ -0,0 +1,223 @@ +From 74193f4292d9cfc2874866e941d9939d8f33fcef Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 9 Jun 2022 15:28:23 +0200 +Subject: [PATCH 25/32] x86: Don't change the cacheability of the directmap + +Changeset 55f97f49b7ce ("x86: Change cache attributes of Xen 1:1 page mappings +in response to guest mapping requests") attempted to keep the cacheability +consistent between different mappings of the same page. + +The reason wasn't described in the changelog, but it is understood to be in +regards to a concern over machine check exceptions, owing to errata when using +mixed cacheabilities. It did this primarily by updating Xen's mapping of the +page in the direct map when the guest mapped a page with reduced cacheability. + +Unfortunately, the logic didn't actually prevent mixed cacheability from +occurring: + * A guest could map a page normally, and then map the same page with + different cacheability; nothing prevented this. + * The cacheability of the directmap was always latest-takes-precedence in + terms of guest requests. + * Grant-mapped frames with lesser cacheability didn't adjust the page's + cacheattr settings. + * The map_domain_page() function still unconditionally created WB mappings, + irrespective of the page's cacheattr settings. + +Additionally, update_xen_mappings() had a bug where the alias calculation was +wrong for mfn's which were .init content, which should have been treated as +fully guest pages, not Xen pages. + +Worse yet, the logic introduced a vulnerability whereby necessary +pagetable/segdesc adjustments made by Xen in the validation logic could become +non-coherent between the cache and main memory. The CPU could subsequently +operate on the stale value in the cache, rather than the safe value in main +memory. + +The directmap contains primarily mappings of RAM. PAT/MTRR conflict +resolution is asymmetric, and generally for MTRR=WB ranges, PAT of lesser +cacheability resolves to being coherent. The special case is WC mappings, +which are non-coherent against MTRR=WB regions (except for fully-coherent +CPUs). + +Xen must not have any WC cacheability in the directmap, to prevent Xen's +actions from creating non-coherency. (Guest actions creating non-coherency is +dealt with in subsequent patches.) As all memory types for MTRR=WB ranges +inter-operate coherently, so leave Xen's directmap mappings as WB. 
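
As a rough illustration of how the _PAGE_* constants introduced in the
previous patch get used, a memory-type test on PTE flags reduces to a masked
comparison (a hypothetical helper, not code from this series; PAGE_CACHE_ATTRS
is the existing _PAGE_PAT | _PAGE_PCD | _PAGE_PWT mask):

    /* Sketch: does l1f encode a Write-Combining mapping under Xen's PAT? */
    static bool pte_is_wc(unsigned int l1f)
    {
        return (l1f & PAGE_CACHE_ATTRS) == _PAGE_WC;
    }

A later patch in this series performs exactly this kind of check (combined
with _PAGE_RW) to spot writeable WC mappings of RAM.
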
+ +Only PV guests with access to devices can use reduced-cacheability mappings to +begin with, and they're trusted not to mount DoSs against the system anyway. + +Drop PGC_cacheattr_{base,mask} entirely, and the logic to manipulate them. +Shift the later PGC_* constants up, to gain 3 extra bits in the main reference +count. Retain the check in get_page_from_l1e() for special_pages() because a +guest has no business using reduced cacheability on these. + +This reverts changeset 55f97f49b7ce6c3520c555d19caac6cf3f9a5df0 + +This is CVE-2022-26363, part of XSA-402. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: George Dunlap <george.dunlap@citrix.com> +master commit: ae09597da34aee6bc5b76475c5eea6994457e854 +master date: 2022-06-09 14:22:08 +0200 +--- + xen/arch/x86/mm.c | 84 ++++------------------------------------ + xen/include/asm-x86/mm.h | 23 +++++------ + 2 files changed, 17 insertions(+), 90 deletions(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index c6429b0f749a..ab32d13a1a0d 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -783,28 +783,6 @@ bool is_iomem_page(mfn_t mfn) + return (page_get_owner(page) == dom_io); + } + +-static int update_xen_mappings(unsigned long mfn, unsigned int cacheattr) +-{ +- int err = 0; +- bool alias = mfn >= PFN_DOWN(xen_phys_start) && +- mfn < PFN_UP(xen_phys_start + xen_virt_end - XEN_VIRT_START); +- unsigned long xen_va = +- XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT); +- +- if ( boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) ) +- return 0; +- +- if ( unlikely(alias) && cacheattr ) +- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, 0); +- if ( !err ) +- err = map_pages_to_xen((unsigned long)mfn_to_virt(mfn), _mfn(mfn), 1, +- PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr)); +- if ( unlikely(alias) && !cacheattr && !err ) +- err = map_pages_to_xen(xen_va, _mfn(mfn), 1, PAGE_HYPERVISOR); +- +- return err; +-} +- + #ifndef NDEBUG + struct mmio_emul_range_ctxt { + const struct domain *d; +@@ -1009,47 +987,14 @@ get_page_from_l1e( + goto could_not_pin; + } + +- if ( pte_flags_to_cacheattr(l1f) != +- ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) ) ++ if ( (l1f & PAGE_CACHE_ATTRS) != _PAGE_WB && is_special_page(page) ) + { +- unsigned long x, nx, y = page->count_info; +- unsigned long cacheattr = pte_flags_to_cacheattr(l1f); +- int err; +- +- if ( is_special_page(page) ) +- { +- if ( write ) +- put_page_type(page); +- put_page(page); +- gdprintk(XENLOG_WARNING, +- "Attempt to change cache attributes of Xen heap page\n"); +- return -EACCES; +- } +- +- do { +- x = y; +- nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base); +- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); +- +- err = update_xen_mappings(mfn, cacheattr); +- if ( unlikely(err) ) +- { +- cacheattr = y & PGC_cacheattr_mask; +- do { +- x = y; +- nx = (x & ~PGC_cacheattr_mask) | cacheattr; +- } while ( (y = cmpxchg(&page->count_info, x, nx)) != x ); +- +- if ( write ) +- put_page_type(page); +- put_page(page); +- +- gdprintk(XENLOG_WARNING, "Error updating mappings for mfn %" PRI_mfn +- " (pfn %" PRI_pfn ", from L1 entry %" PRIpte ") for d%d\n", +- mfn, get_gpfn_from_mfn(mfn), +- l1e_get_intpte(l1e), l1e_owner->domain_id); +- return err; +- } ++ if ( write ) ++ put_page_type(page); ++ put_page(page); ++ gdprintk(XENLOG_WARNING, ++ "Attempt to change cache attributes of Xen heap page\n"); ++ return -EACCES; + } + + return 0; +@@ -2467,24 +2412,9 @@ static int mod_l4_entry(l4_pgentry_t *pl4e, + 
*/ + static int cleanup_page_mappings(struct page_info *page) + { +- unsigned int cacheattr = +- (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base; + int rc = 0; + unsigned long mfn = mfn_x(page_to_mfn(page)); + +- /* +- * If we've modified xen mappings as a result of guest cache +- * attributes, restore them to the "normal" state. +- */ +- if ( unlikely(cacheattr) ) +- { +- page->count_info &= ~PGC_cacheattr_mask; +- +- BUG_ON(is_special_page(page)); +- +- rc = update_xen_mappings(mfn, 0); +- } +- + /* + * If this may be in a PV domain's IOMMU, remove it. + * +diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h +index cb9052749963..8a9a43bb0a9d 100644 +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -69,25 +69,22 @@ + /* Set when is using a page as a page table */ + #define _PGC_page_table PG_shift(3) + #define PGC_page_table PG_mask(1, 3) +- /* 3-bit PAT/PCD/PWT cache-attribute hint. */ +-#define PGC_cacheattr_base PG_shift(6) +-#define PGC_cacheattr_mask PG_mask(7, 6) + /* Page is broken? */ +-#define _PGC_broken PG_shift(7) +-#define PGC_broken PG_mask(1, 7) ++#define _PGC_broken PG_shift(4) ++#define PGC_broken PG_mask(1, 4) + /* Mutually-exclusive page states: { inuse, offlining, offlined, free }. */ +-#define PGC_state PG_mask(3, 9) +-#define PGC_state_inuse PG_mask(0, 9) +-#define PGC_state_offlining PG_mask(1, 9) +-#define PGC_state_offlined PG_mask(2, 9) +-#define PGC_state_free PG_mask(3, 9) ++#define PGC_state PG_mask(3, 6) ++#define PGC_state_inuse PG_mask(0, 6) ++#define PGC_state_offlining PG_mask(1, 6) ++#define PGC_state_offlined PG_mask(2, 6) ++#define PGC_state_free PG_mask(3, 6) + #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) + /* Page is not reference counted (see below for caveats) */ +-#define _PGC_extra PG_shift(10) +-#define PGC_extra PG_mask(1, 10) ++#define _PGC_extra PG_shift(7) ++#define PGC_extra PG_mask(1, 7) + + /* Count of references to this frame. */ +-#define PGC_count_width PG_shift(10) ++#define PGC_count_width PG_shift(7) + #define PGC_count_mask ((1UL<<PGC_count_width)-1) + + /* +-- +2.35.1 + diff --git a/0026-x86-Split-cache_flush-out-of-cache_writeback.patch b/0026-x86-Split-cache_flush-out-of-cache_writeback.patch new file mode 100644 index 0000000..ffd8d7c --- /dev/null +++ b/0026-x86-Split-cache_flush-out-of-cache_writeback.patch @@ -0,0 +1,294 @@ +From 8eafa2d871ae51d461256e4a14175e24df330c70 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 9 Jun 2022 15:28:48 +0200 +Subject: [PATCH 26/32] x86: Split cache_flush() out of cache_writeback() + +Subsequent changes will want a fully flushing version. + +Use the new helper rather than opencoding it in flush_area_local(). This +resolves an outstanding issue where the conditional sfence is on the wrong +side of the clflushopt loop. clflushopt is ordered with respect to older +stores, not to younger stores. + +Rename gnttab_cache_flush()'s helper to avoid colliding in name. +grant_table.c can see the prototype from cache.h so the build fails +otherwise. + +This is part of XSA-402. 
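
The ordering issue being fixed can be seen in a reduced sketch of the new
cache_flush() (assumptions: 64-byte cache lines and CLFLUSHOPT present; the
real code sizes the loop from x86_clflush_size and dispatches through the
alternatives framework):

    /* Sketch: flush a range, fencing *after* the loop. */
    static void cache_flush_sketch(const void *addr, unsigned int size)
    {
        const void *end = addr + size;

        for ( ; addr < end; addr += 64 )
            asm volatile ( "clflushopt %0" :: "m" (*(const char *)addr) );

        /*
         * CLFLUSHOPT is only ordered with respect to older stores, so the
         * SFENCE must follow the loop to order the flushes against younger
         * ones, not precede it.
         */
        asm volatile ( "sfence" ::: "memory" );
    }
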
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 9a67ffee3371506e1cbfdfff5b90658d4828f6a2 +master date: 2022-06-09 14:22:38 +0200 +--- + xen/arch/x86/flushtlb.c | 84 ++++++++++++++++++++++++--- + xen/common/grant_table.c | 4 +- + xen/drivers/passthrough/vtd/extern.h | 1 - + xen/drivers/passthrough/vtd/iommu.c | 53 +---------------- + xen/drivers/passthrough/vtd/x86/vtd.c | 5 -- + xen/include/asm-x86/cache.h | 7 +++ + 6 files changed, 88 insertions(+), 66 deletions(-) + +diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c +index 25798df50f54..0c912b8669f8 100644 +--- a/xen/arch/x86/flushtlb.c ++++ b/xen/arch/x86/flushtlb.c +@@ -234,7 +234,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) + if ( flags & FLUSH_CACHE ) + { + const struct cpuinfo_x86 *c = ¤t_cpu_data; +- unsigned long i, sz = 0; ++ unsigned long sz = 0; + + if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) + sz = 1UL << (order + PAGE_SHIFT); +@@ -244,13 +244,7 @@ unsigned int flush_area_local(const void *va, unsigned int flags) + c->x86_clflush_size && c->x86_cache_size && sz && + ((sz >> 10) < c->x86_cache_size) ) + { +- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); +- for ( i = 0; i < sz; i += c->x86_clflush_size ) +- alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";" +- " clflush %0", +- "data16 clflush %0", /* clflushopt */ +- X86_FEATURE_CLFLUSHOPT, +- "m" (((const char *)va)[i])); ++ cache_flush(va, sz); + flags &= ~FLUSH_CACHE; + } + else +@@ -265,6 +259,80 @@ unsigned int flush_area_local(const void *va, unsigned int flags) + return flags; + } + ++void cache_flush(const void *addr, unsigned int size) ++{ ++ /* ++ * This function may be called before current_cpu_data is established. ++ * Hence a fallback is needed to prevent the loop below becoming infinite. ++ */ ++ unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; ++ const void *end = addr + size; ++ ++ addr -= (unsigned long)addr & (clflush_size - 1); ++ for ( ; addr < end; addr += clflush_size ) ++ { ++ /* ++ * Note regarding the "ds" prefix use: it's faster to do a clflush ++ * + prefix than a clflush + nop, and hence the prefix is added instead ++ * of letting the alternative framework fill the gap by appending nops. ++ */ ++ alternative_io("ds; clflush %[p]", ++ "data16 clflush %[p]", /* clflushopt */ ++ X86_FEATURE_CLFLUSHOPT, ++ /* no outputs */, ++ [p] "m" (*(const char *)(addr))); ++ } ++ ++ alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); ++} ++ ++void cache_writeback(const void *addr, unsigned int size) ++{ ++ unsigned int clflush_size; ++ const void *end = addr + size; ++ ++ /* Fall back to CLFLUSH{,OPT} when CLWB isn't available. */ ++ if ( !boot_cpu_has(X86_FEATURE_CLWB) ) ++ return cache_flush(addr, size); ++ ++ /* ++ * This function may be called before current_cpu_data is established. ++ * Hence a fallback is needed to prevent the loop below becoming infinite. ++ */ ++ clflush_size = current_cpu_data.x86_clflush_size ?: 16; ++ addr -= (unsigned long)addr & (clflush_size - 1); ++ for ( ; addr < end; addr += clflush_size ) ++ { ++/* ++ * The arguments to a macro must not include preprocessor directives. Doing so ++ * results in undefined behavior, so we have to create some defines here in ++ * order to avoid it. 
++ */ ++#if defined(HAVE_AS_CLWB) ++# define CLWB_ENCODING "clwb %[p]" ++#elif defined(HAVE_AS_XSAVEOPT) ++# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ ++#else ++# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ ++#endif ++ ++#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) ++#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) ++# define INPUT BASE_INPUT ++#else ++# define INPUT(addr) "a" (addr), BASE_INPUT(addr) ++#endif ++ ++ asm volatile (CLWB_ENCODING :: INPUT(addr)); ++ ++#undef INPUT ++#undef BASE_INPUT ++#undef CLWB_ENCODING ++ } ++ ++ asm volatile ("sfence" ::: "memory"); ++} ++ + unsigned int guest_flush_tlb_flags(const struct domain *d) + { + bool shadow = paging_mode_shadow(d); +diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c +index 66f8ce71741c..4c742cd8fe81 100644 +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -3431,7 +3431,7 @@ gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop, + return 0; + } + +-static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) ++static int _cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref) + { + struct domain *d, *owner; + struct page_info *page; +@@ -3525,7 +3525,7 @@ gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop, + return -EFAULT; + for ( ; ; ) + { +- int ret = cache_flush(&op, cur_ref); ++ int ret = _cache_flush(&op, cur_ref); + + if ( ret < 0 ) + return ret; +diff --git a/xen/drivers/passthrough/vtd/extern.h b/xen/drivers/passthrough/vtd/extern.h +index 01e010a10d61..401079299725 100644 +--- a/xen/drivers/passthrough/vtd/extern.h ++++ b/xen/drivers/passthrough/vtd/extern.h +@@ -76,7 +76,6 @@ int __must_check qinval_device_iotlb_sync(struct vtd_iommu *iommu, + struct pci_dev *pdev, + u16 did, u16 size, u64 addr); + +-unsigned int get_cache_line_size(void); + void flush_all_cache(void); + + uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node); +diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c +index 8975c1de61bc..bc377c9bcfa4 100644 +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -31,6 +31,7 @@ + #include <xen/pci.h> + #include <xen/pci_regs.h> + #include <xen/keyhandler.h> ++#include <asm/cache.h> + #include <asm/msi.h> + #include <asm/nops.h> + #include <asm/irq.h> +@@ -206,54 +207,6 @@ static void check_cleanup_domid_map(const struct domain *d, + } + } + +-static void sync_cache(const void *addr, unsigned int size) +-{ +- static unsigned long clflush_size = 0; +- const void *end = addr + size; +- +- if ( clflush_size == 0 ) +- clflush_size = get_cache_line_size(); +- +- addr -= (unsigned long)addr & (clflush_size - 1); +- for ( ; addr < end; addr += clflush_size ) +-/* +- * The arguments to a macro must not include preprocessor directives. Doing so +- * results in undefined behavior, so we have to create some defines here in +- * order to avoid it. 
+- */ +-#if defined(HAVE_AS_CLWB) +-# define CLWB_ENCODING "clwb %[p]" +-#elif defined(HAVE_AS_XSAVEOPT) +-# define CLWB_ENCODING "data16 xsaveopt %[p]" /* clwb */ +-#else +-# define CLWB_ENCODING ".byte 0x66, 0x0f, 0xae, 0x30" /* clwb (%%rax) */ +-#endif +- +-#define BASE_INPUT(addr) [p] "m" (*(const char *)(addr)) +-#if defined(HAVE_AS_CLWB) || defined(HAVE_AS_XSAVEOPT) +-# define INPUT BASE_INPUT +-#else +-# define INPUT(addr) "a" (addr), BASE_INPUT(addr) +-#endif +- /* +- * Note regarding the use of NOP_DS_PREFIX: it's faster to do a clflush +- * + prefix than a clflush + nop, and hence the prefix is added instead +- * of letting the alternative framework fill the gap by appending nops. +- */ +- alternative_io_2(".byte " __stringify(NOP_DS_PREFIX) "; clflush %[p]", +- "data16 clflush %[p]", /* clflushopt */ +- X86_FEATURE_CLFLUSHOPT, +- CLWB_ENCODING, +- X86_FEATURE_CLWB, /* no outputs */, +- INPUT(addr)); +-#undef INPUT +-#undef BASE_INPUT +-#undef CLWB_ENCODING +- +- alternative_2("", "sfence", X86_FEATURE_CLFLUSHOPT, +- "sfence", X86_FEATURE_CLWB); +-} +- + /* Allocate page table, return its machine address */ + uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node) + { +@@ -273,7 +226,7 @@ uint64_t alloc_pgtable_maddr(unsigned long npages, nodeid_t node) + clear_page(vaddr); + + if ( (iommu_ops.init ? &iommu_ops : &vtd_ops)->sync_cache ) +- sync_cache(vaddr, PAGE_SIZE); ++ cache_writeback(vaddr, PAGE_SIZE); + unmap_domain_page(vaddr); + cur_pg++; + } +@@ -1305,7 +1258,7 @@ int __init iommu_alloc(struct acpi_drhd_unit *drhd) + iommu->nr_pt_levels = agaw_to_level(agaw); + + if ( !ecap_coherent(iommu->ecap) ) +- vtd_ops.sync_cache = sync_cache; ++ vtd_ops.sync_cache = cache_writeback; + + /* allocate domain id bitmap */ + iommu->domid_bitmap = xzalloc_array(unsigned long, BITS_TO_LONGS(nr_dom)); +diff --git a/xen/drivers/passthrough/vtd/x86/vtd.c b/xen/drivers/passthrough/vtd/x86/vtd.c +index 6681dccd6970..55f0faa521cb 100644 +--- a/xen/drivers/passthrough/vtd/x86/vtd.c ++++ b/xen/drivers/passthrough/vtd/x86/vtd.c +@@ -47,11 +47,6 @@ void unmap_vtd_domain_page(const void *va) + unmap_domain_page(va); + } + +-unsigned int get_cache_line_size(void) +-{ +- return ((cpuid_ebx(1) >> 8) & 0xff) * 8; +-} +- + void flush_all_cache() + { + wbinvd(); +diff --git a/xen/include/asm-x86/cache.h b/xen/include/asm-x86/cache.h +index 1f7173d8c72c..e4770efb22b9 100644 +--- a/xen/include/asm-x86/cache.h ++++ b/xen/include/asm-x86/cache.h +@@ -11,4 +11,11 @@ + + #define __read_mostly __section(".data.read_mostly") + ++#ifndef __ASSEMBLY__ ++ ++void cache_flush(const void *addr, unsigned int size); ++void cache_writeback(const void *addr, unsigned int size); ++ ++#endif ++ + #endif +-- +2.35.1 + diff --git a/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch b/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch new file mode 100644 index 0000000..a3ab379 --- /dev/null +++ b/0027-x86-amd-Work-around-CLFLUSH-ordering-on-older-parts.patch @@ -0,0 +1,95 @@ +From c4815be949aae6583a9a22897beb96b095b4f1a2 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 9 Jun 2022 15:29:13 +0200 +Subject: [PATCH 27/32] x86/amd: Work around CLFLUSH ordering on older parts + +On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakely ordered with everything, +including reads and writes to the address, and LFENCE/SFENCE instructions. + +This creates a multitude of problematic corner cases, laid out in the manual. 
+Arrange to use MFENCE on both sides of the CLFLUSH to force proper ordering. + +This is part of XSA-402. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 062868a5a8b428b85db589fa9a6d6e43969ffeb9 +master date: 2022-06-09 14:23:07 +0200 +--- + xen/arch/x86/cpu/amd.c | 8 ++++++++ + xen/arch/x86/flushtlb.c | 13 ++++++++++++- + xen/include/asm-x86/cpufeatures.h | 1 + + 3 files changed, 21 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index a8e37dbb1f5c..b3b9a0df5fed 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -812,6 +812,14 @@ static void init_amd(struct cpuinfo_x86 *c) + if (!cpu_has_lfence_dispatch) + __set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability); + ++ /* ++ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with ++ * everything, including reads and writes to address, and ++ * LFENCE/SFENCE instructions. ++ */ ++ if (!cpu_has_clflushopt) ++ setup_force_cpu_cap(X86_BUG_CLFLUSH_MFENCE); ++ + switch(c->x86) + { + case 0xf ... 0x11: +diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c +index 0c912b8669f8..dcbb4064012e 100644 +--- a/xen/arch/x86/flushtlb.c ++++ b/xen/arch/x86/flushtlb.c +@@ -259,6 +259,13 @@ unsigned int flush_area_local(const void *va, unsigned int flags) + return flags; + } + ++/* ++ * On pre-CLFLUSHOPT AMD CPUs, CLFLUSH is weakly ordered with everything, ++ * including reads and writes to address, and LFENCE/SFENCE instructions. ++ * ++ * This function only works safely after alternatives have run. Luckily, at ++ * the time of writing, we don't flush the caches that early. ++ */ + void cache_flush(const void *addr, unsigned int size) + { + /* +@@ -268,6 +275,8 @@ void cache_flush(const void *addr, unsigned int size) + unsigned int clflush_size = current_cpu_data.x86_clflush_size ?: 16; + const void *end = addr + size; + ++ alternative("", "mfence", X86_BUG_CLFLUSH_MFENCE); ++ + addr -= (unsigned long)addr & (clflush_size - 1); + for ( ; addr < end; addr += clflush_size ) + { +@@ -283,7 +292,9 @@ void cache_flush(const void *addr, unsigned int size) + [p] "m" (*(const char *)(addr))); + } + +- alternative("", "sfence", X86_FEATURE_CLFLUSHOPT); ++ alternative_2("", ++ "sfence", X86_FEATURE_CLFLUSHOPT, ++ "mfence", X86_BUG_CLFLUSH_MFENCE); + } + + void cache_writeback(const void *addr, unsigned int size) +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index 7413febd7ad8..ff3157d52d13 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -47,6 +47,7 @@ XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch + + #define X86_BUG_FPU_PTRS X86_BUG( 0) /* (F)X{SAVE,RSTOR} doesn't save/restore FOP/FIP/FDP. */ + #define X86_BUG_NULL_SEG X86_BUG( 1) /* NULL-ing a selector preserves the base and limit. */ ++#define X86_BUG_CLFLUSH_MFENCE X86_BUG( 2) /* MFENCE needed to serialise CLFLUSH */ + + /* Total number of capability words, inc synth and bug words. 
*/
+ #define NCAPINTS (FSCAPINTS + X86_NR_SYNTH + X86_NR_BUG) /* N 32-bit words worth of info */
+--
+2.35.1
+
diff --git a/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch b/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch
new file mode 100644
index 0000000..66cd741
--- /dev/null
+++ b/0028-x86-pv-Track-and-flush-non-coherent-mappings-of-RAM.patch
@@ -0,0 +1,160 @@
+From dc020d8d1ba420e2dd0e7a40f5045db897f3c4f4 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3@citrix.com>
+Date: Thu, 9 Jun 2022 15:29:38 +0200
+Subject: [PATCH 28/32] x86/pv: Track and flush non-coherent mappings of RAM
+
+There are legitimate uses of WC mappings of RAM, e.g. for DMA buffers with
+devices that make non-coherent writes.  The Linux sound subsystem makes
+extensive use of this technique.
+
+For such usecases, the guest's DMA buffer is mapped and consistently used as
+WC, and Xen doesn't interact with the buffer.
+
+However, a mischievous guest can use WC mappings to deliberately create
+non-coherency between the cache and RAM, and use this to trick Xen into
+validating a pagetable which isn't actually safe.
+
+Allocate a new PGT_non_coherent to track the non-coherency of mappings.  Set
+it whenever a non-coherent writeable mapping is created.  If the page is used
+as anything other than PGT_writable_page, force a cache flush before
+validation.  Also force a cache flush before the page is returned to the heap.
+
+This is CVE-2022-26364, part of XSA-402.
+
+Reported-by: Jann Horn <jannh@google.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: George Dunlap <george.dunlap@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: c1c9cae3a9633054b177c5de21ad7268162b2f2c
+master date: 2022-06-09 14:23:37 +0200
+---
+ xen/arch/x86/mm.c             | 38 +++++++++++++++++++++++++++++++++++
+ xen/arch/x86/pv/grant_table.c | 21 +++++++++++++++++++
+ xen/include/asm-x86/mm.h      |  6 +++++-
+ 3 files changed, 64 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index ab32d13a1a0d..bab9624fabb7 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -997,6 +997,15 @@ get_page_from_l1e(
+         return -EACCES;
+     }
+ 
++    /*
++     * Track writeable non-coherent mappings to RAM pages, to trigger a cache
++     * flush later if the target is used as anything but a PGT_writeable page.
++     * We care about all writeable mappings, including foreign mappings.
++     */
++    if ( !boot_cpu_has(X86_FEATURE_XEN_SELFSNOOP) &&
++         (l1f & (PAGE_CACHE_ATTRS | _PAGE_RW)) == (_PAGE_WC | _PAGE_RW) )
++        set_bit(_PGT_non_coherent, &page->u.inuse.type_info);
++
+     return 0;
+ 
+  could_not_pin:
+@@ -2454,6 +2463,19 @@ static int cleanup_page_mappings(struct page_info *page)
+         }
+     }
+ 
++    /*
++     * Flush the cache if there were previously non-coherent writeable
++     * mappings of this page.  This forces the page to be coherent before it
++     * is freed back to the heap.
++     */
++    if ( __test_and_clear_bit(_PGT_non_coherent, &page->u.inuse.type_info) )
++    {
++        void *addr = __map_domain_page(page);
++
++        cache_flush(addr, PAGE_SIZE);
++        unmap_domain_page(addr);
++    }
++
+     return rc;
+ }
+ 
+@@ -3027,6 +3049,22 @@ static int _get_page_type(struct page_info *page, unsigned long type,
+ 
+     if ( unlikely(!(nx & PGT_validated)) )
+     {
++        /*
++         * Flush the cache if there were previously non-coherent mappings of
++         * this page, and we're trying to use it as anything other than a
++         * writeable page.  This forces the page to be coherent before we
++         * validate its contents for safety.
++ */ ++ if ( (nx & PGT_non_coherent) && type != PGT_writable_page ) ++ { ++ void *addr = __map_domain_page(page); ++ ++ cache_flush(addr, PAGE_SIZE); ++ unmap_domain_page(addr); ++ ++ page->u.inuse.type_info &= ~PGT_non_coherent; ++ } ++ + /* + * No special validation needed for writable or shared pages. Page + * tables and GDT/LDT need to have their contents audited. +diff --git a/xen/arch/x86/pv/grant_table.c b/xen/arch/x86/pv/grant_table.c +index 0325618c9883..81c72e61ed55 100644 +--- a/xen/arch/x86/pv/grant_table.c ++++ b/xen/arch/x86/pv/grant_table.c +@@ -109,7 +109,17 @@ int create_grant_pv_mapping(uint64_t addr, mfn_t frame, + + ol1e = *pl1e; + if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) ++ { ++ /* ++ * We always create mappings in this path. However, our caller, ++ * map_grant_ref(), only passes potentially non-zero cache_flags for ++ * MMIO frames, so this path doesn't create non-coherent mappings of ++ * RAM frames and there's no need to calculate PGT_non_coherent. ++ */ ++ ASSERT(!cache_flags || is_iomem_page(frame)); ++ + rc = GNTST_okay; ++ } + + out_unlock: + page_unlock(page); +@@ -294,7 +304,18 @@ int replace_grant_pv_mapping(uint64_t addr, mfn_t frame, + l1e_get_flags(ol1e), addr, grant_pte_flags); + + if ( UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, 0) ) ++ { ++ /* ++ * Generally, replace_grant_pv_mapping() is used to destroy mappings ++ * (n1le = l1e_empty()), but it can be a present mapping on the ++ * GNTABOP_unmap_and_replace path. ++ * ++ * In such cases, the PTE is fully transplanted from its old location ++ * via steal_linear_addr(), so we need not perform PGT_non_coherent ++ * checking here. ++ */ + rc = GNTST_okay; ++ } + + out_unlock: + page_unlock(page); +diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h +index 8a9a43bb0a9d..7464167ae192 100644 +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -53,8 +53,12 @@ + #define _PGT_partial PG_shift(8) + #define PGT_partial PG_mask(1, 8) + ++/* Has this page been mapped writeable with a non-coherent memory type? */ ++#define _PGT_non_coherent PG_shift(9) ++#define PGT_non_coherent PG_mask(1, 9) ++ + /* Count of uses of this frame as its current type. */ +-#define PGT_count_width PG_shift(8) ++#define PGT_count_width PG_shift(9) + #define PGT_count_mask ((1UL<<PGT_count_width)-1) + + /* Are the 'type mask' bits identical? */ +-- +2.35.1 + diff --git a/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch b/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch new file mode 100644 index 0000000..0076984 --- /dev/null +++ b/0029-x86-mm-account-for-PGT_pae_xen_l2-in-recently-added-.patch @@ -0,0 +1,37 @@ +From 0b4e62847c5af1a59eea8d17093feccd550d1c26 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Fri, 10 Jun 2022 10:28:28 +0200 +Subject: [PATCH 29/32] x86/mm: account for PGT_pae_xen_l2 in recently added + assertion + +While PGT_pae_xen_l2 will be zapped once the type refcount of an L2 page +reaches zero, it'll be retained as long as the type refcount is non- +zero. Hence any checking against the requested type needs to either zap +the bit from the type or include it in the used mask. 
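
Concretely, the two correct forms the above describes are (illustrative only;
the hunk below adopts the second):

    /* Zap the bit from the requested type before comparing ... */
    ASSERT((x & (PGT_type_mask | PGT_count_mask)) ==
           ((type & ~PGT_pae_xen_l2) | 1));

    /* ... or include the bit in the mask applied to the observed word. */
    ASSERT((x & (PGT_type_mask | PGT_pae_xen_l2 | PGT_count_mask)) ==
           (type | 1));
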
+ +Fixes: 9186e96b199e ("x86/pv: Clean up _get_page_type()") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: c2095ac76be0f4a1940346c9ffb49fb967345060 +master date: 2022-06-10 10:21:06 +0200 +--- + xen/arch/x86/mm.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index bab9624fabb7..c1b9a3bb102a 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -2928,7 +2928,8 @@ static int _get_page_type(struct page_info *page, unsigned long type, + * The page is in one of two states (depending on PGT_partial), + * and should have exactly one reference. + */ +- ASSERT((x & (PGT_type_mask | PGT_count_mask)) == (type | 1)); ++ ASSERT((x & (PGT_type_mask | PGT_pae_xen_l2 | PGT_count_mask)) == ++ (type | 1)); + + if ( !(x & PGT_partial) ) + { +-- +2.35.1 + diff --git a/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch b/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch new file mode 100644 index 0000000..8556452 --- /dev/null +++ b/0030-x86-spec-ctrl-Make-VERW-flushing-runtime-conditional.patch @@ -0,0 +1,258 @@ +From 0e80f9f61168d4e4f008da75762cee0118f802ed Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Mon, 13 Jun 2022 16:19:01 +0100 +Subject: [PATCH 30/32] x86/spec-ctrl: Make VERW flushing runtime conditional +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently, VERW flushing to mitigate MDS is boot time conditional per domain +type. However, to provide mitigations for DRPW (CVE-2022-21166), we need to +conditionally use VERW based on the trustworthiness of the guest, and the +devices passed through. + +Remove the PV/HVM alternatives and instead issue a VERW on the return-to-guest +path depending on the SCF_verw bit in cpuinfo spec_ctrl_flags. + +Introduce spec_ctrl_init_domain() and d->arch.verw to calculate the VERW +disposition at domain creation time, and context switch the SCF_verw bit. + +For now, VERW flushing is used and controlled exactly as before, but later +patches will add per-domain cases too. + +No change in behaviour. + +This is part of XSA-404. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +(cherry picked from commit e06b95c1d44ab80da255219fc9f1e2fc423edcb6) +--- + docs/misc/xen-command-line.pandoc | 5 ++--- + xen/arch/x86/domain.c | 12 ++++++++++-- + xen/arch/x86/hvm/vmx/entry.S | 2 +- + xen/arch/x86/spec_ctrl.c | 30 +++++++++++++++++------------ + xen/include/asm-x86/cpufeatures.h | 3 +-- + xen/include/asm-x86/domain.h | 3 +++ + xen/include/asm-x86/spec_ctrl.h | 2 ++ + xen/include/asm-x86/spec_ctrl_asm.h | 16 +++++++++++++-- + 8 files changed, 51 insertions(+), 22 deletions(-) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 1d08fb7e9aa6..d5cb09f86541 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2258,9 +2258,8 @@ in place for guests to use. + Use of a positive boolean value for either of these options is invalid. + + The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine +-grained control over the alternative blocks used by Xen. These impact Xen's +-ability to protect itself, and Xen's ability to virtualise support for guests +-to use. ++grained control over the primitives by Xen. 
These impact Xen's ability to ++protect itself, and Xen's ability to virtualise support for guests to use. + + * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests + respectively. +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index ef1812dc1402..1fe6644a71ae 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -863,6 +863,8 @@ int arch_domain_create(struct domain *d, + + d->arch.msr_relaxed = config->arch.misc_flags & XEN_X86_MSR_RELAXED; + ++ spec_ctrl_init_domain(d); ++ + return 0; + + fail: +@@ -2017,14 +2019,15 @@ static void __context_switch(void) + void context_switch(struct vcpu *prev, struct vcpu *next) + { + unsigned int cpu = smp_processor_id(); ++ struct cpu_info *info = get_cpu_info(); + const struct domain *prevd = prev->domain, *nextd = next->domain; + unsigned int dirty_cpu = read_atomic(&next->dirty_cpu); + + ASSERT(prev != next); + ASSERT(local_irq_is_enabled()); + +- get_cpu_info()->use_pv_cr3 = false; +- get_cpu_info()->xen_cr3 = 0; ++ info->use_pv_cr3 = false; ++ info->xen_cr3 = 0; + + if ( unlikely(dirty_cpu != cpu) && dirty_cpu != VCPU_CPU_CLEAN ) + { +@@ -2088,6 +2091,11 @@ void context_switch(struct vcpu *prev, struct vcpu *next) + *last_id = next_id; + } + } ++ ++ /* Update the top-of-stack block with the VERW disposition. */ ++ info->spec_ctrl_flags &= ~SCF_verw; ++ if ( nextd->arch.verw ) ++ info->spec_ctrl_flags |= SCF_verw; + } + + sched_context_switched(prev, next); +diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S +index 49651f3c435a..5f5de45a1309 100644 +--- a/xen/arch/x86/hvm/vmx/entry.S ++++ b/xen/arch/x86/hvm/vmx/entry.S +@@ -87,7 +87,7 @@ UNLIKELY_END(realmode) + + /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ + /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ +- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), X86_FEATURE_SC_VERW_HVM ++ DO_SPEC_CTRL_COND_VERW + + mov VCPU_hvm_guest_cr2(%rbx),%rax + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index c19464da70ce..21730aa03071 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -36,8 +36,8 @@ static bool __initdata opt_msr_sc_pv = true; + static bool __initdata opt_msr_sc_hvm = true; + static int8_t __initdata opt_rsb_pv = -1; + static bool __initdata opt_rsb_hvm = true; +-static int8_t __initdata opt_md_clear_pv = -1; +-static int8_t __initdata opt_md_clear_hvm = -1; ++static int8_t __read_mostly opt_md_clear_pv = -1; ++static int8_t __read_mostly opt_md_clear_hvm = -1; + + /* Cmdline controls for Xen's speculative settings. */ + static enum ind_thunk { +@@ -932,6 +932,13 @@ static __init void mds_calculations(uint64_t caps) + } + } + ++void spec_ctrl_init_domain(struct domain *d) ++{ ++ bool pv = is_pv_domain(d); ++ ++ d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm; ++} ++ + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; +@@ -1196,21 +1203,20 @@ void __init init_speculation_mitigations(void) + boot_cpu_has(X86_FEATURE_MD_CLEAR)); + + /* +- * Enable MDS defences as applicable. The PV blocks need using all the +- * time, and the Idle blocks need using if either PV or HVM defences are +- * used. ++ * Enable MDS defences as applicable. The Idle blocks need using if ++ * either PV or HVM defences are used. + * + * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with +- * equivelent semantics to avoid needing to perform both flushes on the +- * HVM path. 
The HVM blocks don't need activating if our hypervisor told +- * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves. ++ * equivalent semantics to avoid needing to perform both flushes on the ++ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH. ++ * ++ * After calculating the appropriate idle setting, simplify ++ * opt_md_clear_hvm to mean just "should we VERW on the way into HVM ++ * guests", so spec_ctrl_init_domain() can calculate suitable settings. + */ +- if ( opt_md_clear_pv ) +- setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV); + if ( opt_md_clear_pv || opt_md_clear_hvm ) + setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); +- if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush ) +- setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM); ++ opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush; + + /* + * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index ff3157d52d13..bd45a144ee78 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -35,8 +35,7 @@ XEN_CPUFEATURE(SC_RSB_HVM, X86_SYNTH(19)) /* RSB overwrite needed for HVM + XEN_CPUFEATURE(XEN_SELFSNOOP, X86_SYNTH(20)) /* SELFSNOOP gets used by Xen itself */ + XEN_CPUFEATURE(SC_MSR_IDLE, X86_SYNTH(21)) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ + XEN_CPUFEATURE(XEN_LBR, X86_SYNTH(22)) /* Xen uses MSR_DEBUGCTL.LBR */ +-XEN_CPUFEATURE(SC_VERW_PV, X86_SYNTH(23)) /* VERW used by Xen for PV */ +-XEN_CPUFEATURE(SC_VERW_HVM, X86_SYNTH(24)) /* VERW used by Xen for HVM */ ++/* Bits 23,24 unused. */ + XEN_CPUFEATURE(SC_VERW_IDLE, X86_SYNTH(25)) /* VERW used by Xen for idle */ + XEN_CPUFEATURE(XEN_SHSTK, X86_SYNTH(26)) /* Xen uses CET Shadow Stacks */ + XEN_CPUFEATURE(XEN_IBT, X86_SYNTH(27)) /* Xen uses CET Indirect Branch Tracking */ +diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h +index 92d54de0b9a1..2398a1d99da9 100644 +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -319,6 +319,9 @@ struct arch_domain + uint32_t pci_cf8; + uint8_t cmos_idx; + ++ /* Use VERW on return-to-guest for its flushing side effect. */ ++ bool verw; ++ + union { + struct pv_domain pv; + struct hvm_domain hvm; +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index f76029523610..751355f471f4 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -24,6 +24,7 @@ + #define SCF_use_shadow (1 << 0) + #define SCF_ist_wrmsr (1 << 1) + #define SCF_ist_rsb (1 << 2) ++#define SCF_verw (1 << 3) + + #ifndef __ASSEMBLY__ + +@@ -32,6 +33,7 @@ + #include <asm/msr-index.h> + + void init_speculation_mitigations(void); ++void spec_ctrl_init_domain(struct domain *d); + + extern bool opt_ibpb; + extern bool opt_ssbd; +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 02b3b18ce69f..5a590bac44aa 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -136,6 +136,19 @@ + #endif + .endm + ++.macro DO_SPEC_CTRL_COND_VERW ++/* ++ * Requires %rsp=cpuinfo ++ * ++ * Issue a VERW for its flushing side effect, if indicated. This is a Spectre ++ * v1 gadget, but the IRET/VMEntry is serialising. 
++ */ ++ testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) ++ jz .L\@_verw_skip ++ verw CPUINFO_verw_sel(%rsp) ++.L\@_verw_skip: ++.endm ++ + .macro DO_SPEC_CTRL_ENTRY maybexen:req + /* + * Requires %rsp=regs (also cpuinfo if !maybexen) +@@ -231,8 +244,7 @@ + #define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE "", \ + DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ +- ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ +- X86_FEATURE_SC_VERW_PV ++ DO_SPEC_CTRL_COND_VERW + + /* + * Use in IST interrupt/exception context. May interrupt Xen or PV context. +-- +2.35.1 + diff --git a/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch b/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch new file mode 100644 index 0000000..6934800 --- /dev/null +++ b/0031-x86-spec-ctrl-Enumeration-for-MMIO-Stale-Data-contro.patch @@ -0,0 +1,98 @@ +From a83108736db0ddaa5855f5abda6dcc8ae4fe25e9 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Mon, 20 Sep 2021 18:47:49 +0100 +Subject: [PATCH 31/32] x86/spec-ctrl: Enumeration for MMIO Stale Data controls +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The three *_NO bits indicate non-susceptibility to the SSDP, FBSDP and PSDP +data movement primitives. + +FB_CLEAR indicates that the VERW instruction has re-gained it's Fill Buffer +flushing side effect. This is only enumerated on parts where VERW had +previously lost it's flushing side effect due to the MDS/TAA vulnerabilities +being fixed in hardware. + +FB_CLEAR_CTRL is available on a subset of FB_CLEAR parts where the Fill Buffer +clearing side effect of VERW can be turned off for performance reasons. + +This is part of XSA-404. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +(cherry picked from commit 2ebe8fe9b7e0d36e9ec3cfe4552b2b197ef0dcec) +--- + xen/arch/x86/spec_ctrl.c | 11 ++++++++--- + xen/include/asm-x86/msr-index.h | 6 ++++++ + 2 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 21730aa03071..d285538bde9f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -323,7 +323,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + * Hardware read-only information, stating immunity to certain issues, or + * suggestions of which mitigation to use. + */ +- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", +@@ -332,13 +332,16 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "", + (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : "", + (caps & ARCH_CAPS_TAA_NO) ? " TAA_NO" : "", ++ (caps & ARCH_CAPS_SBDR_SSDP_NO) ? " SBDR_SSDP_NO" : "", ++ (caps & ARCH_CAPS_FBSDP_NO) ? " FBSDP_NO" : "", ++ (caps & ARCH_CAPS_PSDP_NO) ? " PSDP_NO" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? " IBRS_FAST" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS_SAME_MODE)) ? " IBRS_SAME_MODE" : ""); + + /* Hardware features which need driving to mitigate issues. 
*/ +- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || +@@ -353,7 +356,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_SRBDS_CTRL)) ? " SRBDS_CTRL" : "", + (e8b & cpufeat_mask(X86_FEATURE_VIRT_SSBD)) ? " VIRT_SSBD" : "", +- (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : ""); ++ (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", ++ (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", ++ (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : ""); + + /* Compiled-in support which pertains to mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 31964b88af7a..72bc32ba04ff 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -66,6 +66,11 @@ + #define ARCH_CAPS_IF_PSCHANGE_MC_NO (_AC(1, ULL) << 6) + #define ARCH_CAPS_TSX_CTRL (_AC(1, ULL) << 7) + #define ARCH_CAPS_TAA_NO (_AC(1, ULL) << 8) ++#define ARCH_CAPS_SBDR_SSDP_NO (_AC(1, ULL) << 13) ++#define ARCH_CAPS_FBSDP_NO (_AC(1, ULL) << 14) ++#define ARCH_CAPS_PSDP_NO (_AC(1, ULL) << 15) ++#define ARCH_CAPS_FB_CLEAR (_AC(1, ULL) << 17) ++#define ARCH_CAPS_FB_CLEAR_CTRL (_AC(1, ULL) << 18) + + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) +@@ -83,6 +88,7 @@ + #define MCU_OPT_CTRL_RNGDS_MITG_DIS (_AC(1, ULL) << 0) + #define MCU_OPT_CTRL_RTM_ALLOW (_AC(1, ULL) << 1) + #define MCU_OPT_CTRL_RTM_LOCKED (_AC(1, ULL) << 2) ++#define MCU_OPT_CTRL_FB_CLEAR_DIS (_AC(1, ULL) << 3) + + #define MSR_RTIT_OUTPUT_BASE 0x00000560 + #define MSR_RTIT_OUTPUT_MASK 0x00000561 +-- +2.35.1 + diff --git a/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch b/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch new file mode 100644 index 0000000..a5ac3e9 --- /dev/null +++ b/0032-x86-spec-ctrl-Add-spec-ctrl-unpriv-mmio.patch @@ -0,0 +1,187 @@ +From 2e82446cb252f6c8ac697e81f4155872c69afde4 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Mon, 13 Jun 2022 19:18:32 +0100 +Subject: [PATCH 32/32] x86/spec-ctrl: Add spec-ctrl=unpriv-mmio +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Per Xen's support statement, PCI passthrough should be to trusted domains +because the overall system security depends on factors outside of Xen's +control. + +As such, Xen, in a supported configuration, is not vulnerable to DRPW/SBDR. + +However, users who have risk assessed their configuration may be happy with +the risk of DoS, but unhappy with the risk of cross-domain data leakage. Such +users should enable this option. + +On CPUs vulnerable to MDS, the existing mitigations are the best we can do to +mitigate MMIO cross-domain data leakage. + +On CPUs fixed to MDS but vulnerable MMIO stale data leakage, this option: + + * On CPUs susceptible to FBSDP, mitigates cross-domain fill buffer leakage + using FB_CLEAR. + * On CPUs susceptible to SBDR, mitigates RNG data recovery by engaging the + srb-lock, previously used to mitigate SRBDS. + +Both mitigations require microcode from IPU 2022.1, May 2022. + +This is part of XSA-404. 
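
As a usage sketch, an administrator who grants device MMIO access to
less-than-fully-trusted guests might boot Xen with something like the
following (a hypothetical command line; the surrounding options depend
entirely on the deployment):

    xen.gz ... spec-ctrl=unpriv-mmio ...

On FB_CLEAR-capable hardware with the May 2022 microcode, this causes Xen to
issue VERW on the return-to-guest path and to engage the SRB lock as
described above.
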
+
+Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
+Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
+(cherry picked from commit 8c24b70fedcb52633b2370f834d8a2be3f7fa38e)
+---
+ docs/misc/xen-command-line.pandoc | 14 +++++++--
+ xen/arch/x86/spec_ctrl.c | 48 ++++++++++++++++++++++++-------
+ 2 files changed, 48 insertions(+), 14 deletions(-)
+
+diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
+index d5cb09f86541..a642e43476a2 100644
+--- a/docs/misc/xen-command-line.pandoc
++++ b/docs/misc/xen-command-line.pandoc
+@@ -2235,7 +2235,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+ ### spec-ctrl (x86)
+ > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb,md-clear}=<bool>,
+ > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu,
+-> l1d-flush,branch-harden,srb-lock}=<bool> ]`
++> l1d-flush,branch-harden,srb-lock,unpriv-mmio}=<bool> ]`
+
+ Controls for speculative execution sidechannel mitigations. By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -2314,8 +2314,16 @@ Xen will enable this mitigation.
+ On hardware supporting SRBDS_CTRL, the `srb-lock=` option can be used to force
+ or prevent Xen from protecting the Special Register Buffer from leaking stale
+ data. By default, Xen will enable this mitigation, except on parts where MDS
+-is fixed and TAA is fixed/mitigated (in which case, there is believed to be no
+-way for an attacker to obtain the stale data).
++is fixed and TAA is fixed/mitigated and there are no unprivileged MMIO
++mappings (in which case, there is believed to be no way for an attacker to
++obtain stale data).
++
++The `unpriv-mmio=` boolean indicates whether the system has (or will have)
++less than fully privileged domains granted access to MMIO devices. By
++default, this option is disabled. If enabled, Xen will use the `FB_CLEAR`
++and/or `SRBDS_CTRL` functionality available in the Intel May 2022 microcode
++release to mitigate cross-domain leakage of data via the MMIO Stale Data
++vulnerabilities.
+
+ ### sync_console
+ > `= <boolean>`
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index d285538bde9f..099113ba41e6 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -67,6 +67,8 @@ static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */
+ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. */
+
+ static int8_t __initdata opt_srb_lock = -1;
++static bool __initdata opt_unpriv_mmio;
++static bool __read_mostly opt_fb_clear_mmio;
+
+ static int __init parse_spec_ctrl(const char *s)
+ {
+@@ -184,6 +186,8 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_branch_harden = val;
+ else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 )
+ opt_srb_lock = val;
++ else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 )
++ opt_unpriv_mmio = val;
+ else
+ rc = -EINVAL;
+
+@@ -392,7 +396,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-",
+ opt_ibpb ? " IBPB" : "",
+ opt_l1d_flush ? " L1D_FLUSH" : "",
+- opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : "",
++ opt_md_clear_pv || opt_md_clear_hvm ||
++ opt_fb_clear_mmio ? " VERW" : "",
+ opt_branch_harden ? " BRANCH_HARDEN" : "");
+
+ /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
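
The per-domain consequence appears in the next hunk: VERW on the way into a
guest is now wanted either for the existing MDS reasons, or because the
FB_CLEAR-based MMIO mitigation applies and the domain has device access. A
standalone rendering, where wants_verw() and its parameters are illustrative
stand-ins for the Xen globals:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for the d->arch.verw calculation in the next hunk. A domain
     * gets VERW on guest entry if the relevant MDS option is set, or if the
     * FB_CLEAR-based MMIO mitigation applies and the domain has device
     * access (approximated, as in the patch, by the IOMMU being enabled
     * for it). */
    static bool wants_verw(bool pv, bool opt_md_clear_pv,
                           bool opt_md_clear_hvm,
                           bool opt_fb_clear_mmio, bool iommu_enabled)
    {
        return (pv ? opt_md_clear_pv : opt_md_clear_hvm) ||
               (opt_fb_clear_mmio && iommu_enabled);
    }

    int main(void)
    {
        /* HVM domain with passthrough on an FB_CLEAR part: VERW is wanted
         * even though the HVM MDS option is off. Prints 1. */
        printf("%d\n", wants_verw(false, false, false, true, true));
        return 0;
    }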
+@@ -941,7 +946,9 @@ void spec_ctrl_init_domain(struct domain *d)
+ {
+ bool pv = is_pv_domain(d);
+
+- d->arch.verw = pv ? opt_md_clear_pv : opt_md_clear_hvm;
++ d->arch.verw =
++ (pv ? opt_md_clear_pv : opt_md_clear_hvm) ||
++ (opt_fb_clear_mmio && is_iommu_enabled(d));
+ }
+
+ void __init init_speculation_mitigations(void)
+@@ -1195,6 +1202,18 @@ void __init init_speculation_mitigations(void)
+
+ mds_calculations(caps);
+
++ /*
++ * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have
++ * reintroduced the VERW fill buffer flushing side effect because of a
++ * susceptibility to FBSDP.
++ *
++ * If unprivileged guests have (or will have) MMIO mappings, we can
++ * mitigate cross-domain leakage of fill buffer data by issuing VERW on
++ * the return-to-guest path.
++ */
++ if ( opt_unpriv_mmio )
++ opt_fb_clear_mmio = caps & ARCH_CAPS_FB_CLEAR;
++
+ /*
+ * By default, enable PV and HVM mitigations on MDS-vulnerable hardware.
+ * This will only be a token effort for MLPDS/MFBDS when HT is enabled,
+@@ -1208,18 +1227,20 @@ void __init init_speculation_mitigations(void)
+ boot_cpu_has(X86_FEATURE_MD_CLEAR));
+
+ /*
+- * Enable MDS defences as applicable. The Idle blocks need using if
+- * either PV or HVM defences are used.
++ * Enable MDS/MMIO defences as applicable. The Idle blocks need using if
++ * either the PV or HVM MDS defences are used, or if we may give MMIO
++ * access to untrusted guests.
+ *
+ * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with
+ * equivalent semantics to avoid needing to perform both flushes on the
+- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH.
++ * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for
++ * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.)
+ *
+ * After calculating the appropriate idle setting, simplify
+ * opt_md_clear_hvm to mean just "should we VERW on the way into HVM
+ * guests", so spec_ctrl_init_domain() can calculate suitable settings.
+ */
+- if ( opt_md_clear_pv || opt_md_clear_hvm )
++ if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio )
+ setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
+ opt_md_clear_hvm &= !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush;
+
+@@ -1284,14 +1305,19 @@ void __init init_speculation_mitigations(void)
+ * On some SRBDS-affected hardware, it may be safe to relax srb-lock by
+ * default.
+ *
+- * On parts which enumerate MDS_NO and not TAA_NO, TSX is the only known
+- * way to access the Fill Buffer. If TSX isn't available (inc. SKU
+- * reasons on some models), or TSX is explicitly disabled, then there is
+- * no need for the extra overhead to protect RDRAND/RDSEED.
++ * All parts with SRBDS_CTRL suffer SSDP, the mechanism by which stale RNG
++ * data becomes available to other contexts. To recover the data, an
++ * attacker needs to use:
++ * - SBDS (MDS or TAA to sample the core's fill buffer)
++ * - SBDR (Architecturally retrieve stale transaction buffer contents)
++ * - DRPW (Architecturally latch stale fill buffer data)
++ *
++ * On MDS_NO parts, with TAA_NO or TSX unavailable/disabled, and with no
++ * unprivileged MMIO access, the RNG data doesn't need protecting.
+ */ + if ( cpu_has_srbds_ctrl ) + { +- if ( opt_srb_lock == -1 && ++ if ( opt_srb_lock == -1 && !opt_unpriv_mmio && + (caps & (ARCH_CAPS_MDS_NO|ARCH_CAPS_TAA_NO)) == ARCH_CAPS_MDS_NO && + (!cpu_has_hle || ((caps & ARCH_CAPS_TSX_CTRL) && rtm_disabled)) ) + opt_srb_lock = 0; +-- +2.35.1 + @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. 
Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/create-patches b/create-patches
new file mode 100755
index 0000000..8e8c9fa
--- /dev/null
+++ b/create-patches
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+cd "${SCRIPT_DIR}"
+
+if [[ ! -v XEN_REPO_DIR ]]; then
+    XEN_REPO_DIR="${HOME}/repos/xen"
+fi
+
+XEN_VERSION="${1}"
+OUR_PATCHES_VERSION="${2}"
+
+XEN_VER_COMPONENTS=( ${XEN_VERSION//./ } )
+XEN_MAJOR_MINOR_VERSION="${XEN_VER_COMPONENTS[0]}.${XEN_VER_COMPONENTS[1]}"
+
+
+git -C "${XEN_REPO_DIR}" fetch origin
+
+readarray -d '' CURRENT_PATCHES < <(find . -maxdepth 1 -type f -name "*.patch" -print0)
+if [[ ${#CURRENT_PATCHES[@]} -gt 0 ]]; then
+    git rm -f *.patch
+fi
+
+PATCH_RANGE_START="RELEASE-${XEN_VERSION}"
+PATCH_RANGE_END="staging-${XEN_MAJOR_MINOR_VERSION}"
+git -C "${XEN_REPO_DIR}" format-patch \
+    -o "${SCRIPT_DIR}" \
+    ${PATCH_RANGE_START}..origin/${PATCH_RANGE_END}
+
+XEN_NEXT_PATCHLEVEL=$((XEN_VER_COMPONENTS[2]+1))
+XEN_NEXT_VERSION="${XEN_MAJOR_MINOR_VERSION}.${XEN_NEXT_PATCHLEVEL}"
+
+PATCH_RANGE_START_ID=$(git -C "${XEN_REPO_DIR}" rev-parse ${PATCH_RANGE_START})
+PATCH_RANGE_END_ID=$(git -C "${XEN_REPO_DIR}" rev-parse origin/${PATCH_RANGE_END})
+
+cat <<EOF > "info.txt"
+Xen upstream patchset #${OUR_PATCHES_VERSION} for ${XEN_NEXT_VERSION}-pre
+
+Containing patches from
+$PATCH_RANGE_START ($PATCH_RANGE_START_ID)
+to
+$PATCH_RANGE_END ($PATCH_RANGE_END_ID)
+EOF
+
+git add \
+    info.txt \
+    *.patch
+
+TAG="${XEN_NEXT_VERSION}-pre-patchset-${OUR_PATCHES_VERSION}"
+DESCRIPTION="Xen ${TAG}"
+
+git commit \
+    --signoff \
+    -m "${DESCRIPTION}"
+
+git tag \
+    -s \
+    -m "${DESCRIPTION}" \
+    "${TAG}"
diff --git a/info.txt b/info.txt
new file mode 100644
index 0000000..2310ace
--- /dev/null
+++ b/info.txt
@@ -0,0 +1,6 @@
+Xen Upstream Patchset #0 for 4.16.2-pre
+
+Containing patches from
+RELEASE-4.16.1 (13fee86475f3831d7a1ecf6d7e0acbc2ac779f7e)
+to
+staging-4.16 (2e82446cb252f6c8ac697e81f4155872c69afde4)
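
As a usage example: with XEN_REPO_DIR pointing at a Xen git checkout
(defaulting to ~/repos/xen), an invocation along the lines of
`./create-patches 4.16.1 0` (the released base version, then the patchset
number) regenerates the *.patch files from the
RELEASE-4.16.1..origin/staging-4.16 range, rewrites info.txt, commits the
result, and creates the signed tag 4.16.2-pre-patchset-0. The argument
values here are inferred from the info.txt above, not recorded anywhere in
the repository.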