Diffstat (limited to '0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch')
-rw-r--r--  0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch | 172
1 file changed, 172 insertions(+), 0 deletions(-)
diff --git a/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch b/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch
new file mode 100644
index 0000000..96e87cd
--- /dev/null
+++ b/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch
@@ -0,0 +1,172 @@
+From 3a8f4ec75d8ed8da6370deac95c341cbada96802 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com>
+Date: Wed, 26 Jun 2024 13:42:05 +0200
+Subject: [PATCH 31/56] x86/irq: handle moving interrupts in
+ _assign_irq_vector()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Currently there's logic in fixup_irqs() that attempts to prevent
+_assign_irq_vector() from failing, as fixup_irqs() is required to evacuate all
+interrupts from the CPUs not present in the input mask. The current logic in
+fixup_irqs() is incomplete, as it doesn't deal with interrupts that have
+move_cleanup_count > 0 and a non-empty ->arch.old_cpu_mask field.
+
+Instead of attempting to fix up the interrupt descriptor in fixup_irqs() so that
+_assign_irq_vector() cannot fail, introduce logic in _assign_irq_vector()
+to deal with interrupts that have either move_{in_progress,cleanup_count} set
+and no remaining online CPUs in ->arch.cpu_mask.
+
+If _assign_irq_vector() is requested to move an interrupt in the state
+described above, first check whether ->arch.old_cpu_mask contains any valid
+CPUs that could be used as a fallback, and if so move the interrupt back to
+the previous destination.  Note this is the easier case, as the vector
+hasn't been released yet, so there's no need to allocate and set up a new
+vector on the destination.
+
+Due to the logic in fixup_irqs() that clears offline CPUs from
+->arch.old_cpu_mask (and releases the old vector if the mask becomes empty), it
+shouldn't be possible to get into _assign_irq_vector() with
+->arch.move_{in_progress,cleanup_count} set but no online CPUs in
+->arch.old_cpu_mask.
+
+However if ->arch.move_{in_progress,cleanup_count} is set and the interrupt
+has also changed affinity, it's possible that the members of
+->arch.old_cpu_mask are no longer part of the affinity set.  In that case
+move the interrupt to a different CPU in the provided mask, and keep the
+current ->arch.old_{cpu_mask,vector} for the pending movement to be completed.
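+
+In outline (an illustrative sketch of the first hunk below, where
+rollback_to_old_destination() is a made-up name standing in for the inlined
+rollback logic):
+
+    if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
+    {
+        /* Current destination still online: don't shuffle, retry later. */
+        if ( cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map) )
+            return -EAGAIN;
+
+        /* Old destination usable as fallback: roll back, cancel the move. */
+        if ( cpumask_intersects(desc->arch.old_cpu_mask, mask) )
+            return rollback_to_old_destination(desc, mask); /* hypothetical */
+
+        /* Else fall through: allocate a fresh vector below, keeping
+         * ->arch.old_{cpu_mask,vector} so the pending cleanup completes. */
+    }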
+
+Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
+Reviewed-by: Jan Beulich <jbeulich@suse.com>
+master commit: 369558924a642bbb0cb731e9a3375958867cb17b
+master date: 2024-06-18 15:15:10 +0200
+---
+ xen/arch/x86/irq.c | 97 ++++++++++++++++++++++++++++++++--------------
+ 1 file changed, 68 insertions(+), 29 deletions(-)
+
+diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
+index f877327975..13ef61a5b7 100644
+--- a/xen/arch/x86/irq.c
++++ b/xen/arch/x86/irq.c
+@@ -553,7 +553,58 @@ static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask)
+ }
+
+ if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
+- return -EAGAIN;
++ {
++ /*
++ * If the current destination is online refuse to shuffle. Retry after
++ * the in-progress movement has finished.
++ */
++ if ( cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map) )
++ return -EAGAIN;
++
++ /*
++ * Due to the logic in fixup_irqs() that clears offlined CPUs from
++ * ->arch.old_cpu_mask it shouldn't be possible to get here with
++ * ->arch.move_{in_progress,cleanup_count} set and no online CPUs in
++ * ->arch.old_cpu_mask.
++ */
++ ASSERT(valid_irq_vector(desc->arch.old_vector));
++ ASSERT(cpumask_intersects(desc->arch.old_cpu_mask, &cpu_online_map));
++
++ if ( cpumask_intersects(desc->arch.old_cpu_mask, mask) )
++ {
++ /*
++ * Fallback to the old destination if moving is in progress and the
++ * current destination is to be offlined. This is only possible if
++ * the CPUs in old_cpu_mask intersect with the affinity mask passed
++ * in the 'mask' parameter.
++ */
++ desc->arch.vector = desc->arch.old_vector;
++ cpumask_and(desc->arch.cpu_mask, desc->arch.old_cpu_mask, mask);
++
++ /* Undo any possibly done cleanup. */
++ for_each_cpu(cpu, desc->arch.cpu_mask)
++ per_cpu(vector_irq, cpu)[desc->arch.vector] = irq;
++
++ /* Cancel the pending move and release the current vector. */
++ desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED;
++ cpumask_clear(desc->arch.old_cpu_mask);
++ desc->arch.move_in_progress = 0;
++ desc->arch.move_cleanup_count = 0;
++ if ( desc->arch.used_vectors )
++ {
++ ASSERT(test_bit(old_vector, desc->arch.used_vectors));
++ clear_bit(old_vector, desc->arch.used_vectors);
++ }
++
++ return 0;
++ }
++
++ /*
++ * There's an interrupt movement in progress but the destination(s) in
++ * ->arch.old_cpu_mask are not suitable given the 'mask' parameter, go
++ * through the full logic to find a new vector in a suitable CPU.
++ */
++ }
+
+ err = -ENOSPC;
+
+@@ -609,7 +660,22 @@ next:
+ current_vector = vector;
+ current_offset = offset;
+
+- if ( valid_irq_vector(old_vector) )
++ if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count )
++ {
++ ASSERT(!cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map));
++ /*
++ * Special case when evacuating an interrupt from a CPU to be
++ * offlined and the interrupt was already in the process of being
++ * moved. Leave ->arch.old_{vector,cpu_mask} as-is and just
++ * replace ->arch.{cpu_mask,vector} with the new destination.
++ * Cleanup will be done normally for the old fields, just release
++ * the current vector here.
++ */
++ if ( desc->arch.used_vectors &&
++ !test_and_clear_bit(old_vector, desc->arch.used_vectors) )
++ ASSERT_UNREACHABLE();
++ }
++ else if ( valid_irq_vector(old_vector) )
+ {
+ cpumask_and(desc->arch.old_cpu_mask, desc->arch.cpu_mask,
+ &cpu_online_map);
+@@ -2620,33 +2686,6 @@ void fixup_irqs(const cpumask_t *mask, bool verbose)
+ continue;
+ }
+
+- /*
+- * In order for the affinity adjustment below to be successful, we
+- * need _assign_irq_vector() to succeed. This in particular means
+- * clearing desc->arch.move_in_progress if this would otherwise
+- * prevent the function from succeeding. Since there's no way for the
+- * flag to get cleared anymore when there's no possible destination
+- * left (the only possibility then would be the IRQs enabled window
+- * after this loop), there's then also no race with us doing it here.
+- *
+- * Therefore the logic here and there need to remain in sync.
+- */
+- if ( desc->arch.move_in_progress &&
+- !cpumask_intersects(mask, desc->arch.cpu_mask) )
+- {
+- unsigned int cpu;
+-
+- cpumask_and(affinity, desc->arch.old_cpu_mask, &cpu_online_map);
+-
+- spin_lock(&vector_lock);
+- for_each_cpu(cpu, affinity)
+- per_cpu(vector_irq, cpu)[desc->arch.old_vector] = ~irq;
+- spin_unlock(&vector_lock);
+-
+- release_old_vec(desc);
+- desc->arch.move_in_progress = 0;
+- }
+-
+ if ( !cpumask_intersects(mask, desc->affinity) )
+ {
+ break_affinity = true;
+--
+2.45.2
+
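
For readers who want to poke at this policy outside a Xen tree, here is a toy,
self-contained C model of the decision flow added by the patch.  Every
identifier in it (mask_t, struct irq_model, assign_vector, NEW_VECTOR, ...) is
invented for this sketch: a plain uint64_t stands in for cpumask_t, the
ASSERTs are omitted, and the real vector-search loop is reduced to a stub.

/*
 * Toy model of the fallback policy in _assign_irq_vector().  All names are
 * invented for illustration and do not exist in Xen.
 * Build with: cc -Wall -o model model.c && ./model
 */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t mask_t;                /* bit N set => CPU N in the mask */

struct irq_model {
    mask_t cpu_mask, old_cpu_mask;      /* current / previous destination */
    int vector, old_vector;             /* current / previous vector */
    bool move_pending;                  /* move_{in_progress,cleanup_count} */
};

#define VECTOR_UNASSIGNED (-1)
#define NEW_VECTOR        0x40          /* what the elided search would find */

/* Mirror of the three-way decision the patch adds to _assign_irq_vector(). */
static int assign_vector(struct irq_model *d, mask_t requested, mask_t online)
{
    if ( d->move_pending )
    {
        /* Current destination still online: refuse to shuffle, retry. */
        if ( d->cpu_mask & online )
            return -EAGAIN;

        /* Old destination usable as fallback: roll back, cancel the move. */
        if ( d->old_cpu_mask & requested )
        {
            d->vector = d->old_vector;
            d->cpu_mask = d->old_cpu_mask & requested;
            d->old_vector = VECTOR_UNASSIGNED;
            d->old_cpu_mask = 0;
            d->move_pending = false;
            return 0;
        }

        /*
         * Otherwise fall through: pick a brand new vector, but keep
         * old_{vector,cpu_mask} (and the pending flag) so the still-pending
         * cleanup of the old destination can complete normally.
         */
    }

    /* Stub for the real vector-search loop. */
    d->cpu_mask = requested & online;
    if ( !d->cpu_mask )
        return -ENOSPC;
    d->vector = NEW_VECTOR;
    return 0;
}

int main(void)
{
    /* IRQ mid-move from CPU1 (old) to CPU3 (current); CPU3 goes offline. */
    struct irq_model d = { .cpu_mask = 1u << 3, .old_cpu_mask = 1u << 1,
                           .vector = 0x30, .old_vector = 0x20,
                           .move_pending = true };
    mask_t online = (1u << 1) | (1u << 2);

    /* CPU1 is in the requested mask => expect a rollback to vector 0x20. */
    int rc = assign_vector(&d, (1u << 1) | (1u << 2), online);
    printf("rollback:     rc=%d vector=%#x mask=%#jx pending=%d\n",
           rc, (unsigned)d.vector, (uintmax_t)d.cpu_mask, d.move_pending);

    /* Same start, but CPU1 excluded => expect a fresh vector on CPU2,
     * with the old fields preserved for the pending cleanup. */
    d = (struct irq_model){ .cpu_mask = 1u << 3, .old_cpu_mask = 1u << 1,
                            .vector = 0x30, .old_vector = 0x20,
                            .move_pending = true };
    rc = assign_vector(&d, 1u << 2, online);
    printf("fall-through: rc=%d vector=%#x mask=%#jx pending=%d\n",
           rc, (unsigned)d.vector, (uintmax_t)d.cpu_mask, d.move_pending);

    return 0;
}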