diff options
Diffstat (limited to '0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch')
-rw-r--r-- | 0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch | 106 |
1 files changed, 0 insertions, 106 deletions
diff --git a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch b/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch deleted file mode 100644 index 4a46326..0000000 --- a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 846fb984b506135917c2862d2e4607005d6afdeb Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:20:09 +0200 -Subject: [PATCH 65/67] x86/boot: Improve the boot watchdog determination of - stuck cpus -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Right now, check_nmi_watchdog() has two processing loops over all online CPUs -using prev_nmi_count as storage. - -Use a cpumask_t instead (1/32th as much initdata) and have wait_for_nmis() -make the determination of whether it is stuck, rather than having both -functions needing to agree on how many ticks mean stuck. - -More importantly though, it means we can use the standard cpumask -infrastructure, including turning this: - - (XEN) Brought up 512 CPUs - (XEN) Testing NMI watchdog on all CPUs: {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511} stuck - -into the rather more manageable: - - (XEN) Brought up 512 CPUs - (XEN) Testing NMI watchdog on all CPUs: {0-511} stuck - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 9e18f339830c828798aef465556d4029d83476a0 -master date: 2024-03-19 18:29:37 +0000 ---- - xen/arch/x86/nmi.c | 33 ++++++++++++++------------------- - 1 file changed, 14 insertions(+), 19 deletions(-) - -diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c -index 7c9591b65e..dd31034ac8 100644 ---- a/xen/arch/x86/nmi.c -+++ b/xen/arch/x86/nmi.c -@@ -150,6 +150,8 @@ int nmi_active; - - static void __init cf_check wait_for_nmis(void *p) - { -+ cpumask_t *stuck_cpus = p; -+ unsigned int cpu = smp_processor_id(); - unsigned int start_count = this_cpu(nmi_count); - unsigned long ticks = 10 * 1000 * cpu_khz / nmi_hz; - unsigned long s, e; -@@ -158,42 +160,35 @@ static void __init cf_check wait_for_nmis(void *p) - do { - cpu_relax(); - if ( this_cpu(nmi_count) >= start_count + 2 ) -- break; -+ return; -+ - e = rdtsc(); -- } while( e - s < ticks ); -+ } while ( e - s < ticks ); -+ -+ /* Timeout. Mark ourselves as stuck. */ -+ cpumask_set_cpu(cpu, stuck_cpus); - } - - void __init check_nmi_watchdog(void) - { -- static unsigned int __initdata prev_nmi_count[NR_CPUS]; -- int cpu; -- bool ok = true; -+ static cpumask_t __initdata stuck_cpus; - - if ( nmi_watchdog == NMI_NONE ) - return; - - printk("Testing NMI watchdog on all CPUs:"); - -- for_each_online_cpu ( cpu ) -- prev_nmi_count[cpu] = per_cpu(nmi_count, cpu); -- - /* - * Wait at most 10 ticks for 2 watchdog NMIs on each CPU. - * Busy-wait on all CPUs: the LAPIC counter that the NMI watchdog - * uses only runs while the core's not halted - */ -- on_selected_cpus(&cpu_online_map, wait_for_nmis, NULL, 1); -- -- for_each_online_cpu ( cpu ) -- { -- if ( per_cpu(nmi_count, cpu) - prev_nmi_count[cpu] < 2 ) -- { -- printk(" %d", cpu); -- ok = false; -- } -- } -+ on_selected_cpus(&cpu_online_map, wait_for_nmis, &stuck_cpus, 1); - -- printk(" %s\n", ok ? "ok" : "stuck"); -+ if ( cpumask_empty(&stuck_cpus) ) -+ printk("ok\n"); -+ else -+ printk("{%*pbl} stuck\n", CPUMASK_PR(&stuck_cpus)); - - /* - * Now that we know it works we can reduce NMI frequency to --- -2.44.0 - |