Diffstat (limited to 'trunk/2.6.22/20044_xen3-patch-2.6.19.patch1')
-rw-r--r--  trunk/2.6.22/20044_xen3-patch-2.6.19.patch1  12637
1 file changed, 12637 insertions, 0 deletions
diff --git a/trunk/2.6.22/20044_xen3-patch-2.6.19.patch1 b/trunk/2.6.22/20044_xen3-patch-2.6.19.patch1
new file mode 100644
index 0000000..908b07d
--- /dev/null
+++ b/trunk/2.6.22/20044_xen3-patch-2.6.19.patch1
@@ -0,0 +1,12637 @@
+From: www.kernel.org
+Subject: Linux 2.6.19
+Patch-mainline: 2.6.19
+
+Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py
+
+Acked-by: jbeulich@novell.com
+
+Index: 10.3-2007-11-26/arch/i386/Kconfig
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/Kconfig 2007-09-03 09:52:56.000000000 +0200
++++ 10.3-2007-11-26/arch/i386/Kconfig 2007-10-22 13:53:08.000000000 +0200
+@@ -222,7 +222,7 @@ endchoice
+ config PARAVIRT
+ bool "Paravirtualization support (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+- depends on !(X86_VISWS || X86_VOYAGER)
++ depends on !(X86_VISWS || X86_VOYAGER || X86_XEN)
+ help
+ Paravirtualization is a way of running multiple instances of
+ Linux on the same machine, under a hypervisor. This option
+Index: 10.3-2007-11-26/arch/i386/kernel/acpi/Makefile
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/acpi/Makefile 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/acpi/Makefile 2007-10-22 13:53:08.000000000 +0200
+@@ -7,5 +7,7 @@ endif
+
+ ifdef CONFIG_XEN
+ include $(srctree)/scripts/Makefile.xen
++n-obj-xen := cstate.o
++obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+ obj-y := $(call cherrypickxen, $(obj-y), $(src))
+ endif
+Index: 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/acpi/boot-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -26,9 +26,12 @@
+ #include <linux/init.h>
+ #include <linux/acpi.h>
+ #include <linux/efi.h>
++#include <linux/cpumask.h>
+ #include <linux/module.h>
+ #include <linux/dmi.h>
+ #include <linux/irq.h>
++#include <linux/bootmem.h>
++#include <linux/ioport.h>
+
+ #include <asm/pgtable.h>
+ #include <asm/io_apic.h>
+@@ -36,11 +39,17 @@
+ #include <asm/io.h>
+ #include <asm/mpspec.h>
+
+-#ifdef CONFIG_X86_64
++static int __initdata acpi_force = 0;
++
++#ifdef CONFIG_ACPI
++int acpi_disabled = 0;
++#else
++int acpi_disabled = 1;
++#endif
++EXPORT_SYMBOL(acpi_disabled);
+
+-extern void __init clustered_apic_check(void);
++#ifdef CONFIG_X86_64
+
+-extern int gsi_irq_sharing(int gsi);
+ #include <asm/proto.h>
+
+ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+@@ -53,8 +62,6 @@ static inline int acpi_madt_oem_check(ch
+ #include <mach_mpparse.h>
+ #endif /* CONFIG_X86_LOCAL_APIC */
+
+-static inline int gsi_irq_sharing(int gsi) { return gsi; }
+-
+ #endif /* X86 */
+
+ #define BAD_MADT_ENTRY(entry, end) ( \
+@@ -63,7 +70,7 @@ static inline int gsi_irq_sharing(int gs
+
+ #define PREFIX "ACPI: "
+
+-int acpi_noirq __initdata; /* skip ACPI IRQ initialization */
++int acpi_noirq; /* skip ACPI IRQ initialization */
+ int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */
+ int acpi_ht __initdata = 1; /* enable HT */
+
+@@ -75,6 +82,7 @@ EXPORT_SYMBOL(acpi_strict);
+ acpi_interrupt_flags acpi_sci_flags __initdata;
+ int acpi_sci_override_gsi __initdata;
+ int acpi_skip_timer_override __initdata;
++int acpi_use_timer_override __initdata;
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+@@ -327,7 +335,7 @@ acpi_parse_ioapic(acpi_table_entry_heade
+ /*
+ * Parse Interrupt Source Override for the ACPI SCI
+ */
+-static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger)
++static void acpi_sci_ioapic_setup(u32 bus_irq, u32 gsi, u16 polarity, u16 trigger)
+ {
+ if (trigger == 0) /* compatible SCI trigger is level */
+ trigger = 3;
+@@ -347,13 +355,13 @@ static void acpi_sci_ioapic_setup(u32 gs
+ * If GSI is < 16, this will update its flags,
+ * else it will create a new mp_irqs[] entry.
+ */
+- mp_override_legacy_irq(gsi, polarity, trigger, gsi);
++ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+
+ /*
+ * stash over-ride to indicate we've been here
+ * and for later update of acpi_fadt
+ */
+- acpi_sci_override_gsi = gsi;
++ acpi_sci_override_gsi = bus_irq;
+ return;
+ }
+
+@@ -371,7 +379,7 @@ acpi_parse_int_src_ovr(acpi_table_entry_
+ acpi_table_print_madt_entry(header);
+
+ if (intsrc->bus_irq == acpi_fadt.sci_int) {
+- acpi_sci_ioapic_setup(intsrc->global_irq,
++ acpi_sci_ioapic_setup(intsrc->bus_irq, intsrc->global_irq,
+ intsrc->flags.polarity,
+ intsrc->flags.trigger);
+ return 0;
+@@ -461,12 +469,7 @@ void __init acpi_pic_sci_set_trigger(uns
+
+ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+ {
+-#ifdef CONFIG_X86_IO_APIC
+- if (use_pci_vector() && !platform_legacy_irq(gsi))
+- *irq = IO_APIC_VECTOR(gsi);
+- else
+-#endif
+- *irq = gsi_irq_sharing(gsi);
++ *irq = gsi;
+ return 0;
+ }
+
+@@ -508,16 +511,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
+ #ifdef CONFIG_ACPI_HOTPLUG_CPU
+ int acpi_map_lsapic(acpi_handle handle, int *pcpu)
+ {
+- /* TBD */
+- return -EINVAL;
++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++ union acpi_object *obj;
++ struct acpi_table_lapic *lapic;
++ cpumask_t tmp_map, new_map;
++ u8 physid;
++ int cpu;
++
++ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
++ return -EINVAL;
++
++ if (!buffer.length || !buffer.pointer)
++ return -EINVAL;
++
++ obj = buffer.pointer;
++ if (obj->type != ACPI_TYPE_BUFFER ||
++ obj->buffer.length < sizeof(*lapic)) {
++ kfree(buffer.pointer);
++ return -EINVAL;
++ }
++
++ lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
++
++ if ((lapic->header.type != ACPI_MADT_LAPIC) ||
++ (!lapic->flags.enabled)) {
++ kfree(buffer.pointer);
++ return -EINVAL;
++ }
++
++ physid = lapic->id;
++
++ kfree(buffer.pointer);
++ buffer.length = ACPI_ALLOCATE_BUFFER;
++ buffer.pointer = NULL;
++
++ tmp_map = cpu_present_map;
++ mp_register_lapic(physid, lapic->flags.enabled);
++
++ /*
++ * If mp_register_lapic successfully generates a new logical cpu
++ * number, then the following will get us exactly what was mapped
++ */
++ cpus_andnot(new_map, cpu_present_map, tmp_map);
++ if (cpus_empty(new_map)) {
++ printk ("Unable to map lapic to logical cpu number\n");
++ return -EINVAL;
++ }
++
++ cpu = first_cpu(new_map);
++
++ *pcpu = cpu;
++ return 0;
+ }
+
+ EXPORT_SYMBOL(acpi_map_lsapic);
+
+ int acpi_unmap_lsapic(int cpu)
+ {
+- /* TBD */
+- return -EINVAL;
++ int i;
++
++ for_each_possible_cpu(i) {
++ if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
++ x86_acpiid_to_apicid[i] = -1;
++ break;
++ }
++ }
++ x86_cpu_to_apicid[cpu] = -1;
++ cpu_clear(cpu, cpu_present_map);
++ num_processors--;
++
++ return (0);
+ }
+
+ EXPORT_SYMBOL(acpi_unmap_lsapic);
+@@ -582,6 +645,8 @@ static int __init acpi_parse_sbf(unsigne
+ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
+ {
+ struct acpi_table_hpet *hpet_tbl;
++ struct resource *hpet_res;
++ resource_size_t res_start;
+
+ if (!phys || !size)
+ return -EINVAL;
+@@ -597,12 +662,26 @@ static int __init acpi_parse_hpet(unsign
+ "memory.\n");
+ return -1;
+ }
++
++#define HPET_RESOURCE_NAME_SIZE 9
++ hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
++ if (hpet_res) {
++ memset(hpet_res, 0, sizeof(*hpet_res));
++ hpet_res->name = (void *)&hpet_res[1];
++ hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
++ snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
++ "HPET %u", hpet_tbl->number);
++ hpet_res->end = (1 * 1024) - 1;
++ }
++
+ #ifdef CONFIG_X86_64
+ vxtime.hpet_address = hpet_tbl->addr.addrl |
+ ((long)hpet_tbl->addr.addrh << 32);
+
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, vxtime.hpet_address);
++
++ res_start = vxtime.hpet_address;
+ #else /* X86 */
+ {
+ extern unsigned long hpet_address;
+@@ -610,9 +689,17 @@ static int __init acpi_parse_hpet(unsign
+ hpet_address = hpet_tbl->addr.addrl;
+ printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
+ hpet_tbl->id, hpet_address);
++
++ res_start = hpet_address;
+ }
+ #endif /* X86 */
+
++ if (hpet_res) {
++ hpet_res->start = res_start;
++ hpet_res->end += res_start;
++ insert_resource(&iomem_resource, hpet_res);
++ }
++
+ return 0;
+ }
+ #else
+@@ -796,7 +883,7 @@ static int __init acpi_parse_madt_ioapic
+ * pretend we got one so we can set the SCI flags.
+ */
+ if (!acpi_sci_override_gsi)
+- acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0);
++ acpi_sci_ioapic_setup(acpi_fadt.sci_int, acpi_fadt.sci_int, 0, 0);
+
+ /* Fill in identity legacy mappings where no override */
+ mp_config_acpi_legacy_irqs();
+@@ -863,8 +950,6 @@ static void __init acpi_process_madt(voi
+ return;
+ }
+
+-extern int acpi_force;
+-
+ #ifdef __i386__
+
+ static int __init disable_acpi_irq(struct dmi_system_id *d)
+@@ -1166,3 +1251,82 @@ int __init acpi_boot_init(void)
+
+ return 0;
+ }
++
++static int __init parse_acpi(char *arg)
++{
++ if (!arg)
++ return -EINVAL;
++
++ /* "acpi=off" disables both ACPI table parsing and interpreter */
++ if (strcmp(arg, "off") == 0) {
++ disable_acpi();
++ }
++ /* acpi=force to over-ride black-list */
++ else if (strcmp(arg, "force") == 0) {
++ acpi_force = 1;
++ acpi_ht = 1;
++ acpi_disabled = 0;
++ }
++ /* acpi=strict disables out-of-spec workarounds */
++ else if (strcmp(arg, "strict") == 0) {
++ acpi_strict = 1;
++ }
++ /* Limit ACPI just to boot-time to enable HT */
++ else if (strcmp(arg, "ht") == 0) {
++ if (!acpi_force)
++ disable_acpi();
++ acpi_ht = 1;
++ }
++ /* "acpi=noirq" disables ACPI interrupt routing */
++ else if (strcmp(arg, "noirq") == 0) {
++ acpi_noirq_set();
++ } else {
++ /* Core will printk when we return error. */
++ return -EINVAL;
++ }
++ return 0;
++}
++early_param("acpi", parse_acpi);
++
++/* FIXME: Using pci= for an ACPI parameter is a travesty. */
++static int __init parse_pci(char *arg)
++{
++ if (arg && strcmp(arg, "noacpi") == 0)
++ acpi_disable_pci();
++ return 0;
++}
++early_param("pci", parse_pci);
++
++#ifdef CONFIG_X86_IO_APIC
++static int __init parse_acpi_skip_timer_override(char *arg)
++{
++ acpi_skip_timer_override = 1;
++ return 0;
++}
++early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
++
++static int __init parse_acpi_use_timer_override(char *arg)
++{
++ acpi_use_timer_override = 1;
++ return 0;
++}
++early_param("acpi_use_timer_override", parse_acpi_use_timer_override);
++#endif /* CONFIG_X86_IO_APIC */
++
++static int __init setup_acpi_sci(char *s)
++{
++ if (!s)
++ return -EINVAL;
++ if (!strcmp(s, "edge"))
++ acpi_sci_flags.trigger = 1;
++ else if (!strcmp(s, "level"))
++ acpi_sci_flags.trigger = 3;
++ else if (!strcmp(s, "high"))
++ acpi_sci_flags.polarity = 1;
++ else if (!strcmp(s, "low"))
++ acpi_sci_flags.polarity = 3;
++ else
++ return -EINVAL;
++ return 0;
++}
++early_param("acpi_sci", setup_acpi_sci);
+Index: 10.3-2007-11-26/arch/i386/kernel/apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/apic-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/apic-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
+ /*
+ * Knob to control our willingness to enable the local APIC.
+ */
+-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+
+ /*
+ * Debug level
+@@ -102,7 +101,7 @@ int get_physical_broadcast(void)
+
+ #ifndef CONFIG_XEN
+ #ifndef CONFIG_SMP
+-static void up_apic_timer_interrupt_call(struct pt_regs *regs)
++static void up_apic_timer_interrupt_call(void)
+ {
+ int cpu = smp_processor_id();
+
+@@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
+ */
+ per_cpu(irq_stat, cpu).apic_timer_irqs++;
+
+- smp_local_timer_interrupt(regs);
++ smp_local_timer_interrupt();
+ }
+ #endif
+
+-void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
++void smp_send_timer_broadcast_ipi(void)
+ {
+ cpumask_t mask;
+
+@@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
+ * We can directly call the apic timer interrupt handler
+ * in UP case. Minus all irq related functions
+ */
+- up_apic_timer_interrupt_call(regs);
++ up_apic_timer_interrupt_call();
+ #endif
+ }
+ }
+Index: 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/cpu/common-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
+
+ extern int disable_pse;
+
+-static void default_init(struct cpuinfo_x86 * c)
++static void __cpuinit default_init(struct cpuinfo_x86 * c)
+ {
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+@@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
+ }
+ }
+
+-static struct cpu_dev default_cpu = {
++static struct cpu_dev __cpuinitdata default_cpu = {
+ .c_init = default_init,
+ .c_vendor = "Unknown",
+ };
+@@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str
+
+ static int __init x86_fxsr_setup(char * s)
+ {
++ /* Tell all the other CPUs to not use it... */
+ disable_x86_fxsr = 1;
++
++ /*
++ * ... and clear the bits early in the boot_cpu_data
++ * so that the bootup process doesn't try to do this
++ * either.
++ */
++ clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
++ clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
+ return 1;
+ }
+ __setup("nofxsr", x86_fxsr_setup);
+@@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
+ }
+ }
+
+-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
++static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+ {
+ u32 tfms, xlvl;
+ int ebx;
+@@ -698,8 +707,7 @@ old_gdt:
+ */
+ atomic_inc(&init_mm.mm_count);
+ current->active_mm = &init_mm;
+- if (current->mm)
+- BUG();
++ BUG_ON(current->mm);
+ enter_lazy_tlb(&init_mm, current);
+
+ load_esp0(t, thread);
+@@ -712,7 +720,7 @@ old_gdt:
+ #endif
+
+ /* Clear %fs and %gs. */
+- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
++ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
+
+ /* Clear all 6 debug registers: */
+ set_debugreg(0, 0);
+Index: 10.3-2007-11-26/arch/i386/kernel/crash.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/crash.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/crash.c 2007-10-22 13:53:08.000000000 +0200
+@@ -135,6 +135,8 @@ void machine_crash_shutdown(struct pt_re
+ #if defined(CONFIG_X86_IO_APIC)
+ disable_IO_APIC();
+ #endif
+-#endif /* CONFIG_XEN */
+ crash_save_cpu(regs, safe_smp_processor_id());
++#else
++ crash_save_cpu(regs, smp_processor_id());
++#endif /* CONFIG_XEN */
+ }
+Index: 10.3-2007-11-26/arch/i386/kernel/entry-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/entry-xen.S 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/entry-xen.S 2007-10-22 13:53:08.000000000 +0200
+@@ -80,8 +80,12 @@ VM_MASK = 0x00020000
+ NMI_MASK = 0x80000000
+
+ #ifndef CONFIG_XEN
+-#define DISABLE_INTERRUPTS cli
+-#define ENABLE_INTERRUPTS sti
++/* These are replaced for paravirtualization */
++#define DISABLE_INTERRUPTS cli
++#define ENABLE_INTERRUPTS sti
++#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
++#define INTERRUPT_RETURN iret
++#define GET_CR0_INTO_EAX movl %cr0, %eax
+ #else
+ /* Offsets into shared_info_t. */
+ #define evtchn_upcall_pending /* 0 */
+@@ -99,15 +103,29 @@ NMI_MASK = 0x80000000
+
+ #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
+ #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
++#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
+ #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
+ __DISABLE_INTERRUPTS
+ #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
+ __ENABLE_INTERRUPTS
+-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
++#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
++sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
++ __TEST_PENDING ; \
++ jnz 14f # process more events if necessary... ; \
++ movl ESI(%esp), %esi ; \
++ sysexit ; \
++14: __DISABLE_INTERRUPTS ; \
++ TRACE_IRQS_OFF ; \
++sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
++ push %esp ; \
++ call evtchn_do_upcall ; \
++ add $4,%esp ; \
++ jmp ret_from_intr
++#define INTERRUPT_RETURN iret
+ #endif
+
+ #ifdef CONFIG_PREEMPT
+-#define preempt_stop cli; TRACE_IRQS_OFF
++#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
+ #else
+ #define preempt_stop
+ #define resume_kernel restore_nocheck
+@@ -206,18 +224,21 @@ NMI_MASK = 0x80000000
+
+ #define RING0_INT_FRAME \
+ CFI_STARTPROC simple;\
++ CFI_SIGNAL_FRAME;\
+ CFI_DEF_CFA esp, 3*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+ #define RING0_EC_FRAME \
+ CFI_STARTPROC simple;\
++ CFI_SIGNAL_FRAME;\
+ CFI_DEF_CFA esp, 4*4;\
+ /*CFI_OFFSET cs, -2*4;*/\
+ CFI_OFFSET eip, -3*4
+
+ #define RING0_PTREGS_FRAME \
+ CFI_STARTPROC simple;\
++ CFI_SIGNAL_FRAME;\
+ CFI_DEF_CFA esp, OLDESP-EBX;\
+ /*CFI_OFFSET cs, CS-OLDESP;*/\
+ CFI_OFFSET eip, EIP-OLDESP;\
+@@ -263,8 +284,9 @@ ret_from_intr:
+ check_userspace:
+ movl EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb CS(%esp), %al
+- testl $(VM_MASK | 2), %eax
+- jz resume_kernel
++ andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
++ cmpl $USER_RPL, %eax
++ jb resume_kernel # not returning to v8086 or userspace
+ ENTRY(resume_userspace)
+ DISABLE_INTERRUPTS # make sure we don't miss an interrupt
+ # setting need_resched or sigpending
+@@ -277,7 +299,7 @@ ENTRY(resume_userspace)
+
+ #ifdef CONFIG_PREEMPT
+ ENTRY(resume_kernel)
+- cli
++ DISABLE_INTERRUPTS
+ cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
+ jnz restore_nocheck
+ need_resched:
+@@ -297,6 +319,7 @@ need_resched:
+ # sysenter call handler stub
+ ENTRY(sysenter_entry)
+ CFI_STARTPROC simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA esp, 0
+ CFI_REGISTER esp, ebp
+ movl SYSENTER_stack_esp0(%esp),%esp
+@@ -305,7 +328,7 @@ sysenter_past_esp:
+ * No need to follow this irqs on/off section: the syscall
+ * disabled irqs and here we enable it straight after entry:
+ */
+- sti
++ ENABLE_INTERRUPTS
+ pushl $(__USER_DS)
+ CFI_ADJUST_CFA_OFFSET 4
+ /*CFI_REL_OFFSET ss, 0*/
+@@ -359,26 +382,8 @@ sysenter_past_esp:
+ movl EIP(%esp), %edx
+ movl OLDESP(%esp), %ecx
+ xorl %ebp,%ebp
+-#ifdef CONFIG_XEN
+ TRACE_IRQS_ON
+- __ENABLE_INTERRUPTS
+-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
+- __TEST_PENDING
+- jnz 14f # process more events if necessary...
+- movl ESI(%esp), %esi
+- sysexit
+-14: __DISABLE_INTERRUPTS
+- TRACE_IRQS_OFF
+-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
+- push %esp
+- call evtchn_do_upcall
+- add $4,%esp
+- jmp ret_from_intr
+-#else
+- TRACE_IRQS_ON
+- sti
+- sysexit
+-#endif /* !CONFIG_XEN */
++ ENABLE_INTERRUPTS_SYSEXIT
+ CFI_ENDPROC
+
+
+@@ -419,8 +424,8 @@ restore_all:
+ # See comments in process.c:copy_thread() for details.
+ movb OLDSS(%esp), %ah
+ movb CS(%esp), %al
+- andl $(VM_MASK | (4 << 8) | 3), %eax
+- cmpl $((4 << 8) | 3), %eax
++ andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
++ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
+ CFI_REMEMBER_STATE
+ je ldt_ss # returning to user-space with LDT SS
+ restore_nocheck:
+@@ -442,12 +447,11 @@ restore_nocheck_notrace:
+ RESTORE_REGS
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+-1: iret
++1: INTERRUPT_RETURN
+ .section .fixup,"ax"
+ iret_exc:
+ #ifndef CONFIG_XEN
+- TRACE_IRQS_ON
+- sti
++ ENABLE_INTERRUPTS
+ #endif
+ pushl $0 # no error code
+ pushl $do_iret_error
+@@ -473,7 +477,7 @@ ldt_ss:
+ * dosemu and wine happy. */
+ subl $8, %esp # reserve space for switch16 pointer
+ CFI_ADJUST_CFA_OFFSET 8
+- cli
++ DISABLE_INTERRUPTS
+ TRACE_IRQS_OFF
+ movl %esp, %eax
+ /* Set up the 16bit stack frame with switch32 pointer on top,
+@@ -483,7 +487,7 @@ ldt_ss:
+ TRACE_IRQS_IRET
+ RESTORE_REGS
+ lss 20+4(%esp), %esp # switch to 16bit stack
+-1: iret
++1: INTERRUPT_RETURN
+ .section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+@@ -499,7 +503,7 @@ scrit: /**** START OF CRITICAL REGION **
+ RESTORE_REGS
+ addl $4, %esp
+ CFI_ADJUST_CFA_OFFSET -4
+-1: iret
++1: INTERRUPT_RETURN
+ .section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+@@ -688,11 +692,9 @@ ENTRY(name) \
+ #define UNWIND_ESPFIX_STACK
+ #endif
+
+-ENTRY(divide_error)
+- RING0_INT_FRAME
+- pushl $0 # no error code
+- CFI_ADJUST_CFA_OFFSET 4
+- pushl $do_divide_error
++KPROBE_ENTRY(page_fault)
++ RING0_EC_FRAME
++ pushl $do_page_fault
+ CFI_ADJUST_CFA_OFFSET 4
+ ALIGN
+ error_code:
+@@ -742,6 +744,7 @@ error_code:
+ call *%edi
+ jmp ret_from_exception
+ CFI_ENDPROC
++KPROBE_END(page_fault)
+
+ #ifdef CONFIG_XEN
+ # A note on the "critical region" in our callback handler.
+@@ -901,7 +904,7 @@ ENTRY(device_not_available)
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ #ifndef CONFIG_XEN
+- movl %cr0, %eax
++ GET_CR0_INTO_EAX
+ testl $0x4, %eax # EM (math emulation bit)
+ je device_available_emulate
+ pushl $0 # temporary storage for ORIG_EIP
+@@ -936,9 +939,15 @@ device_available_emulate:
+ jne ok; \
+ label: \
+ movl SYSENTER_stack_esp0+offset(%esp),%esp; \
++ CFI_DEF_CFA esp, 0; \
++ CFI_UNDEFINED eip; \
+ pushfl; \
++ CFI_ADJUST_CFA_OFFSET 4; \
+ pushl $__KERNEL_CS; \
+- pushl $sysenter_past_esp
++ CFI_ADJUST_CFA_OFFSET 4; \
++ pushl $sysenter_past_esp; \
++ CFI_ADJUST_CFA_OFFSET 4; \
++ CFI_REL_OFFSET eip, 0
+ #endif /* CONFIG_XEN */
+
+ KPROBE_ENTRY(debug)
+@@ -957,7 +966,8 @@ debug_stack_correct:
+ call do_debug
+ jmp ret_from_exception
+ CFI_ENDPROC
+- .previous .text
++KPROBE_END(debug)
++
+ #ifndef CONFIG_XEN
+ /*
+ * NMI is doubly nasty. It can happen _while_ we're handling
+@@ -967,7 +977,7 @@ debug_stack_correct:
+ * check whether we got an NMI on the debug path where the debug
+ * fault happened on the sysenter path.
+ */
+-ENTRY(nmi)
++KPROBE_ENTRY(nmi)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+@@ -992,6 +1002,7 @@ ENTRY(nmi)
+ cmpl $sysenter_entry,12(%esp)
+ je nmi_debug_stack_check
+ nmi_stack_correct:
++ /* We have a RING0_INT_FRAME here */
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+@@ -1002,9 +1013,12 @@ nmi_stack_correct:
+ CFI_ENDPROC
+
+ nmi_stack_fixup:
++ RING0_INT_FRAME
+ FIX_STACK(12,nmi_stack_correct, 1)
+ jmp nmi_stack_correct
++
+ nmi_debug_stack_check:
++ /* We have a RING0_INT_FRAME here */
+ cmpw $__KERNEL_CS,16(%esp)
+ jne nmi_stack_correct
+ cmpl $debug,(%esp)
+@@ -1015,8 +1029,10 @@ nmi_debug_stack_check:
+ jmp nmi_stack_correct
+
+ nmi_16bit_stack:
+- RING0_INT_FRAME
+- /* create the pointer to lss back */
++ /* We have a RING0_INT_FRAME here.
++ *
++ * create the pointer to lss back
++ */
+ pushl %ss
+ CFI_ADJUST_CFA_OFFSET 4
+ pushl %esp
+@@ -1037,14 +1053,14 @@ nmi_16bit_stack:
+ call do_nmi
+ RESTORE_REGS
+ lss 12+4(%esp), %esp # back to 16bit stack
+-1: iret
++1: INTERRUPT_RETURN
+ CFI_ENDPROC
+ .section __ex_table,"a"
+ .align 4
+ .long 1b,iret_exc
+ .previous
+ #else
+-ENTRY(nmi)
++KPROBE_ENTRY(nmi)
+ RING0_INT_FRAME
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+@@ -1056,6 +1072,7 @@ ENTRY(nmi)
+ jmp restore_all
+ CFI_ENDPROC
+ #endif
++KPROBE_END(nmi)
+
+ KPROBE_ENTRY(int3)
+ RING0_INT_FRAME
+@@ -1067,7 +1084,7 @@ KPROBE_ENTRY(int3)
+ call do_int3
+ jmp ret_from_exception
+ CFI_ENDPROC
+- .previous .text
++KPROBE_END(int3)
+
+ ENTRY(overflow)
+ RING0_INT_FRAME
+@@ -1132,7 +1149,7 @@ KPROBE_ENTRY(general_protection)
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+- .previous .text
++KPROBE_END(general_protection)
+
+ ENTRY(alignment_check)
+ RING0_EC_FRAME
+@@ -1141,13 +1158,14 @@ ENTRY(alignment_check)
+ jmp error_code
+ CFI_ENDPROC
+
+-KPROBE_ENTRY(page_fault)
+- RING0_EC_FRAME
+- pushl $do_page_fault
++ENTRY(divide_error)
++ RING0_INT_FRAME
++ pushl $0 # no error code
++ CFI_ADJUST_CFA_OFFSET 4
++ pushl $do_divide_error
+ CFI_ADJUST_CFA_OFFSET 4
+ jmp error_code
+ CFI_ENDPROC
+- .previous .text
+
+ #ifdef CONFIG_X86_MCE
+ ENTRY(machine_check)
+@@ -1209,6 +1227,19 @@ ENTRY(fixup_4gb_segment)
+ jmp error_code
+ CFI_ENDPROC
+
++ENTRY(kernel_thread_helper)
++ pushl $0 # fake return address for unwinder
++ CFI_STARTPROC
++ movl %edx,%eax
++ push %edx
++ CFI_ADJUST_CFA_OFFSET 4
++ call *%ebx
++ push %eax
++ CFI_ADJUST_CFA_OFFSET 4
++ call do_exit
++ CFI_ENDPROC
++ENDPROC(kernel_thread_helper)
++
+ .section .rodata,"a"
+ .align 4
+ #include "syscall_table.S"
+Index: 10.3-2007-11-26/arch/i386/kernel/head-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/head-xen.S 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/head-xen.S 2007-10-22 13:53:08.000000000 +0200
+@@ -62,7 +62,7 @@ ENTRY(startup_32)
+ movl %eax,%gs
+ cld # gcc2 wants the direction flag cleared at all times
+
+- pushl %eax # fake return address
++ pushl $0 # fake return address for unwinder
+ jmp start_kernel
+
+ #define HYPERCALL_PAGE_OFFSET 0x1000
+Index: 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/io_apic-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -31,6 +31,9 @@
+ #include <linux/acpi.h>
+ #include <linux/module.h>
+ #include <linux/sysdev.h>
++#include <linux/pci.h>
++#include <linux/msi.h>
++#include <linux/htirq.h>
+
+ #include <asm/io.h>
+ #include <asm/smp.h>
+@@ -38,13 +41,15 @@
+ #include <asm/timer.h>
+ #include <asm/i8259.h>
+ #include <asm/nmi.h>
++#include <asm/msidef.h>
++#include <asm/hypertransport.h>
+
+ #include <mach_apic.h>
++#include <mach_apicdef.h>
+
+ #include "io_ports.h"
+
+ #ifdef CONFIG_XEN
+-
+ #include <xen/interface/xen.h>
+ #include <xen/interface/physdev.h>
+
+@@ -55,32 +60,7 @@
+
+ unsigned long io_apic_irqs;
+
+-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
+-{
+- struct physdev_apic apic_op;
+- int ret;
+-
+- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+- apic_op.reg = reg;
+- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
+- if (ret)
+- return ret;
+- return apic_op.value;
+-}
+-
+-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+-{
+- struct physdev_apic apic_op;
+-
+- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+- apic_op.reg = reg;
+- apic_op.value = value;
+- HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
+-}
+-
+-#define io_apic_read(a,r) xen_io_apic_read(a,r)
+-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
+-
++#define clear_IO_APIC() ((void)0)
+ #endif /* CONFIG_XEN */
+
+ int (*ioapic_renumber_irq)(int ioapic, int irq);
+@@ -105,7 +85,7 @@ int sis_apic_bug = -1;
+ */
+ int nr_ioapic_registers[MAX_IO_APICS];
+
+-int disable_timer_pin_1 __initdata;
++static int disable_timer_pin_1 __initdata;
+
+ /*
+ * Rough estimation of how many shared IRQs there are, can
+@@ -125,12 +105,122 @@ static struct irq_pin_list {
+ int apic, pin, next;
+ } irq_2_pin[PIN_MAP_SIZE];
+
+-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+-#ifdef CONFIG_PCI_MSI
+-#define vector_to_irq(vector) \
+- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
++#ifndef CONFIG_XEN
++struct io_apic {
++ unsigned int index;
++ unsigned int unused[3];
++ unsigned int data;
++};
++
++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
++{
++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
++ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
++}
++#endif
++
++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
++{
++#ifndef CONFIG_XEN
++ struct io_apic __iomem *io_apic = io_apic_base(apic);
++ writel(reg, &io_apic->index);
++ return readl(&io_apic->data);
+ #else
+-#define vector_to_irq(vector) (vector)
++ struct physdev_apic apic_op;
++ int ret;
++
++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
++ apic_op.reg = reg;
++ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
++ if (ret)
++ return ret;
++ return apic_op.value;
++#endif
++}
++
++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
++{
++#ifndef CONFIG_XEN
++ struct io_apic __iomem *io_apic = io_apic_base(apic);
++ writel(reg, &io_apic->index);
++ writel(value, &io_apic->data);
++#else
++ struct physdev_apic apic_op;
++
++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
++ apic_op.reg = reg;
++ apic_op.value = value;
++ HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
++#endif
++}
++
++#ifndef CONFIG_XEN
++/*
++ * Re-write a value: to be used for read-modify-write
++ * cycles where the read already set up the index register.
++ *
++ * Older SiS APIC requires we rewrite the index register
++ */
++static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
++{
++ volatile struct io_apic *io_apic = io_apic_base(apic);
++ if (sis_apic_bug)
++ writel(reg, &io_apic->index);
++ writel(value, &io_apic->data);
++}
++#else
++#define io_apic_modify io_apic_write
++#endif
++
++union entry_union {
++ struct { u32 w1, w2; };
++ struct IO_APIC_route_entry entry;
++};
++
++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
++{
++ union entry_union eu;
++ unsigned long flags;
++ spin_lock_irqsave(&ioapic_lock, flags);
++ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++ return eu.entry;
++}
++
++/*
++ * When we write a new IO APIC routing entry, we need to write the high
++ * word first! If the mask bit in the low word is clear, we will enable
++ * the interrupt, and we need to make sure the entry is fully populated
++ * before that happens.
++ */
++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
++{
++ unsigned long flags;
++ union entry_union eu;
++ eu.entry = e;
++ spin_lock_irqsave(&ioapic_lock, flags);
++ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++}
++
++#ifndef CONFIG_XEN
++/*
++ * When we mask an IO APIC routing entry, we need to write the low
++ * word first, in order to set the mask bit before we change the
++ * high bits!
++ */
++static void ioapic_mask_entry(int apic, int pin)
++{
++ unsigned long flags;
++ union entry_union eu = { .entry.mask = 1 };
++
++ spin_lock_irqsave(&ioapic_lock, flags);
++ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++}
+ #endif
+
+ /*
+@@ -156,9 +246,7 @@ static void add_pin_to_irq(unsigned int
+ entry->pin = pin;
+ }
+
+-#ifdef CONFIG_XEN
+-#define clear_IO_APIC() ((void)0)
+-#else
++#ifndef CONFIG_XEN
+ /*
+ * Reroute an IRQ to a different pin.
+ */
+@@ -243,25 +331,16 @@ static void unmask_IO_APIC_irq (unsigned
+ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+ {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry = ioapic_read_entry(apic, pin);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+- memset(&entry, 0, sizeof(entry));
+- entry.mask = 1;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_mask_entry(apic, pin);
+ }
+
+ static void clear_IO_APIC (void)
+@@ -301,7 +380,7 @@ static void set_ioapic_affinity_irq(unsi
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+- set_irq_info(irq, cpumask);
++ set_native_irq_info(irq, cpumask);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+@@ -1207,40 +1286,40 @@ static inline int IO_APIC_irq_trigger(in
+ /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+ u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
+
+-int assign_irq_vector(int irq)
++static int __assign_irq_vector(int irq)
+ {
+- unsigned long flags;
+ int vector;
+ struct physdev_irq irq_op;
+
+- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
++ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+
+- spin_lock_irqsave(&vector_lock, flags);
+-
+- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+- spin_unlock_irqrestore(&vector_lock, flags);
+- return IO_APIC_VECTOR(irq);
+- }
++ if (irq_vector[irq] > 0)
++ return irq_vector[irq];
+
+ irq_op.irq = irq;
+- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+- spin_unlock_irqrestore(&vector_lock, flags);
++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
+ return -ENOSPC;
+- }
+
+ vector = irq_op.vector;
+- vector_irq[vector] = irq;
+- if (irq != AUTO_ASSIGN)
+- IO_APIC_VECTOR(irq) = vector;
++ irq_vector[irq] = vector;
++
++ return vector;
++}
++
++static int assign_irq_vector(int irq)
++{
++ unsigned long flags;
++ int vector;
+
++ spin_lock_irqsave(&vector_lock, flags);
++ vector = __assign_irq_vector(irq);
+ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return vector;
+ }
+
+ #ifndef CONFIG_XEN
+-static struct hw_interrupt_type ioapic_level_type;
+-static struct hw_interrupt_type ioapic_edge_type;
++static struct irq_chip ioapic_chip;
+
+ #define IOAPIC_AUTO -1
+ #define IOAPIC_EDGE 0
+@@ -1248,16 +1327,16 @@ static struct hw_interrupt_type ioapic_e
+
+ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+ {
+- unsigned idx;
+-
+- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+-
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+- irq_desc[idx].chip = &ioapic_level_type;
+- else
+- irq_desc[idx].chip = &ioapic_edge_type;
+- set_intr_gate(vector, interrupt[idx]);
++ set_irq_chip_and_handler_name(irq, &ioapic_chip,
++ handle_fasteoi_irq, "fasteoi");
++ else {
++ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
++ set_irq_chip_and_handler_name(irq, &ioapic_chip,
++ handle_edge_irq, "edge");
++ }
++ set_intr_gate(vector, interrupt[irq]);
+ }
+ #else
+ #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+@@ -1328,9 +1407,8 @@ static void __init setup_IO_APIC_irqs(vo
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
++ ioapic_write_entry(apic, pin, entry);
+ spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+@@ -1347,7 +1425,6 @@ static void __init setup_IO_APIC_irqs(vo
+ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
+ {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+ memset(&entry,0,sizeof(entry));
+
+@@ -1372,15 +1449,13 @@ static void __init setup_ExtINT_IRQ0_pin
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+- irq_desc[0].chip = &ioapic_edge_type;
++ irq_desc[0].chip = &ioapic_chip;
++ set_irq_handler(0, handle_edge_irq);
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry);
+
+ enable_8259A_irq(0);
+ }
+@@ -1490,10 +1565,7 @@ void __init print_IO_APIC(void)
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry = ioapic_read_entry(apic, i);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+@@ -1513,17 +1585,12 @@ void __init print_IO_APIC(void)
+ );
+ }
+ }
+- if (use_pci_vector())
+- printk(KERN_INFO "Using vector-based indexing\n");
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+- if (use_pci_vector() && !platform_legacy_irq(i))
+- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+- else
+- printk(KERN_DEBUG "IRQ%d ", i);
++ printk(KERN_DEBUG "IRQ%d ", i);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+@@ -1716,10 +1783,7 @@ static void __init enable_IO_APIC(void)
+ /* See if any of the pins is in ExtINT mode */
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry = ioapic_read_entry(apic, pin);
+
+
+ /* If the interrupt line is enabled and in ExtInt mode
+@@ -1777,7 +1841,6 @@ void disable_IO_APIC(void)
+ */
+ if (ioapic_i8259.pin != -1) {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+@@ -1794,12 +1857,7 @@ void disable_IO_APIC(void)
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+- *(((int *)&entry)+1));
+- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+- *(((int *)&entry)+0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+ }
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+ #endif
+@@ -1966,6 +2024,8 @@ static int __init timer_irq_works(void)
+ */
+
+ /*
++ * Startup quirk:
++ *
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+@@ -1973,8 +2033,10 @@ static int __init timer_irq_works(void)
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
++ *
++ * (We do this for level-triggered IRQs too - it cannot hurt.)
+ */
+-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
++static unsigned int startup_ioapic_irq(unsigned int irq)
+ {
+ int was_pending = 0;
+ unsigned long flags;
+@@ -1991,47 +2053,18 @@ static unsigned int startup_edge_ioapic_
+ return was_pending;
+ }
+
+-/*
+- * Once we have recorded IRQ_PENDING already, we can mask the
+- * interrupt for real. This prevents IRQ storms from unhandled
+- * devices.
+- */
+-static void ack_edge_ioapic_irq(unsigned int irq)
+-{
+- move_irq(irq);
+- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+- == (IRQ_PENDING | IRQ_DISABLED))
+- mask_IO_APIC_irq(irq);
+- ack_APIC_irq();
+-}
+-
+-/*
+- * Level triggered interrupts can just be masked,
+- * and shutting down and starting up the interrupt
+- * is the same as enabling and disabling them -- except
+- * with a startup need to return a "was pending" value.
+- *
+- * Level triggered interrupts are special because we
+- * do not touch any IO-APIC register while handling
+- * them. We ack the APIC in the end-IRQ handler, not
+- * in the start-IRQ-handler. Protection against reentrance
+- * from the same interrupt is still provided, both by the
+- * generic IRQ layer and by the fact that an unacked local
+- * APIC does not accept IRQs.
+- */
+-static unsigned int startup_level_ioapic_irq (unsigned int irq)
++static void ack_ioapic_irq(unsigned int irq)
+ {
+- unmask_IO_APIC_irq(irq);
+-
+- return 0; /* don't check for pending */
++ move_native_irq(irq);
++ ack_APIC_irq();
+ }
+
+-static void end_level_ioapic_irq (unsigned int irq)
++static void ack_ioapic_quirk_irq(unsigned int irq)
+ {
+ unsigned long v;
+ int i;
+
+- move_irq(irq);
++ move_native_irq(irq);
+ /*
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+@@ -2051,7 +2084,7 @@ static void end_level_ioapic_irq (unsign
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
+- i = IO_APIC_VECTOR(irq);
++ i = irq_vector[irq];
+
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+@@ -2066,104 +2099,24 @@ static void end_level_ioapic_irq (unsign
+ }
+ }
+
+-#ifdef CONFIG_PCI_MSI
+-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- return startup_edge_ioapic_irq(irq);
+-}
+-
+-static void ack_edge_ioapic_vector(unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- move_native_irq(vector);
+- ack_edge_ioapic_irq(irq);
+-}
+-
+-static unsigned int startup_level_ioapic_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- return startup_level_ioapic_irq (irq);
+-}
+-
+-static void end_level_ioapic_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- move_native_irq(vector);
+- end_level_ioapic_irq(irq);
+-}
+-
+-static void mask_IO_APIC_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- mask_IO_APIC_irq(irq);
+-}
+-
+-static void unmask_IO_APIC_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- unmask_IO_APIC_irq(irq);
+-}
+-
+-#ifdef CONFIG_SMP
+-static void set_ioapic_affinity_vector (unsigned int vector,
+- cpumask_t cpu_mask)
++static int ioapic_retrigger_irq(unsigned int irq)
+ {
+- int irq = vector_to_irq(vector);
+-
+- set_native_irq_info(vector, cpu_mask);
+- set_ioapic_affinity_irq(irq, cpu_mask);
+-}
+-#endif
+-#endif
+-
+-static int ioapic_retrigger(unsigned int irq)
+-{
+- send_IPI_self(IO_APIC_VECTOR(irq));
++ send_IPI_self(irq_vector[irq]);
+
+ return 1;
+ }
+
+-/*
+- * Level and edge triggered IO-APIC interrupts need different handling,
+- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+- * handled with the level-triggered descriptor, but that one has slightly
+- * more overhead. Level-triggered interrupts cannot be handled with the
+- * edge-triggered handler, without risking IRQ storms and other ugly
+- * races.
+- */
+-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+- .typename = "IO-APIC-edge",
+- .startup = startup_edge_ioapic,
+- .shutdown = shutdown_edge_ioapic,
+- .enable = enable_edge_ioapic,
+- .disable = disable_edge_ioapic,
+- .ack = ack_edge_ioapic,
+- .end = end_edge_ioapic,
++static struct irq_chip ioapic_chip __read_mostly = {
++ .name = "IO-APIC",
++ .startup = startup_ioapic_irq,
++ .mask = mask_IO_APIC_irq,
++ .unmask = unmask_IO_APIC_irq,
++ .ack = ack_ioapic_irq,
++ .eoi = ack_ioapic_quirk_irq,
+ #ifdef CONFIG_SMP
+- .set_affinity = set_ioapic_affinity,
++ .set_affinity = set_ioapic_affinity_irq,
+ #endif
+- .retrigger = ioapic_retrigger,
+-};
+-
+-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+- .typename = "IO-APIC-level",
+- .startup = startup_level_ioapic,
+- .shutdown = shutdown_level_ioapic,
+- .enable = enable_level_ioapic,
+- .disable = disable_level_ioapic,
+- .ack = mask_and_ack_level_ioapic,
+- .end = end_level_ioapic,
+-#ifdef CONFIG_SMP
+- .set_affinity = set_ioapic_affinity,
+-#endif
+- .retrigger = ioapic_retrigger,
++ .retrigger = ioapic_retrigger_irq,
+ };
+ #endif /* !CONFIG_XEN */
+
+@@ -2184,12 +2137,7 @@ static inline void init_IO_APIC_traps(vo
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ int tmp = irq;
+- if (use_pci_vector()) {
+- if (!platform_legacy_irq(tmp))
+- if ((tmp = vector_to_irq(tmp)) == -1)
+- continue;
+- }
+- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
++ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+@@ -2200,22 +2148,23 @@ static inline void init_IO_APIC_traps(vo
+ #ifndef CONFIG_XEN
+ else
+ /* Strange. Oh, well.. */
+- irq_desc[irq].chip = &no_irq_type;
++ irq_desc[irq].chip = &no_irq_chip;
+ #endif
+ }
+ }
+ }
+
+ #ifndef CONFIG_XEN
+-static void enable_lapic_irq (unsigned int irq)
+-{
+- unsigned long v;
++/*
++ * The local APIC irq-chip implementation:
++ */
+
+- v = apic_read(APIC_LVT0);
+- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
++static void ack_apic(unsigned int irq)
++{
++ ack_APIC_irq();
+ }
+
+-static void disable_lapic_irq (unsigned int irq)
++static void mask_lapic_irq (unsigned int irq)
+ {
+ unsigned long v;
+
+@@ -2223,21 +2172,19 @@ static void disable_lapic_irq (unsigned
+ apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
+ }
+
+-static void ack_lapic_irq (unsigned int irq)
++static void unmask_lapic_irq (unsigned int irq)
+ {
+- ack_APIC_irq();
+-}
++ unsigned long v;
+
+-static void end_lapic_irq (unsigned int i) { /* nothing */ }
++ v = apic_read(APIC_LVT0);
++ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
++}
+
+-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
+- .typename = "local-APIC-edge",
+- .startup = NULL, /* startup_irq() not used for IRQ0 */
+- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
+- .enable = enable_lapic_irq,
+- .disable = disable_lapic_irq,
+- .ack = ack_lapic_irq,
+- .end = end_lapic_irq
++static struct irq_chip lapic_chip __read_mostly = {
++ .name = "local-APIC-edge",
++ .mask = mask_lapic_irq,
++ .unmask = unmask_lapic_irq,
++ .eoi = ack_apic,
+ };
+
+ static void setup_nmi (void)
+@@ -2270,17 +2217,13 @@ static inline void unlock_ExtINT_logic(v
+ int apic, pin, i;
+ struct IO_APIC_route_entry entry0, entry1;
+ unsigned char save_control, save_freq_select;
+- unsigned long flags;
+
+ pin = find_isa_irq_pin(8, mp_INT);
+ apic = find_isa_irq_apic(8, mp_INT);
+ if (pin == -1)
+ return;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry0 = ioapic_read_entry(apic, pin);
+ clear_IO_APIC_pin(apic, pin);
+
+ memset(&entry1, 0, sizeof(entry1));
+@@ -2293,10 +2236,7 @@ static inline void unlock_ExtINT_logic(v
+ entry1.trigger = 0;
+ entry1.vector = 0;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry1);
+
+ save_control = CMOS_READ(RTC_CONTROL);
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+@@ -2315,10 +2255,7 @@ static inline void unlock_ExtINT_logic(v
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+ clear_IO_APIC_pin(apic, pin);
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(apic, pin, entry0);
+ }
+
+ int timer_uses_ioapic_pin_0;
+@@ -2418,7 +2355,8 @@ static inline void check_timer(void)
+ printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
+
+ disable_8259A_irq(0);
+- irq_desc[0].chip = &lapic_irq_type;
++ set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
++ "fasteio");
+ apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
+ enable_8259A_irq(0);
+
+@@ -2530,17 +2468,12 @@ static int ioapic_suspend(struct sys_dev
+ {
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+- unsigned long flags;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+- }
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
++ entry[i] = ioapic_read_entry(dev->id, i);
+
+ return 0;
+ }
+@@ -2562,11 +2495,9 @@ static int ioapic_resume(struct sys_devi
+ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ io_apic_write(dev->id, 0, reg_00.raw);
+ }
+- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+- }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
++ ioapic_write_entry(dev->id, i, entry[i]);
+
+ return 0;
+ }
+@@ -2612,6 +2543,242 @@ static int __init ioapic_init_sysfs(void
+
+ device_initcall(ioapic_init_sysfs);
+
++#ifndef CONFIG_XEN
++/*
++ * Dynamic irq allocate and deallocation
++ */
++int create_irq(void)
++{
++ /* Allocate an unused irq */
++ int irq, new, vector;
++ unsigned long flags;
++
++ irq = -ENOSPC;
++ spin_lock_irqsave(&vector_lock, flags);
++ for (new = (NR_IRQS - 1); new >= 0; new--) {
++ if (platform_legacy_irq(new))
++ continue;
++ if (irq_vector[new] != 0)
++ continue;
++ vector = __assign_irq_vector(new);
++ if (likely(vector > 0))
++ irq = new;
++ break;
++ }
++ spin_unlock_irqrestore(&vector_lock, flags);
++
++ if (irq >= 0) {
++#ifndef CONFIG_XEN
++ set_intr_gate(vector, interrupt[irq]);
++#endif
++ dynamic_irq_init(irq);
++ }
++ return irq;
++}
++
++void destroy_irq(unsigned int irq)
++{
++ unsigned long flags;
++
++ dynamic_irq_cleanup(irq);
++
++ spin_lock_irqsave(&vector_lock, flags);
++ irq_vector[irq] = 0;
++ spin_unlock_irqrestore(&vector_lock, flags);
++}
++#endif
++
++/*
++ * MSI message composition
++ */
++#ifdef CONFIG_PCI_MSI
++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
++{
++ int vector;
++ unsigned dest;
++
++ vector = assign_irq_vector(irq);
++ if (vector >= 0) {
++ dest = cpu_mask_to_apicid(TARGET_CPUS);
++
++ msg->address_hi = MSI_ADDR_BASE_HI;
++ msg->address_lo =
++ MSI_ADDR_BASE_LO |
++ ((INT_DEST_MODE == 0) ?
++ MSI_ADDR_DEST_MODE_PHYSICAL:
++ MSI_ADDR_DEST_MODE_LOGICAL) |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ MSI_ADDR_REDIRECTION_CPU:
++ MSI_ADDR_REDIRECTION_LOWPRI) |
++ MSI_ADDR_DEST_ID(dest);
++
++ msg->data =
++ MSI_DATA_TRIGGER_EDGE |
++ MSI_DATA_LEVEL_ASSERT |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ MSI_DATA_DELIVERY_FIXED:
++ MSI_DATA_DELIVERY_LOWPRI) |
++ MSI_DATA_VECTOR(vector);
++ }
++ return vector;
++}
++
++#ifdef CONFIG_SMP
++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++ struct msi_msg msg;
++ unsigned int dest;
++ cpumask_t tmp;
++ int vector;
++
++ cpus_and(tmp, mask, cpu_online_map);
++ if (cpus_empty(tmp))
++ tmp = TARGET_CPUS;
++
++ vector = assign_irq_vector(irq);
++ if (vector < 0)
++ return;
++
++ dest = cpu_mask_to_apicid(mask);
++
++ read_msi_msg(irq, &msg);
++
++ msg.data &= ~MSI_DATA_VECTOR_MASK;
++ msg.data |= MSI_DATA_VECTOR(vector);
++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
++ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
++
++ write_msi_msg(irq, &msg);
++ set_native_irq_info(irq, mask);
++}
++#endif /* CONFIG_SMP */
++
++/*
++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
++ * which implement the MSI or MSI-X Capability Structure.
++ */
++static struct irq_chip msi_chip = {
++ .name = "PCI-MSI",
++ .unmask = unmask_msi_irq,
++ .mask = mask_msi_irq,
++ .ack = ack_ioapic_irq,
++#ifdef CONFIG_SMP
++ .set_affinity = set_msi_irq_affinity,
++#endif
++ .retrigger = ioapic_retrigger_irq,
++};
++
++int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
++{
++ struct msi_msg msg;
++ int ret;
++ ret = msi_compose_msg(dev, irq, &msg);
++ if (ret < 0)
++ return ret;
++
++ write_msi_msg(irq, &msg);
++
++ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
++ "edge");
++
++ return 0;
++}
++
++void arch_teardown_msi_irq(unsigned int irq)
++{
++ return;
++}
++
++#endif /* CONFIG_PCI_MSI */
++
++/*
++ * Hypertransport interrupt support
++ */
++#ifdef CONFIG_HT_IRQ
++
++#ifdef CONFIG_SMP
++
++static void target_ht_irq(unsigned int irq, unsigned int dest)
++{
++ struct ht_irq_msg msg;
++ fetch_ht_irq_msg(irq, &msg);
++
++ msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
++ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
++
++ msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
++ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
++
++ write_ht_irq_msg(irq, &msg);
++}
++
++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++ unsigned int dest;
++ cpumask_t tmp;
++
++ cpus_and(tmp, mask, cpu_online_map);
++ if (cpus_empty(tmp))
++ tmp = TARGET_CPUS;
++
++ cpus_and(mask, tmp, CPU_MASK_ALL);
++
++ dest = cpu_mask_to_apicid(mask);
++
++ target_ht_irq(irq, dest);
++ set_native_irq_info(irq, mask);
++}
++#endif
++
++static struct irq_chip ht_irq_chip = {
++ .name = "PCI-HT",
++ .mask = mask_ht_irq,
++ .unmask = unmask_ht_irq,
++ .ack = ack_ioapic_irq,
++#ifdef CONFIG_SMP
++ .set_affinity = set_ht_irq_affinity,
++#endif
++ .retrigger = ioapic_retrigger_irq,
++};
++
++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
++{
++ int vector;
++
++ vector = assign_irq_vector(irq);
++ if (vector >= 0) {
++ struct ht_irq_msg msg;
++ unsigned dest;
++ cpumask_t tmp;
++
++ cpus_clear(tmp);
++ cpu_set(vector >> 8, tmp);
++ dest = cpu_mask_to_apicid(tmp);
++
++ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
++
++ msg.address_lo =
++ HT_IRQ_LOW_BASE |
++ HT_IRQ_LOW_DEST_ID(dest) |
++ HT_IRQ_LOW_VECTOR(vector) |
++ ((INT_DEST_MODE == 0) ?
++ HT_IRQ_LOW_DM_PHYSICAL :
++ HT_IRQ_LOW_DM_LOGICAL) |
++ HT_IRQ_LOW_RQEOI_EDGE |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ HT_IRQ_LOW_MT_FIXED :
++ HT_IRQ_LOW_MT_ARBITRATED) |
++ HT_IRQ_LOW_IRQ_MASKED;
++
++ write_ht_irq_msg(irq, &msg);
++
++ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
++ handle_edge_irq, "edge");
++ }
++ return vector;
++}
++#endif /* CONFIG_HT_IRQ */
++
+ /* --------------------------------------------------------------------------
+ ACPI-based IOAPIC Configuration
+ -------------------------------------------------------------------------- */
+@@ -2765,13 +2932,34 @@ int io_apic_set_pci_routing (int ioapic,
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
++ ioapic_write_entry(ioapic, pin, entry);
+ spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
++ set_native_irq_info(irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
+ }
+
+ #endif /* CONFIG_ACPI */
++
++static int __init parse_disable_timer_pin_1(char *arg)
++{
++ disable_timer_pin_1 = 1;
++ return 0;
++}
++early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
++
++static int __init parse_enable_timer_pin_1(char *arg)
++{
++ disable_timer_pin_1 = -1;
++ return 0;
++}
++early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
++
++static int __init parse_noapic(char *arg)
++{
++ /* disable IO-APIC */
++ disable_ioapic_setup();
++ return 0;
++}
++early_param("noapic", parse_noapic);
+Index: 10.3-2007-11-26/arch/i386/kernel/irq-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/irq-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/irq-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU
+ */
+ fastcall unsigned int do_IRQ(struct pt_regs *regs)
+ {
++ struct pt_regs *old_regs;
+ /* high bit used in ret_from_ code */
+ int irq = ~regs->orig_eax;
++ struct irq_desc *desc = irq_desc + irq;
+ #ifdef CONFIG_4KSTACKS
+ union irq_ctx *curctx, *irqctx;
+ u32 *isp;
+@@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r
+ BUG();
+ }
+
++ old_regs = set_irq_regs(regs);
+ irq_enter();
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /* Debugging check for stack overflow: is there less than 1KB free? */
+@@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r
+ (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+ asm volatile(
+- " xchgl %%ebx,%%esp \n"
+- " call __do_IRQ \n"
++ " xchgl %%ebx,%%esp \n"
++ " call *%%edi \n"
+ " movl %%ebx,%%esp \n"
+ : "=a" (arg1), "=d" (arg2), "=b" (ebx)
+- : "0" (irq), "1" (regs), "2" (isp)
+- : "memory", "cc", "ecx"
++ : "0" (irq), "1" (desc), "2" (isp),
++ "D" (desc->handle_irq)
++ : "memory", "cc"
+ );
+ } else
+ #endif
+- __do_IRQ(irq, regs);
++ desc->handle_irq(irq, desc);
+
+ irq_exit();
+-
++ set_irq_regs(old_regs);
+ return 1;
+ }
+
+@@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p,
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+- seq_printf(p, " %14s", irq_desc[i].chip->typename);
++ seq_printf(p, " %8s", irq_desc[i].chip->name);
++ seq_printf(p, "-%-8s", irq_desc[i].name);
+ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+Index: 10.3-2007-11-26/arch/i386/kernel/ldt-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/ldt-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/ldt-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -1,5 +1,5 @@
+ /*
+- * linux/kernel/ldt.c
++ * linux/arch/i386/kernel/ldt.c
+ *
+ * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+Index: 10.3-2007-11-26/arch/i386/kernel/machine_kexec.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/machine_kexec.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/machine_kexec.c 2007-10-22 13:53:08.000000000 +0200
+@@ -137,6 +137,7 @@ NORET_TYPE void machine_kexec(struct kim
+ */
+ static int __init parse_crashkernel(char *arg)
+ {
++#ifndef CONFIG_XEN
+ unsigned long size, base;
+ size = memparse(arg, &arg);
+ if (*arg == '@') {
+@@ -147,6 +148,10 @@ static int __init parse_crashkernel(char
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
++#else
++ printk("Ignoring crashkernel command line, "
++ "parameter will be supplied by xen\n");
++#endif
+ return 0;
+ }
+ early_param("crashkernel", parse_crashkernel);
+Index: 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/microcode-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -2,6 +2,7 @@
+ * Intel CPU Microcode Update Driver for Linux
+ *
+ * Copyright (C) 2000-2004 Tigran Aivazian
++ * 2006 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This driver allows to upgrade microcode on Intel processors
+ * belonging to IA-32 family - PentiumPro, Pentium II,
+@@ -33,7 +34,9 @@
+ #include <linux/spinlock.h>
+ #include <linux/mm.h>
+ #include <linux/mutex.h>
+-#include <linux/syscalls.h>
++#include <linux/cpu.h>
++#include <linux/firmware.h>
++#include <linux/platform_device.h>
+
+ #include <asm/msr.h>
+ #include <asm/uaccess.h>
+@@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
+ /* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+ static DEFINE_MUTEX(microcode_mutex);
+
+-static int microcode_open (struct inode *unused1, struct file *unused2)
+-{
+- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+-}
+-
+-
++#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+ static int do_microcode_update (const void __user *ubuf, size_t len)
+ {
+ int err;
+@@ -85,6 +83,11 @@ static int do_microcode_update (const vo
+ return err;
+ }
+
++static int microcode_open (struct inode *unused1, struct file *unused2)
++{
++ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
++}
++
+ static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
+ {
+ ssize_t ret;
+@@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
+ .fops = &microcode_fops,
+ };
+
+-static int __init microcode_init (void)
++static int __init microcode_dev_init (void)
+ {
+ int error;
+
+@@ -129,6 +132,69 @@ static int __init microcode_init (void)
+ return error;
+ }
+
++ return 0;
++}
++
++static void __exit microcode_dev_exit (void)
++{
++ misc_deregister(&microcode_dev);
++}
++
++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
++#else
++#define microcode_dev_init() 0
++#define microcode_dev_exit() do { } while(0)
++#endif
++
++/* fake device for request_firmware */
++static struct platform_device *microcode_pdev;
++
++static int request_microcode(void)
++{
++ char name[30];
++ const struct cpuinfo_x86 *c = &boot_cpu_data;
++ const struct firmware *firmware;
++ int error;
++ struct xen_platform_op op;
++
++ sprintf(name,"intel-ucode/%02x-%02x-%02x",
++ c->x86, c->x86_model, c->x86_mask);
++ error = request_firmware(&firmware, name, &microcode_pdev->dev);
++ if (error) {
++ pr_debug("ucode data file %s load failed\n", name);
++ return error;
++ }
++
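++ /* Under Xen, microcode updates are applied by the hypervisor: hand it the firmware blob. */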
++ op.cmd = XENPF_microcode_update;
++ set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
++ op.u.microcode.length = firmware->size;
++ error = HYPERVISOR_platform_op(&op);
++
++ release_firmware(firmware);
++
++ if (error)
++ pr_debug("ucode load failed\n");
++
++ return error;
++}
++
++static int __init microcode_init (void)
++{
++ int error;
++
++ error = microcode_dev_init();
++ if (error)
++ return error;
++ microcode_pdev = platform_device_register_simple("microcode", -1,
++ NULL, 0);
++ if (IS_ERR(microcode_pdev)) {
++ microcode_dev_exit();
++ return PTR_ERR(microcode_pdev);
++ }
++
++ request_microcode();
++
+ printk(KERN_INFO
+ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
+ return 0;
+@@ -136,9 +201,9 @@ static int __init microcode_init (void)
+
+ static void __exit microcode_exit (void)
+ {
+- misc_deregister(&microcode_dev);
++ microcode_dev_exit();
++ platform_device_unregister(microcode_pdev);
+ }
+
+ module_init(microcode_init)
+ module_exit(microcode_exit)
+-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+Index: 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/mpparse-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -30,6 +30,7 @@
+ #include <asm/io_apic.h>
+
+ #include <mach_apic.h>
++#include <mach_apicdef.h>
+ #include <mach_mpparse.h>
+ #include <bios_ebda.h>
+
+@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
+ /* Processor that is doing the boot up */
+ unsigned int boot_cpu_physical_apicid = -1U;
+ /* Internal processor count */
+-static unsigned int __devinitdata num_processors;
++unsigned int __cpuinitdata num_processors;
+
+ /* Bitmask of physically existing CPUs */
+ physid_mask_t phys_cpu_present_map;
+@@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
+
+ mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+
++#if MAX_MP_BUSSES < 256
+ if (m->mpc_busid >= MAX_MP_BUSSES) {
+ printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
+ " is too large, max. supported is %d\n",
+ m->mpc_busid, str, MAX_MP_BUSSES - 1);
+ return;
+ }
++#endif
+
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+@@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+- /*
+- * Well it seems all SMP boards in existence
+- * use ExtINT/LVT1 == LINT0 and
+- * NMI/LVT2 == LINT1 - the following check
+- * will show us if this assumptions is false.
+- * Until then we do not have to add baggage.
+- */
+- if ((m->mpc_irqtype == mp_ExtINT) &&
+- (m->mpc_destapiclint != 0))
+- BUG();
+- if ((m->mpc_irqtype == mp_NMI) &&
+- (m->mpc_destapiclint != 1))
+- BUG();
+ }
+
+ #ifdef CONFIG_X86_NUMAQ
+@@ -838,8 +828,7 @@ int es7000_plat;
+
+ #ifdef CONFIG_ACPI
+
+-void __init mp_register_lapic_address (
+- u64 address)
++void __init mp_register_lapic_address(u64 address)
+ {
+ #ifndef CONFIG_XEN
+ mp_lapic_addr = (unsigned long) address;
+@@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
+ #endif
+ }
+
+-
+-void __devinit mp_register_lapic (
+- u8 id,
+- u8 enabled)
++void __devinit mp_register_lapic (u8 id, u8 enabled)
+ {
+ struct mpc_config_processor processor;
+- int boot_cpu = 0;
++ int boot_cpu = 0;
+
+ if (MAX_APICS - id <= 0) {
+ printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+@@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
+ u32 pin_programmed[4];
+ } mp_ioapic_routing[MAX_IO_APICS];
+
+-
+-static int mp_find_ioapic (
+- int gsi)
++static int mp_find_ioapic (int gsi)
+ {
+- int i = 0;
++ int i = 0;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+@@ -915,15 +899,11 @@ static int mp_find_ioapic (
+
+ return -1;
+ }
+-
+
+-void __init mp_register_ioapic (
+- u8 id,
+- u32 address,
+- u32 gsi_base)
++void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+ {
+- int idx = 0;
+- int tmpid;
++ int idx = 0;
++ int tmpid;
+
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+@@ -971,16 +951,10 @@ void __init mp_register_ioapic (
+ mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].gsi_base,
+ mp_ioapic_routing[idx].gsi_end);
+-
+- return;
+ }
+
+-
+-void __init mp_override_legacy_irq (
+- u8 bus_irq,
+- u8 polarity,
+- u8 trigger,
+- u32 gsi)
++void __init
++mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+ {
+ struct mpc_config_intsrc intsrc;
+ int ioapic = -1;
+@@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+-
+- return;
+ }
+
+ void __init mp_config_acpi_legacy_irqs (void)
+ {
+ struct mpc_config_intsrc intsrc;
+- int i = 0;
+- int ioapic = -1;
++ int i = 0;
++ int ioapic = -1;
+
+ /*
+ * Fabricate the legacy ISA bus (bus #31).
+@@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
+
+ #define MAX_GSI_NUM 4096
+
+-int mp_register_gsi (u32 gsi, int triggering, int polarity)
++int mp_register_gsi(u32 gsi, int triggering, int polarity)
+ {
+- int ioapic = -1;
+- int ioapic_pin = 0;
+- int idx, bit = 0;
+- static int pci_irq = 16;
++ int ioapic = -1;
++ int ioapic_pin = 0;
++ int idx, bit = 0;
++ static int pci_irq = 16;
+ /*
+ * Mapping between Global System Interrups, which
+ * represent all possible interrupts, and IRQs
+Index: 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/pci-dma-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -83,8 +83,7 @@ dma_map_sg(struct device *hwdev, struct
+ {
+ int i, rc;
+
+- if (direction == DMA_NONE)
+- BUG();
++ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(nents == 0 || sg[0].length == 0);
+
+ if (swiotlb) {
+@@ -115,7 +114,7 @@ dma_unmap_sg(struct device *hwdev, struc
+ {
+ int i;
+
+- BUG_ON(direction == DMA_NONE);
++ BUG_ON(!valid_dma_direction(direction));
+ if (swiotlb)
+ swiotlb_unmap_sg(hwdev, sg, nents, direction);
+ else {
+@@ -132,8 +131,7 @@ dma_map_page(struct device *dev, struct
+ {
+ dma_addr_t dma_addr;
+
+- BUG_ON(direction == DMA_NONE);
+-
++ BUG_ON(!valid_dma_direction(direction));
+ if (swiotlb) {
+ dma_addr = swiotlb_map_page(
+ dev, page, offset, size, direction);
+@@ -150,7 +148,7 @@ void
+ dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+ {
+- BUG_ON(direction == DMA_NONE);
++ BUG_ON(!valid_dma_direction(direction));
+ if (swiotlb)
+ swiotlb_unmap_page(dev, dma_address, size, direction);
+ else
+@@ -332,8 +330,7 @@ dma_map_single(struct device *dev, void
+ {
+ dma_addr_t dma;
+
+- if (direction == DMA_NONE)
+- BUG();
++ BUG_ON(!valid_dma_direction(direction));
+ WARN_ON(size == 0);
+
+ if (swiotlb) {
+@@ -354,8 +351,7 @@ void
+ dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+ {
+- if (direction == DMA_NONE)
+- BUG();
++ BUG_ON(!valid_dma_direction(direction));
+ if (swiotlb)
+ swiotlb_unmap_single(dev, dma_addr, size, direction);
+ else
+Index: 10.3-2007-11-26/arch/i386/kernel/process-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/process-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/process-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -37,6 +37,7 @@
+ #include <linux/kallsyms.h>
+ #include <linux/ptrace.h>
+ #include <linux/random.h>
++#include <linux/personality.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/pgtable.h>
+@@ -186,7 +187,7 @@ void cpu_idle(void)
+ void cpu_idle_wait(void)
+ {
+ unsigned int cpu, this_cpu = get_cpu();
+- cpumask_t map;
++ cpumask_t map, tmp = current->cpus_allowed;
+
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+@@ -208,6 +209,8 @@ void cpu_idle_wait(void)
+ }
+ cpus_and(map, map, cpu_online_map);
+ } while (!cpus_empty(map));
++
++ set_cpus_allowed(current, tmp);
+ }
+ EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+@@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
+ if (user_mode_vm(regs))
+ printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
+ printk(" EFLAGS: %08lx %s (%s %.*s)\n",
+- regs->eflags, print_tainted(), system_utsname.release,
+- (int)strcspn(system_utsname.version, " "),
+- system_utsname.version);
++ regs->eflags, print_tainted(), init_utsname()->release,
++ (int)strcspn(init_utsname()->version, " "),
++ init_utsname()->version);
+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+ regs->eax,regs->ebx,regs->ecx,regs->edx);
+ printk("ESI: %08lx EDI: %08lx EBP: %08lx",
+@@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
+ * the "args".
+ */
+ extern void kernel_thread_helper(void);
+-__asm__(".section .text\n"
+- ".align 4\n"
+- "kernel_thread_helper:\n\t"
+- "movl %edx,%eax\n\t"
+- "pushl %edx\n\t"
+- "call *%ebx\n\t"
+- "pushl %eax\n\t"
+- "call do_exit\n"
+- ".previous");
+
+ /*
+ * Create a kernel thread
+@@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
+ regs.xes = __USER_DS;
+ regs.orig_eax = -1;
+ regs.eip = (unsigned long) kernel_thread_helper;
+- regs.xcs = GET_KERNEL_CS();
++ regs.xcs = __KERNEL_CS | get_kernel_rpl();
+ regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
+
+ /* Ok, create the new process.. */
+@@ -368,13 +362,12 @@ int copy_thread(int nr, unsigned long cl
+
+ tsk = current;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
+- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
++ p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
++ IO_BITMAP_BYTES, GFP_KERNEL);
+ if (!p->thread.io_bitmap_ptr) {
+ p->thread.io_bitmap_max = 0;
+ return -ENOMEM;
+ }
+- memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
+- IO_BITMAP_BYTES);
+ set_tsk_thread_flag(p, TIF_IO_BITMAP);
+ }
+
+@@ -847,7 +840,7 @@ asmlinkage int sys_get_thread_area(struc
+
+ unsigned long arch_align_stack(unsigned long sp)
+ {
+- if (randomize_va_space)
++ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+ }
+Index: 10.3-2007-11-26/arch/i386/kernel/setup-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/setup-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/setup-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -56,6 +56,7 @@
+ #include <asm/apic.h>
+ #include <asm/e820.h>
+ #include <asm/mpspec.h>
++#include <asm/mmzone.h>
+ #include <asm/setup.h>
+ #include <asm/arch_hooks.h>
+ #include <asm/sections.h>
+@@ -105,18 +106,6 @@ EXPORT_SYMBOL(boot_cpu_data);
+
+ unsigned long mmu_cr4_features;
+
+-#ifdef CONFIG_ACPI
+- int acpi_disabled = 0;
+-#else
+- int acpi_disabled = 1;
+-#endif
+-EXPORT_SYMBOL(acpi_disabled);
+-
+-#ifdef CONFIG_ACPI
+-int __initdata acpi_force = 0;
+-extern acpi_interrupt_flags acpi_sci_flags;
+-#endif
+-
+ /* for MCA, but anyone else can use it if they want */
+ unsigned int machine_id;
+ #ifdef CONFIG_MCA
+@@ -170,7 +159,6 @@ struct e820map machine_e820;
+ #endif
+
+ extern void early_cpu_init(void);
+-extern void generic_apic_probe(char *);
+ extern int root_mountflags;
+
+ unsigned long saved_videomode;
+@@ -243,9 +231,6 @@ static struct resource adapter_rom_resou
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
+ } };
+
+-#define ADAPTER_ROM_RESOURCES \
+- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+-
+ static struct resource video_rom_resource = {
+ .name = "Video ROM",
+ .start = 0xc0000,
+@@ -307,9 +292,6 @@ static struct resource standard_io_resou
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+ } };
+
+-#define STANDARD_IO_RESOURCES \
+- (sizeof standard_io_resources / sizeof standard_io_resources[0])
+-
+ #define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
+
+ static int __init romchecksum(unsigned char *rom, unsigned long length)
+@@ -372,7 +354,7 @@ static void __init probe_roms(void)
+ }
+
+ /* check for adapter roms on 2k boundaries */
+- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
++ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+@@ -764,246 +746,152 @@ static inline void copy_edd(void)
+ }
+ #endif
+
+-static void __init parse_cmdline_early (char ** cmdline_p)
++static int __initdata user_defined_memmap = 0;
++
++/*
++ * "mem=nopentium" disables the 4MB page tables.
++ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
++ * to <mem>, overriding the bios size.
++ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
++ * <start> to <start>+<mem>, overriding the bios size.
++ *
++ * HPA tells me bootloaders need to parse mem=, so no new
++ * option should be mem= [also see Documentation/i386/boot.txt]
++ */
++static int __init parse_mem(char *arg)
+ {
+- char c = ' ', *to = command_line, *from = saved_command_line;
+- int len = 0, max_cmdline;
+- int userdef = 0;
+-
+- if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+- max_cmdline = COMMAND_LINE_SIZE;
+- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
+- /* Save unparsed command line copy for /proc/cmdline */
+- saved_command_line[max_cmdline-1] = '\0';
+-
+- for (;;) {
+- if (c != ' ')
+- goto next_char;
+- /*
+- * "mem=nopentium" disables the 4MB page tables.
+- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+- * to <mem>, overriding the bios size.
+- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+- * <start> to <start>+<mem>, overriding the bios size.
+- *
+- * HPA tells me bootloaders need to parse mem=, so no new
+- * option should be mem= [also see Documentation/i386/boot.txt]
+- */
+- if (!memcmp(from, "mem=", 4)) {
+- if (to != command_line)
+- to--;
+- if (!memcmp(from+4, "nopentium", 9)) {
+- from += 9+4;
+- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
+- disable_pse = 1;
+- } else {
+- /* If the user specifies memory size, we
+- * limit the BIOS-provided memory map to
+- * that size. exactmap can be used to specify
+- * the exact map. mem=number can be used to
+- * trim the existing memory map.
+- */
+- unsigned long long mem_size;
+-
+- mem_size = memparse(from+4, &from);
+- limit_regions(mem_size);
+- userdef=1;
+- }
+- }
++ if (!arg)
++ return -EINVAL;
+
+- else if (!memcmp(from, "memmap=", 7)) {
+- if (to != command_line)
+- to--;
+- if (!memcmp(from+7, "exactmap", 8)) {
+-#ifdef CONFIG_CRASH_DUMP
+- /* If we are doing a crash dump, we
+- * still need to know the real mem
+- * size before original memory map is
+- * reset.
+- */
+- find_max_pfn();
+- saved_max_pfn = max_pfn;
+-#endif
+- from += 8+7;
+- e820.nr_map = 0;
+- userdef = 1;
+- } else {
+- /* If the user specifies memory size, we
+- * limit the BIOS-provided memory map to
+- * that size. exactmap can be used to specify
+- * the exact map. mem=number can be used to
+- * trim the existing memory map.
+- */
+- unsigned long long start_at, mem_size;
++ if (strcmp(arg, "nopentium") == 0) {
++ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
++ disable_pse = 1;
++ } else {
++ /* If the user specifies memory size, we
++ * limit the BIOS-provided memory map to
++ * that size. exactmap can be used to specify
++ * the exact map. mem=number can be used to
++ * trim the existing memory map.
++ */
++ unsigned long long mem_size;
+
+- mem_size = memparse(from+7, &from);
+- if (*from == '@') {
+- start_at = memparse(from+1, &from);
+- add_memory_region(start_at, mem_size, E820_RAM);
+- } else if (*from == '#') {
+- start_at = memparse(from+1, &from);
+- add_memory_region(start_at, mem_size, E820_ACPI);
+- } else if (*from == '$') {
+- start_at = memparse(from+1, &from);
+- add_memory_region(start_at, mem_size, E820_RESERVED);
+- } else {
+- limit_regions(mem_size);
+- userdef=1;
+- }
+- }
+- }
+-
+- else if (!memcmp(from, "noexec=", 7))
+- noexec_setup(from + 7);
++ mem_size = memparse(arg, &arg);
++ limit_regions(mem_size);
++ user_defined_memmap = 1;
++ }
++ return 0;
++}
++early_param("mem", parse_mem);
+
++static int __init parse_memmap(char *arg)
++{
++ if (!arg)
++ return -EINVAL;
+
+-#ifdef CONFIG_X86_MPPARSE
+- /*
+- * If the BIOS enumerates physical processors before logical,
+- * maxcpus=N at enumeration-time can be used to disable HT.
++ if (strcmp(arg, "exactmap") == 0) {
++#ifdef CONFIG_CRASH_DUMP
++ /* If we are doing a crash dump, we
++ * still need to know the real mem
++ * size before original memory map is
++ * reset.
+ */
+- else if (!memcmp(from, "maxcpus=", 8)) {
+- extern unsigned int maxcpus;
+-
+- maxcpus = simple_strtoul(from + 8, NULL, 0);
+- }
++ find_max_pfn();
++ saved_max_pfn = max_pfn;
+ #endif
++ e820.nr_map = 0;
++ user_defined_memmap = 1;
++ } else {
++ /* If the user specifies memory size, we
++ * limit the BIOS-provided memory map to
++ * that size. exactmap can be used to specify
++ * the exact map. mem=number can be used to
++ * trim the existing memory map.
++ */
++ unsigned long long start_at, mem_size;
+
+-#ifdef CONFIG_ACPI
+- /* "acpi=off" disables both ACPI table parsing and interpreter */
+- else if (!memcmp(from, "acpi=off", 8)) {
+- disable_acpi();
+- }
+-
+- /* acpi=force to over-ride black-list */
+- else if (!memcmp(from, "acpi=force", 10)) {
+- acpi_force = 1;
+- acpi_ht = 1;
+- acpi_disabled = 0;
+- }
+-
+- /* acpi=strict disables out-of-spec workarounds */
+- else if (!memcmp(from, "acpi=strict", 11)) {
+- acpi_strict = 1;
+- }
+-
+- /* Limit ACPI just to boot-time to enable HT */
+- else if (!memcmp(from, "acpi=ht", 7)) {
+- if (!acpi_force)
+- disable_acpi();
+- acpi_ht = 1;
+- }
+-
+- /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
+- else if (!memcmp(from, "pci=noacpi", 10)) {
+- acpi_disable_pci();
+- }
+- /* "acpi=noirq" disables ACPI interrupt routing */
+- else if (!memcmp(from, "acpi=noirq", 10)) {
+- acpi_noirq_set();
++ mem_size = memparse(arg, &arg);
++ if (*arg == '@') {
++ start_at = memparse(arg+1, &arg);
++ add_memory_region(start_at, mem_size, E820_RAM);
++ } else if (*arg == '#') {
++ start_at = memparse(arg+1, &arg);
++ add_memory_region(start_at, mem_size, E820_ACPI);
++ } else if (*arg == '$') {
++ start_at = memparse(arg+1, &arg);
++ add_memory_region(start_at, mem_size, E820_RESERVED);
++ } else {
++ limit_regions(mem_size);
++ user_defined_memmap = 1;
+ }
++ }
++ return 0;
++}
++early_param("memmap", parse_memmap);
+
+- else if (!memcmp(from, "acpi_sci=edge", 13))
+- acpi_sci_flags.trigger = 1;
+-
+- else if (!memcmp(from, "acpi_sci=level", 14))
+- acpi_sci_flags.trigger = 3;
++#ifdef CONFIG_PROC_VMCORE
++/* elfcorehdr= specifies the location of elf core header
++ * stored by the crashed kernel.
++ */
++static int __init parse_elfcorehdr(char *arg)
++{
++ if (!arg)
++ return -EINVAL;
+
+- else if (!memcmp(from, "acpi_sci=high", 13))
+- acpi_sci_flags.polarity = 1;
++ elfcorehdr_addr = memparse(arg, &arg);
++ return 0;
++}
++early_param("elfcorehdr", parse_elfcorehdr);
++#endif /* CONFIG_PROC_VMCORE */
+
+- else if (!memcmp(from, "acpi_sci=low", 12))
+- acpi_sci_flags.polarity = 3;
++/*
++ * highmem=size forces highmem to be exactly 'size' bytes.
++ * This works even on boxes that have no highmem otherwise.
++ * This also works to reduce highmem size on bigger boxes.
++ */
++static int __init parse_highmem(char *arg)
++{
++ if (!arg)
++ return -EINVAL;
+
+-#ifdef CONFIG_X86_IO_APIC
+- else if (!memcmp(from, "acpi_skip_timer_override", 24))
+- acpi_skip_timer_override = 1;
++ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
++ return 0;
++}
++early_param("highmem", parse_highmem);
+
+- if (!memcmp(from, "disable_timer_pin_1", 19))
+- disable_timer_pin_1 = 1;
+- if (!memcmp(from, "enable_timer_pin_1", 18))
+- disable_timer_pin_1 = -1;
+-
+- /* disable IO-APIC */
+- else if (!memcmp(from, "noapic", 6))
+- disable_ioapic_setup();
+-#endif /* CONFIG_X86_IO_APIC */
+-#endif /* CONFIG_ACPI */
++/*
++ * vmalloc=size forces the vmalloc area to be exactly 'size'
++ * bytes. This can be used to increase (or decrease) the
++ * vmalloc area - the default is 128m.
++ */
++static int __init parse_vmalloc(char *arg)
++{
++ if (!arg)
++ return -EINVAL;
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+- /* enable local APIC */
+- else if (!memcmp(from, "lapic", 5))
+- lapic_enable();
+-
+- /* disable local APIC */
+- else if (!memcmp(from, "nolapic", 6))
+- lapic_disable();
+-#endif /* CONFIG_X86_LOCAL_APIC */
++ __VMALLOC_RESERVE = memparse(arg, &arg);
++ return 0;
++}
++early_param("vmalloc", parse_vmalloc);
+
+-#ifdef CONFIG_KEXEC
+- /* crashkernel=size@addr specifies the location to reserve for
+- * a crash kernel. By reserving this memory we guarantee
+- * that linux never set's it up as a DMA target.
+- * Useful for holding code to do something appropriate
+- * after a kernel panic.
+- */
+- else if (!memcmp(from, "crashkernel=", 12)) {
+ #ifndef CONFIG_XEN
+- unsigned long size, base;
+- size = memparse(from+12, &from);
+- if (*from == '@') {
+- base = memparse(from+1, &from);
+- /* FIXME: Do I want a sanity check
+- * to validate the memory range?
+- */
+- crashk_res.start = base;
+- crashk_res.end = base + size - 1;
+- }
+-#else
+- printk("Ignoring crashkernel command line, "
+- "parameter will be supplied by xen\n");
+-#endif
+- }
+-#endif
+-#ifdef CONFIG_PROC_VMCORE
+- /* elfcorehdr= specifies the location of elf core header
+- * stored by the crashed kernel.
+- */
+- else if (!memcmp(from, "elfcorehdr=", 11))
+- elfcorehdr_addr = memparse(from+11, &from);
+-#endif
++/*
++ * reservetop=size reserves a hole at the top of the kernel address space which
++ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
++ * so relocating the fixmap can be done before paging initialization.
++ */
++static int __init parse_reservetop(char *arg)
++{
++ unsigned long address;
+
+- /*
+- * highmem=size forces highmem to be exactly 'size' bytes.
+- * This works even on boxes that have no highmem otherwise.
+- * This also works to reduce highmem size on bigger boxes.
+- */
+- else if (!memcmp(from, "highmem=", 8))
+- highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
+-
+- /*
+- * vmalloc=size forces the vmalloc area to be exactly 'size'
+- * bytes. This can be used to increase (or decrease) the
+- * vmalloc area - the default is 128m.
+- */
+- else if (!memcmp(from, "vmalloc=", 8))
+- __VMALLOC_RESERVE = memparse(from+8, &from);
++ if (!arg)
++ return -EINVAL;
+
+- next_char:
+- c = *(from++);
+- if (!c)
+- break;
+- if (COMMAND_LINE_SIZE <= ++len)
+- break;
+- *(to++) = c;
+- }
+- *to = '\0';
+- *cmdline_p = command_line;
+- if (userdef) {
+- printk(KERN_INFO "user-defined physical RAM map:\n");
+- print_memory_map("user");
+- }
++ address = memparse(arg, &arg);
++ reserve_top_address(address);
++ return 0;
+ }
++early_param("reservetop", parse_reservetop);
++#endif
+
+ /*
+ * Callback for efi_memory_walk.
+@@ -1024,7 +912,7 @@ efi_find_max_pfn(unsigned long start, un
+ static int __init
+ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
+ {
+- memory_present(0, start, end);
++ memory_present(0, PFN_UP(start), PFN_DOWN(end));
+ return 0;
+ }
+
+@@ -1262,6 +1150,14 @@ static unsigned long __init setup_memory
+ }
+ printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+ pages_to_mb(highend_pfn - highstart_pfn));
++ num_physpages = highend_pfn;
++ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
++#else
++ num_physpages = max_low_pfn;
++ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
++#endif
++#ifdef CONFIG_FLATMEM
++ max_mapnr = num_physpages;
+ #endif
+ printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
+ pages_to_mb(max_low_pfn));
+@@ -1273,9 +1169,9 @@ static unsigned long __init setup_memory
+
+ void __init zone_sizes_init(void)
+ {
+- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+- unsigned int max_dma, low;
++ unsigned long max_zone_pfns[MAX_NR_ZONES];
+
++ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+ /*
+ * XEN: Our notion of "DMA memory" is fake when running over Xen.
+ * We simply put all RAM in the DMA zone so that those drivers which
+@@ -1283,19 +1179,16 @@ void __init zone_sizes_init(void)
+ * Those drivers that *do* require lowmem are screwed anyway when
+ * running over Xen!
+ */
+- max_dma = max_low_pfn;
+- low = max_low_pfn;
+-
+- if (low < max_dma)
+- zones_size[ZONE_DMA] = low;
+- else {
+- zones_size[ZONE_DMA] = max_dma;
+- zones_size[ZONE_NORMAL] = low - max_dma;
++ max_zone_pfns[ZONE_DMA] = max_low_pfn;
++ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+ #ifdef CONFIG_HIGHMEM
+- zones_size[ZONE_HIGHMEM] = highend_pfn - low;
++ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
++ add_active_range(0, 0, highend_pfn);
++#else
++ add_active_range(0, 0, max_low_pfn);
+ #endif
+- }
+- free_area_init(zones_size);
++
++ free_area_init_nodes(max_zone_pfns);
+ }
+ #else
+ extern unsigned long __init setup_memory(void);
+@@ -1352,6 +1245,7 @@ void __init setup_bootmem_allocator(void
+ */
+ acpi_reserve_bootmem();
+ #endif
++ numa_kva_reserve();
+ #endif /* !CONFIG_XEN */
+
+ #ifdef CONFIG_BLK_DEV_INITRD
+@@ -1541,7 +1435,7 @@ static int __init request_standard_resou
+ request_resource(&iomem_resource, &video_ram_resource);
+
+ /* request I/O space for devices used on all i[345]86 PCs */
+- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
++ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+ return 0;
+ }
+@@ -1692,17 +1586,19 @@ void __init setup_arch(char **cmdline_p)
+ data_resource.start = virt_to_phys(_etext);
+ data_resource.end = virt_to_phys(_edata)-1;
+
+- parse_cmdline_early(cmdline_p);
++ if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
++ i = COMMAND_LINE_SIZE;
++ memcpy(saved_command_line, xen_start_info->cmd_line, i);
++ saved_command_line[i - 1] = '\0';
++ parse_early_param();
+
+-#ifdef CONFIG_EARLY_PRINTK
+- {
+- char *s = strstr(*cmdline_p, "earlyprintk=");
+- if (s) {
+- setup_early_printk(strchr(s, '=') + 1);
+- printk("early console enabled\n");
+- }
++ if (user_defined_memmap) {
++ printk(KERN_INFO "user-defined physical RAM map:\n");
++ print_memory_map("user");
+ }
+-#endif
++
++ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
++ *cmdline_p = command_line;
+
+ max_low_pfn = setup_memory();
+
+@@ -1778,7 +1674,7 @@ void __init setup_arch(char **cmdline_p)
+ dmi_scan_machine();
+
+ #ifdef CONFIG_X86_GENERICARCH
+- generic_apic_probe(*cmdline_p);
++ generic_apic_probe();
+ #endif
+ if (efi_enabled)
+ efi_map_memmap();
+@@ -1799,9 +1695,11 @@ void __init setup_arch(char **cmdline_p)
+ acpi_boot_table_init();
+ #endif
+
++#ifdef CONFIG_PCI
+ #ifdef CONFIG_X86_IO_APIC
+ check_acpi_pci(); /* Checks more than just ACPI actually */
+ #endif
++#endif
+
+ #ifdef CONFIG_ACPI
+ acpi_boot_init();
+Index: 10.3-2007-11-26/arch/i386/kernel/smp-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/smp-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/smp-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+- struct pt_regs *regs)
++irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
+ {
+ unsigned long cpu;
+
+@@ -443,8 +442,7 @@ void flush_tlb_all(void)
+
+ #else
+
+-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
+- struct pt_regs *regs)
++irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
+ { return 0; }
+ void flush_tlb_current_task(void)
+ { xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+@@ -586,16 +584,14 @@ void smp_send_stop(void)
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
+- struct pt_regs *regs)
++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
+ {
+
+ return IRQ_HANDLED;
+ }
+
+ #include <linux/kallsyms.h>
+-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
+- struct pt_regs *regs)
++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
+ {
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+@@ -622,3 +618,70 @@ irqreturn_t smp_call_function_interrupt(
+ return IRQ_HANDLED;
+ }
+
++/*
++ * This function sends a 'generic call function' IPI to one other CPU
++ * in the system.
++ *
++ * cpu is a standard Linux logical CPU number.
++ */
++static void
++__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
++ int nonatomic, int wait)
++{
++ struct call_data_struct data;
++ int cpus = 1;
++
++ data.func = func;
++ data.info = info;
++ atomic_set(&data.started, 0);
++ data.wait = wait;
++ if (wait)
++ atomic_set(&data.finished, 0);
++
++ call_data = &data;
++ wmb();
++ /* Send a message to all other CPUs and wait for them to respond */
++ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
++
++ /* Wait for response */
++ while (atomic_read(&data.started) != cpus)
++ cpu_relax();
++
++ if (!wait)
++ return;
++
++ while (atomic_read(&data.finished) != cpus)
++ cpu_relax();
++}
++
++/*
++ * smp_call_function_single - Run a function on another CPU
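++ * @cpu: The CPU to run the function on.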
++ * @func: The function to run. This must be fast and non-blocking.
++ * @info: An arbitrary pointer to pass to the function.
++ * @nonatomic: Currently unused.
++ * @wait: If true, wait until the function has completed on the other CPU.
++ *
++ * Returns 0 on success, else a negative status code.
++ *
++ * Does not return until the remote CPU is nearly ready to execute <func>,
++ * or is executing it, or has already executed it.
++ */
++
++int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
++ int nonatomic, int wait)
++{
++ /* prevent preemption and reschedule on another processor */
++ int me = get_cpu();
++ if (cpu == me) {
++ WARN_ON(1);
++ put_cpu();
++ return -EBUSY;
++ }
++ spin_lock_bh(&call_lock);
++ __smp_call_function_single(cpu, func, info, nonatomic, wait);
++ spin_unlock_bh(&call_lock);
++ put_cpu();
++ return 0;
++}
++EXPORT_SYMBOL(smp_call_function_single);
+Index: 10.3-2007-11-26/arch/i386/kernel/time-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/time-xen.c 2007-12-06 17:31:37.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/time-xen.c 2007-12-06 17:31:58.000000000 +0100
+@@ -88,7 +88,6 @@ int pit_latch_buggy; /* ext
+ unsigned long vxtime_hz = PIT_TICK_RATE;
+ struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
+ volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
+ struct timespec __xtime __section_xtime;
+ struct timezone __sys_tz __section_sys_tz;
+ #endif
+@@ -96,8 +95,6 @@ struct timezone __sys_tz __section_sys_t
+ unsigned int cpu_khz; /* Detected as we calibrate the TSC */
+ EXPORT_SYMBOL(cpu_khz);
+
+-extern unsigned long wall_jiffies;
+-
+ DEFINE_SPINLOCK(rtc_lock);
+ EXPORT_SYMBOL(rtc_lock);
+
+@@ -261,11 +258,10 @@ static void __update_wallclock(time_t se
+ time_t wtm_sec, xtime_sec;
+ u64 tmp, wc_nsec;
+
+- /* Adjust wall-clock time base based on wall_jiffies ticks. */
++ /* Adjust wall-clock time base. */
+ wc_nsec = processed_system_time;
+ wc_nsec += sec * (u64)NSEC_PER_SEC;
+ wc_nsec += nsec;
+- wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
+
+ /* Split wallclock base into seconds and nanoseconds. */
+ tmp = wc_nsec;
+@@ -383,13 +379,10 @@ void do_gettimeofday(struct timeval *tv)
+ shadow = &per_cpu(shadow_time, cpu);
+
+ do {
+- unsigned long lost;
+-
+ local_time_version = shadow->version;
+ seq = read_seqbegin(&xtime_lock);
+
+ usec = get_usec_offset(shadow);
+- lost = jiffies - wall_jiffies;
+
+ /*
+ * If time_adjust is negative then NTP is slowing the clock
+@@ -399,12 +392,7 @@ void do_gettimeofday(struct timeval *tv)
+ if (unlikely(time_adjust < 0)) {
+ max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj;
+ usec = min(usec, max_ntp_tick);
+-
+- if (lost)
+- usec += lost * max_ntp_tick;
+ }
+- else if (unlikely(lost))
+- usec += lost * (USEC_PER_SEC / HZ);
+
+ sec = xtime.tv_sec;
+ usec += (xtime.tv_nsec / NSEC_PER_USEC);
+@@ -509,7 +497,7 @@ static void sync_xen_wallclock(unsigned
+ write_seqlock_irq(&xtime_lock);
+
+ sec = xtime.tv_sec;
+- nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
++ nsec = xtime.tv_nsec;
+ __normalize_time(&sec, &nsec);
+
+ op.cmd = XENPF_settime;
+@@ -583,7 +571,6 @@ unsigned long long sched_clock(void)
+ }
+ #endif
+
+-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
+ unsigned long profile_pc(struct pt_regs *regs)
+ {
+ unsigned long pc = instruction_pointer(regs);
+@@ -604,21 +591,38 @@ unsigned long profile_pc(struct pt_regs
+ return ((unsigned long *)regs->rsp)[1];
+ }
+ #else
+- if (!user_mode_vm(regs) && in_lock_functions(pc))
++#ifdef CONFIG_SMP
++ if (!user_mode_vm(regs) && in_lock_functions(pc)) {
++#ifdef CONFIG_FRAME_POINTER
+ return *(unsigned long *)(regs->ebp + 4);
++#else
++ unsigned long *sp;
++ if ((regs->xcs & 2) == 0)
++ sp = (unsigned long *)&regs->esp;
++ else
++ sp = (unsigned long *)regs->esp;
++ /* Return address is either directly at stack pointer
++ or above a saved eflags. Eflags has bits 22-31 zero,
++ kernel addresses don't. */
++ if (sp[0] >> 22)
++ return sp[0];
++ if (sp[1] >> 22)
++ return sp[1];
++#endif
++ }
++#endif
+ #endif
+
+ return pc;
+ }
+ EXPORT_SYMBOL(profile_pc);
+-#endif
+
+ /*
+ * This is the same as the above, except we _also_ save the current
+ * Time Stamp Counter value at the time of the timer interrupt, so that
+ * we later on can estimate the time of day more exactly.
+ */
+-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t timer_interrupt(int irq, void *dev_id)
+ {
+ s64 delta, delta_cpu, stolen, blocked;
+ u64 sched_time;
+@@ -676,10 +680,14 @@ irqreturn_t timer_interrupt(int irq, voi
+ }
+
+ /* System-wide jiffy work. */
+- while (delta >= NS_PER_TICK) {
+- delta -= NS_PER_TICK;
+- processed_system_time += NS_PER_TICK;
+- do_timer(regs);
++ if (delta >= NS_PER_TICK) {
++ do_div(delta, NS_PER_TICK);
++ processed_system_time += delta * NS_PER_TICK;
++ while (delta > HZ) {
++ do_timer(HZ);
++ delta -= HZ;
++ }
++ do_timer(delta);
+ }
+
+ if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+@@ -724,7 +732,7 @@ irqreturn_t timer_interrupt(int irq, voi
+ if (delta_cpu > 0) {
+ do_div(delta_cpu, NS_PER_TICK);
+ per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
+- if (user_mode_vm(regs))
++ if (user_mode_vm(get_irq_regs()))
+ account_user_time(current, (cputime_t)delta_cpu);
+ else
+ account_system_time(current, HARDIRQ_OFFSET,
+@@ -738,10 +746,10 @@ irqreturn_t timer_interrupt(int irq, voi
+ /* Local timer processing (see update_process_times()). */
+ run_local_timers();
+ if (rcu_pending(cpu))
+- rcu_check_callbacks(cpu, user_mode_vm(regs));
++ rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
+ scheduler_tick();
+ run_posix_cpu_timers(current);
+- profile_tick(CPU_PROFILING, regs);
++ profile_tick(CPU_PROFILING);
+
+ return IRQ_HANDLED;
+ }
+@@ -913,16 +921,19 @@ void notify_arch_cmos_timer(void)
+ mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
+ }
+
+-static long clock_cmos_diff, sleep_start;
++static long clock_cmos_diff;
++static unsigned long sleep_start;
+
+ static int timer_suspend(struct sys_device *dev, pm_message_t state)
+ {
+ /*
+ * Estimate time zone so that set_time can update the clock
+ */
+- clock_cmos_diff = -get_cmos_time();
++ unsigned long ctime = get_cmos_time();
++
++ clock_cmos_diff = -ctime;
+ clock_cmos_diff += get_seconds();
+- sleep_start = get_cmos_time();
++ sleep_start = ctime;
+ return 0;
+ }
+
+@@ -930,19 +941,29 @@ static int timer_resume(struct sys_devic
+ {
+ unsigned long flags;
+ unsigned long sec;
+- unsigned long sleep_length;
+-
++ unsigned long ctime = get_cmos_time();
++ long sleep_length = (ctime - sleep_start) * HZ;
++ struct timespec ts;
++
++ if (sleep_length < 0) {
++ printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n");
++ /* The time after the resume must not be earlier than the time
++ * before the suspend or some nasty things will happen
++ */
++ sleep_length = 0;
++ ctime = sleep_start;
++ }
+ #ifdef CONFIG_HPET_TIMER
+ if (is_hpet_enabled())
+ hpet_reenable();
+ #endif
+- sec = get_cmos_time() + clock_cmos_diff;
+- sleep_length = (get_cmos_time() - sleep_start) * HZ;
++
++ sec = ctime + clock_cmos_diff;
++ ts.tv_sec = sec;
++ ts.tv_nsec = 0;
++ do_settimeofday(&ts);
+ write_seqlock_irqsave(&xtime_lock, flags);
+- xtime.tv_sec = sec;
+- xtime.tv_nsec = 0;
+ jiffies_64 += sleep_length;
+- wall_jiffies += sleep_length;
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+ touch_softlockup_watchdog();
+ return 0;
+@@ -976,10 +997,11 @@ extern void (*late_time_init)(void);
+ /* Duplicate of time_init() below, with hpet_enable part added */
+ static void __init hpet_time_init(void)
+ {
+- xtime.tv_sec = get_cmos_time();
+- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
+- set_normalized_timespec(&wall_to_monotonic,
+- -xtime.tv_sec, -xtime.tv_nsec);
++ struct timespec ts;
++ ts.tv_sec = get_cmos_time();
++ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
++
++ do_settimeofday(&ts);
+
+ if ((hpet_enable() >= 0) && hpet_use_timer) {
+ printk("Using HPET for base-timer\n");
+Index: 10.3-2007-11-26/arch/i386/kernel/traps-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/kernel/traps-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/kernel/traps-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -28,6 +28,7 @@
+ #include <linux/kprobes.h>
+ #include <linux/kexec.h>
+ #include <linux/unwind.h>
++#include <linux/uaccess.h>
+
+ #ifdef CONFIG_EISA
+ #include <linux/ioport.h>
+@@ -40,7 +41,6 @@
+
+ #include <asm/processor.h>
+ #include <asm/system.h>
+-#include <asm/uaccess.h>
+ #include <asm/io.h>
+ #include <asm/atomic.h>
+ #include <asm/debugreg.h>
+@@ -51,11 +51,14 @@
+ #include <asm/smp.h>
+ #include <asm/arch_hooks.h>
+ #include <asm/kdebug.h>
++#include <asm/stacktrace.h>
+
+ #include <linux/module.h>
+
+ #include "mach_traps.h"
+
++int panic_on_unrecovered_nmi;
++
+ asmlinkage int system_call(void);
+
+ struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
+@@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct
+ p < (void *)tinfo + THREAD_SIZE - 3;
+ }
+
+-/*
+- * Print one address/symbol entries per line.
+- */
+-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
+-{
+- printk(" [<%08lx>] ", addr);
+-
+- print_symbol("%s\n", addr);
+-}
+-
+ static inline unsigned long print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long ebp,
+- char *log_lvl)
++ struct stacktrace_ops *ops, void *data)
+ {
+ unsigned long addr;
+
+ #ifdef CONFIG_FRAME_POINTER
+ while (valid_stack_ptr(tinfo, (void *)ebp)) {
++ unsigned long new_ebp;
+ addr = *(unsigned long *)(ebp + 4);
+- print_addr_and_symbol(addr, log_lvl);
++ ops->address(data, addr);
+ /*
+ * break out of recursive entries (such as
+- * end_of_stack_stop_unwind_function):
++ * end_of_stack_stop_unwind_function). Also,
++ * we can never allow a frame pointer to
++ * move downwards!
+ */
+- if (ebp == *(unsigned long *)ebp)
++ new_ebp = *(unsigned long *)ebp;
++ if (new_ebp <= ebp)
+ break;
+- ebp = *(unsigned long *)ebp;
++ ebp = new_ebp;
+ }
+ #else
+ while (valid_stack_ptr(tinfo, stack)) {
+ addr = *stack++;
+ if (__kernel_text_address(addr))
+- print_addr_and_symbol(addr, log_lvl);
++ ops->address(data, addr);
+ }
+ #endif
+ return ebp;
+ }
+
++struct ops_and_data {
++ struct stacktrace_ops *ops;
++ void *data;
++};
++
+ static asmlinkage int
+-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
++dump_trace_unwind(struct unwind_frame_info *info, void *data)
+ {
++ struct ops_and_data *oad = (struct ops_and_data *)data;
+ int n = 0;
+
+ while (unwind(info) == 0 && UNW_PC(info)) {
+ n++;
+- print_addr_and_symbol(UNW_PC(info), log_lvl);
++ oad->ops->address(oad->data, UNW_PC(info));
+ if (arch_unw_user_mode(info))
+ break;
+ }
+ return n;
+ }
+
+-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+- unsigned long *stack, char *log_lvl)
++void dump_trace(struct task_struct *task, struct pt_regs *regs,
++ unsigned long *stack,
++ struct stacktrace_ops *ops, void *data)
+ {
+- unsigned long ebp;
++ unsigned long ebp = 0;
+
+ if (!task)
+ task = current;
+@@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta
+ if (call_trace >= 0) {
+ int unw_ret = 0;
+ struct unwind_frame_info info;
++ struct ops_and_data oad = { .ops = ops, .data = data };
+
+ if (regs) {
+ if (unwind_init_frame_info(&info, task, regs) == 0)
+- unw_ret = show_trace_unwind(&info, log_lvl);
++ unw_ret = dump_trace_unwind(&info, &oad);
+ } else if (task == current)
+- unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
++ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+ else {
+ if (unwind_init_blocked(&info, task) == 0)
+- unw_ret = show_trace_unwind(&info, log_lvl);
++ unw_ret = dump_trace_unwind(&info, &oad);
+ }
+ if (unw_ret > 0) {
+ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
+- print_symbol("DWARF2 unwinder stuck at %s\n",
++ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+ UNW_PC(&info));
+ if (UNW_SP(&info) >= PAGE_OFFSET) {
+- printk("Leftover inexact backtrace:\n");
++ ops->warning(data, "Leftover inexact backtrace:\n");
+ stack = (void *)UNW_SP(&info);
++ if (!stack)
++ return;
++ ebp = UNW_FP(&info);
+ } else
+- printk("Full inexact backtrace again:\n");
++ ops->warning(data, "Full inexact backtrace again:\n");
+ } else if (call_trace >= 1)
+ return;
+ else
+- printk("Full inexact backtrace again:\n");
++ ops->warning(data, "Full inexact backtrace again:\n");
+ } else
+- printk("Inexact backtrace:\n");
++ ops->warning(data, "Inexact backtrace:\n");
+ }
+-
+- if (task == current) {
+- /* Grab ebp right from our regs */
+- asm ("movl %%ebp, %0" : "=r" (ebp) : );
+- } else {
+- /* ebp is the last reg pushed by switch_to */
+- ebp = *(unsigned long *) task->thread.esp;
++ if (!stack) {
++ unsigned long dummy;
++ stack = &dummy;
++ if (task && task != current)
++ stack = (unsigned long *)task->thread.esp;
++ }
++
++#ifdef CONFIG_FRAME_POINTER
++ if (!ebp) {
++ if (task == current) {
++ /* Grab ebp right from our regs */
++ asm ("movl %%ebp, %0" : "=r" (ebp) : );
++ } else {
++ /* ebp is the last reg pushed by switch_to */
++ ebp = *(unsigned long *) task->thread.esp;
++ }
+ }
++#endif
+
+ while (1) {
+ struct thread_info *context;
+ context = (struct thread_info *)
+ ((unsigned long)stack & (~(THREAD_SIZE - 1)));
+- ebp = print_context_stack(context, stack, ebp, log_lvl);
++ ebp = print_context_stack(context, stack, ebp, ops, data);
++ /* Should be after the line below, but somewhere
++ in early boot context comes out corrupted and we
++ can't reference it -AK */
++ if (ops->stack(data, "IRQ") < 0)
++ break;
+ stack = (unsigned long*)context->previous_esp;
+ if (!stack)
+ break;
+- printk("%s =======================\n", log_lvl);
+ }
+ }
++EXPORT_SYMBOL(dump_trace);
++
++static void
++print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
++{
++ printk(data);
++ print_symbol(msg, symbol);
++ printk("\n");
++}
++
++static void print_trace_warning(void *data, char *msg)
++{
++ printk("%s%s\n", (char *)data, msg);
++}
+
+-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
++static int print_trace_stack(void *data, char *name)
++{
++ return 0;
++}
++
++/*
++ * Print one address/symbol entry per line.
++ */
++static void print_trace_address(void *data, unsigned long addr)
++{
++ printk("%s [<%08lx>] ", (char *)data, addr);
++ print_symbol("%s\n", addr);
++}
++
++static struct stacktrace_ops print_trace_ops = {
++ .warning = print_trace_warning,
++ .warning_symbol = print_trace_warning_symbol,
++ .stack = print_trace_stack,
++ .address = print_trace_address,
++};
++
++static void
++show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
++ unsigned long * stack, char *log_lvl)
++{
++ dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
++ printk("%s =======================\n", log_lvl);
++}
++
++void show_trace(struct task_struct *task, struct pt_regs *regs,
++ unsigned long * stack)
+ {
+ show_trace_log_lvl(task, regs, stack, "");
+ }
+@@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs
+ ss = regs->xss & 0xffff;
+ }
+ print_modules();
+- printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
+- "EFLAGS: %08lx (%s %.*s) \n",
++ printk(KERN_EMERG "CPU: %d\n"
++ KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
++ KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
+ smp_processor_id(), 0xffff & regs->xcs, regs->eip,
+- print_tainted(), regs->eflags, system_utsname.release,
+- (int)strcspn(system_utsname.version, " "),
+- system_utsname.version);
++ print_tainted(), regs->eflags, init_utsname()->release,
++ (int)strcspn(init_utsname()->version, " "),
++ init_utsname()->version);
+ print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
+ printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
+ regs->eax, regs->ebx, regs->ecx, regs->edx);
+@@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs
+ */
+ if (in_kernel) {
+ u8 __user *eip;
++ int code_bytes = 64;
++ unsigned char c;
+
+ printk("\n" KERN_EMERG "Stack: ");
+ show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
+@@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs
+ printk(KERN_EMERG "Code: ");
+
+ eip = (u8 __user *)regs->eip - 43;
+- for (i = 0; i < 64; i++, eip++) {
+- unsigned char c;
+-
++ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
++ /* try starting at EIP */
++ eip = (u8 __user *)regs->eip;
++ code_bytes = 32;
++ }
++ for (i = 0; i < code_bytes; i++, eip++) {
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
+ printk(" Bad EIP value.");
+ break;
+@@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r
+
+ if (eip < PAGE_OFFSET)
+ return;
+- if (__get_user(ud2, (unsigned short __user *)eip))
++ if (probe_kernel_address((unsigned short __user *)eip, ud2))
+ return;
+ if (ud2 != 0x0b0f)
+ return;
+@@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r
+ char *file;
+ char c;
+
+- if (__get_user(line, (unsigned short __user *)(eip + 2)))
++ if (probe_kernel_address((unsigned short __user *)(eip + 2),
++ line))
+ break;
+ if (__get_user(file, (char * __user *)(eip + 4)) ||
+ (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
+@@ -604,18 +677,24 @@ gp_in_kernel:
+ }
+ }
+
+-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
++static __kprobes void
++mem_parity_error(unsigned char reason, struct pt_regs * regs)
+ {
+- printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
+- "to continue\n");
++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
++ "CPU %d.\n", reason, smp_processor_id());
+ printk(KERN_EMERG "You probably have a hardware problem with your RAM "
+ "chips\n");
++ if (panic_on_unrecovered_nmi)
++ panic("NMI: Not continuing");
++
++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+
+ /* Clear and disable the memory parity error line. */
+ clear_mem_error(reason);
+ }
+
+-static void io_check_error(unsigned char reason, struct pt_regs * regs)
++static __kprobes void
++io_check_error(unsigned char reason, struct pt_regs * regs)
+ {
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ show_registers(regs);
+@@ -624,7 +703,8 @@ static void io_check_error(unsigned char
+ clear_io_check_error(reason);
+ }
+
+-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
++static __kprobes void
++unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+ {
+ #ifdef CONFIG_MCA
+ /* Might actually be able to figure out what the guilty party
+@@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c
+ return;
+ }
+ #endif
+- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+- reason, smp_processor_id());
+- printk("Dazed and confused, but trying to continue\n");
+- printk("Do you have a strange power saving mode enabled?\n");
++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
++ "CPU %d.\n", reason, smp_processor_id());
++ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
++ if (panic_on_unrecovered_nmi)
++ panic("NMI: Not continuing");
++
++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+
+ static DEFINE_SPINLOCK(nmi_print_lock);
+
+-void die_nmi (struct pt_regs *regs, const char *msg)
++void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
+ {
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
+ NOTIFY_STOP)
+@@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons
+ do_exit(SIGSEGV);
+ }
+
+-static void default_do_nmi(struct pt_regs * regs)
++static __kprobes void default_do_nmi(struct pt_regs * regs)
+ {
+ unsigned char reason = 0;
+
+@@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg
+ * Ok, so this is none of the documented NMI sources,
+ * so it must be the NMI watchdog.
+ */
+- if (nmi_watchdog) {
+- nmi_watchdog_tick(regs);
++ if (nmi_watchdog_tick(regs, reason))
+ return;
+- }
++ if (!do_nmi_callback(regs, smp_processor_id()))
+ #endif
+- unknown_nmi_error(reason, regs);
++ unknown_nmi_error(reason, regs);
++
+ return;
+ }
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+@@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg
+ reassert_nmi();
+ }
+
+-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
+-{
+- return 0;
+-}
+-
+-static nmi_callback_t nmi_callback = dummy_nmi_callback;
+-
+-fastcall void do_nmi(struct pt_regs * regs, long error_code)
++fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
+ {
+ int cpu;
+
+@@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_regs * re
+
+ ++nmi_count(cpu);
+
+- if (!rcu_dereference(nmi_callback)(regs, cpu))
+- default_do_nmi(regs);
++ default_do_nmi(regs);
+
+ nmi_exit();
+ }
+
+-void set_nmi_callback(nmi_callback_t callback)
+-{
+- vmalloc_sync_all();
+- rcu_assign_pointer(nmi_callback, callback);
+-}
+-EXPORT_SYMBOL_GPL(set_nmi_callback);
+-
+-void unset_nmi_callback(void)
+-{
+- nmi_callback = dummy_nmi_callback;
+-}
+-EXPORT_SYMBOL_GPL(unset_nmi_callback);
+-
+ #ifdef CONFIG_KPROBES
+ fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
+ {
+Index: 10.3-2007-11-26/arch/i386/mach-xen/setup.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mach-xen/setup.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/mach-xen/setup.c 2007-10-22 13:53:08.000000000 +0200
+@@ -133,8 +133,10 @@ void __init machine_specific_arch_setup(
+ }
+ #endif
+
+- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
+- set_fixaddr_top(pp.virt_start);
++ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
++ hypervisor_virt_start = pp.virt_start;
++ reserve_top_address(0UL - pp.virt_start);
++ }
+
+ machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+Index: 10.3-2007-11-26/arch/i386/mm/fault-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/fault-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/mm/fault-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -27,21 +27,24 @@
+ #include <asm/uaccess.h>
+ #include <asm/desc.h>
+ #include <asm/kdebug.h>
++#include <asm/segment.h>
+
+ extern void die(const char *,struct pt_regs *,long);
+
+-#ifdef CONFIG_KPROBES
+-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
++static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
++
+ int register_page_fault_notifier(struct notifier_block *nb)
+ {
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(register_page_fault_notifier);
+
+ int unregister_page_fault_notifier(struct notifier_block *nb)
+ {
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
+
+ static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+ }
+-#else
+-static inline int notify_page_fault(enum die_val val, const char *str,
+- struct pt_regs *regs, long err, int trap, int sig)
+-{
+- return NOTIFY_DONE;
+-}
+-#endif
+-
+
+ /*
+ * Unlock any spinlocks which will prevent us from getting the
+@@ -119,10 +114,10 @@ static inline unsigned long get_segment_
+ }
+
+ /* The standard kernel/user address space limit. */
+- *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
++ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
+
+ /* By far the most common cases. */
+- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
++ if (likely(SEGMENT_IS_FLAT_CODE(seg)))
+ return eip;
+
+ /* Check the segment exists, is within the current LDT/GDT size,
+@@ -559,11 +554,7 @@ good_area:
+ write = 0;
+ switch (error_code & 3) {
+ default: /* 3: write, present */
+-#ifdef TEST_VERIFY_AREA
+- if (regs->cs == GET_KERNEL_CS())
+- printk("WP fault at %08lx\n", regs->eip);
+-#endif
+- /* fall through */
++ /* fall through */
+ case 2: /* write, not present */
+ if (!(vma->vm_flags & VM_WRITE))
+ goto bad_area;
+@@ -572,7 +563,7 @@ good_area:
+ case 1: /* read, present */
+ goto bad_area;
+ case 0: /* read, not present */
+- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
++ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+ goto bad_area;
+ }
+
+@@ -704,7 +695,7 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (tsk->pid == 1) {
++ if (is_init(tsk)) {
+ yield();
+ down_read(&mm->mmap_sem);
+ goto survive;
+Index: 10.3-2007-11-26/arch/i386/mm/highmem-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/highmem-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/mm/highmem-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
+
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+-#ifdef CONFIG_DEBUG_HIGHMEM
+ if (!pte_none(*(kmap_pte-idx)))
+ BUG();
+-#endif
+- set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
++ set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
+
+ return (void*) vaddr;
+ }
+@@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page,
+
+ void kunmap_atomic(void *kvaddr, enum km_type type)
+ {
+-#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
+
+- if (vaddr < FIXADDR_START) { // FIXME
++#ifdef CONFIG_DEBUG_HIGHMEM
++ if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
+ dec_preempt_count();
+ preempt_check_resched();
+ return;
+ }
+-#endif
+
+-#if defined(CONFIG_DEBUG_HIGHMEM)
+ if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ BUG();
+-
+- /*
+- * force other mappings to Oops if they'll try to access
+- * this pte without first remap it
+- */
+- pte_clear(&init_mm, vaddr, kmap_pte-idx);
+- __flush_tlb_one(vaddr);
+-#elif defined(CONFIG_XEN)
++#endif
+ /*
+- * We must ensure there are no dangling pagetable references when
+- * returning memory to Xen (decrease_reservation).
+- * XXX TODO: We could make this faster by only zapping when
+- * kmap_flush_unused is called but that is trickier and more invasive.
++	 * Force other mappings to Oops if they'll try to access this pte
++	 * without first remapping it. Keeping stale mappings around is also
++	 * a bad idea, in case the page changes cacheability attributes or
++	 * becomes a protected page in a hypervisor.
+ */
+- pte_clear(&init_mm, vaddr, kmap_pte-idx);
+-#endif
++ kpte_clear_flush(kmap_pte-idx, vaddr);
+
+ dec_preempt_count();
+ preempt_check_resched();
+@@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn,
+ idx = type + KM_TYPE_NR*smp_processor_id();
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
+- __flush_tlb_one(vaddr);
+
+ return (void*) vaddr;
+ }
+Index: 10.3-2007-11-26/arch/i386/mm/init-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/init-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/mm/init-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -467,16 +467,22 @@ EXPORT_SYMBOL(__supported_pte_mask);
+ * on Enable
+ * off Disable
+ */
+-void __init noexec_setup(const char *str)
++static int __init noexec_setup(char *str)
+ {
+- if (!strncmp(str, "on",2) && cpu_has_nx) {
+- __supported_pte_mask |= _PAGE_NX;
+- disable_nx = 0;
+- } else if (!strncmp(str,"off",3)) {
++ if (!str || !strcmp(str, "on")) {
++ if (cpu_has_nx) {
++ __supported_pte_mask |= _PAGE_NX;
++ disable_nx = 0;
++ }
++ } else if (!strcmp(str,"off")) {
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+- }
++ } else
++ return -EINVAL;
++
++ return 0;
+ }
++early_param("noexec", noexec_setup);
+
+ int nx_enabled = 0;
+ #ifdef CONFIG_X86_PAE
+@@ -519,6 +525,7 @@ int __init set_kernel_exec(unsigned long
+ pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
+ else
+ pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
++ pte_update_defer(&init_mm, vaddr, pte);
+ __flush_tlb_all();
+ out:
+ return ret;
+@@ -601,18 +608,6 @@ static void __init test_wp_bit(void)
+ }
+ }
+
+-static void __init set_max_mapnr_init(void)
+-{
+-#ifdef CONFIG_HIGHMEM
+- num_physpages = highend_pfn;
+-#else
+- num_physpages = max_low_pfn;
+-#endif
+-#ifdef CONFIG_FLATMEM
+- max_mapnr = num_physpages;
+-#endif
+-}
+-
+ static struct kcore_list kcore_mem, kcore_vmalloc;
+
+ void __init mem_init(void)
+@@ -633,8 +628,7 @@ void __init mem_init(void)
+ #endif
+
+ #ifdef CONFIG_FLATMEM
+- if (!mem_map)
+- BUG();
++ BUG_ON(!mem_map);
+ #endif
+
+ bad_ppro = ppro_with_ram_bug();
+@@ -649,13 +643,6 @@ void __init mem_init(void)
+ }
+ #endif
+
+- set_max_mapnr_init();
+-
+-#ifdef CONFIG_HIGHMEM
+- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+-#else
+- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
+-#endif
+ printk("vmalloc area: %lx-%lx, maxmem %lx\n",
+ VMALLOC_START,VMALLOC_END,MAXMEM);
+ BUG_ON(VMALLOC_START > VMALLOC_END);
+@@ -697,6 +684,48 @@ void __init mem_init(void)
+ (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
+ );
+
++#if 1 /* double-sanity-check paranoia */
++ printk("virtual kernel memory layout:\n"
++ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
++#ifdef CONFIG_HIGHMEM
++ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
++#endif
++ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
++ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
++ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
++ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
++ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
++ FIXADDR_START, FIXADDR_TOP,
++ (FIXADDR_TOP - FIXADDR_START) >> 10,
++
++#ifdef CONFIG_HIGHMEM
++ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
++ (LAST_PKMAP*PAGE_SIZE) >> 10,
++#endif
++
++ VMALLOC_START, VMALLOC_END,
++ (VMALLOC_END - VMALLOC_START) >> 20,
++
++ (unsigned long)__va(0), (unsigned long)high_memory,
++ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
++
++ (unsigned long)&__init_begin, (unsigned long)&__init_end,
++ ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
++
++ (unsigned long)&_etext, (unsigned long)&_edata,
++ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
++
++ (unsigned long)&_text, (unsigned long)&_etext,
++ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
++
++#ifdef CONFIG_HIGHMEM
++ BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
++ BUG_ON(VMALLOC_END > PKMAP_BASE);
++#endif
++ BUG_ON(VMALLOC_START > VMALLOC_END);
++ BUG_ON((unsigned long)high_memory > VMALLOC_START);
++#endif /* double-sanity-check paranoia */
++
+ #ifdef CONFIG_X86_PAE
+ if (!cpu_has_pae)
+ panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
+@@ -727,7 +756,7 @@ void __init mem_init(void)
+ int arch_add_memory(int nid, u64 start, u64 size)
+ {
+ struct pglist_data *pgdata = &contig_page_data;
+- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
++ struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+Index: 10.3-2007-11-26/arch/i386/mm/ioremap-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/ioremap-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/mm/ioremap-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -12,7 +12,7 @@
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <linux/module.h>
+-#include <asm/io.h>
++#include <linux/io.h>
+ #include <asm/fixmap.h>
+ #include <asm/cacheflush.h>
+ #include <asm/tlbflush.h>
+@@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are
+ if (domid == DOMID_SELF)
+ return -EINVAL;
+
+- vma->vm_flags |= VM_IO | VM_RESERVED;
++ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+
+ vma->vm_mm->context.has_foreign_mappings = 1;
+
+@@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p
+ void __iomem * addr;
+ struct vm_struct * area;
+ unsigned long offset, last_addr;
++ pgprot_t prot;
+ domid_t domid = DOMID_IO;
+
+ /* Don't allow wraparound or zero size */
+@@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p
+ domid = DOMID_SELF;
+ }
+
++ prot = __pgprot(_KERNPG_TABLE | flags);
++
+ /*
+ * Mappings have to be page-aligned
+ */
+@@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p
+ return NULL;
+ area->phys_addr = phys_addr;
+ addr = (void __iomem *) area->addr;
+- flags |= _KERNPG_TABLE;
+ if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
+ phys_addr>>PAGE_SHIFT,
+- size, __pgprot(flags), domid)) {
++ size, prot, domid)) {
+ vunmap((void __force *) addr);
+ return NULL;
+ }
+Index: 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/mm/pgtable-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -68,7 +68,9 @@ void show_mem(void)
+ printk(KERN_INFO "%lu pages writeback\n",
+ global_page_state(NR_WRITEBACK));
+ printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
+- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
++ printk(KERN_INFO "%lu pages slab\n",
++ global_page_state(NR_SLAB_RECLAIMABLE) +
++ global_page_state(NR_SLAB_UNRECLAIMABLE));
+ printk(KERN_INFO "%lu pages pagetables\n",
+ global_page_state(NR_PAGETABLE));
+ }
+@@ -189,18 +191,11 @@ void set_pmd_pfn(unsigned long vaddr, un
+ __flush_tlb_one(vaddr);
+ }
+
+-static int nr_fixmaps = 0;
++static int fixmaps = 0;
+ unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
+-unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
++unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
+ EXPORT_SYMBOL(__FIXADDR_TOP);
+
+-void __init set_fixaddr_top(unsigned long top)
+-{
+- BUG_ON(nr_fixmaps > 0);
+- hypervisor_virt_start = top;
+- __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
+-}
+-
+ void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
+ {
+ unsigned long address = __fix_to_virt(idx);
+@@ -221,7 +216,21 @@ void __set_fixmap (enum fixed_addresses
+ set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags);
+ break;
+ }
+- nr_fixmaps++;
++ fixmaps++;
++}
++
++/**
++ * reserve_top_address - reserves a hole in the top of kernel address space
++ * @reserve: size of hole to reserve
++ *
++ * Can be used to relocate the fixmap area and poke a hole in the top
++ * of kernel address space to make room for a hypervisor.
++ */
++void __init reserve_top_address(unsigned long reserve)
++{
++ BUG_ON(fixmaps > 0);
++ __FIXADDR_TOP = -reserve - PAGE_SIZE;
++ __VMALLOC_RESERVE += reserve;
+ }
+
+ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+Index: 10.3-2007-11-26/arch/i386/pci/irq-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/i386/pci/irq-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/i386/pci/irq-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -985,10 +985,6 @@ static void __init pcibios_fixup_irqs(vo
+ pci_name(bridge), 'A' + pin, irq);
+ }
+ if (irq >= 0) {
+- if (use_pci_vector() &&
+- !platform_legacy_irq(irq))
+- irq = IO_APIC_VECTOR(irq);
+-
+ printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+ pci_name(dev), 'A' + pin, irq);
+ dev->irq = irq;
+@@ -1149,10 +1145,6 @@ static int pirq_enable_irq(struct pci_de
+ }
+ dev = temp_dev;
+ if (irq >= 0) {
+-#ifdef CONFIG_PCI_MSI
+- if (!platform_legacy_irq(irq))
+- irq = IO_APIC_VECTOR(irq);
+-#endif
+ printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
+ pci_name(dev), 'A' + pin, irq);
+ dev->irq = irq;
+@@ -1173,33 +1165,3 @@ static int pirq_enable_irq(struct pci_de
+ }
+ return 0;
+ }
+-
+-int pci_vector_resources(int last, int nr_released)
+-{
+- int count = nr_released;
+-
+- int next = last;
+- int offset = (last % 8);
+-
+- while (next < FIRST_SYSTEM_VECTOR) {
+- next += 8;
+-#ifdef CONFIG_X86_64
+- if (next == IA32_SYSCALL_VECTOR)
+- continue;
+-#else
+- if (next == SYSCALL_VECTOR)
+- continue;
+-#endif
+- count++;
+- if (next >= FIRST_SYSTEM_VECTOR) {
+- if (offset%8) {
+- next = FIRST_DEVICE_VECTOR + offset;
+- offset++;
+- continue;
+- }
+- count--;
+- }
+- }
+-
+- return count;
+-}
+Index: 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/ia32/ia32entry-xen.S 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S 2007-10-22 13:53:08.000000000 +0200
+@@ -88,6 +88,7 @@
+ */
+ ENTRY(ia32_sysenter_target)
+ CFI_STARTPROC32 simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rsp,rbp
+ __swapgs
+@@ -206,6 +207,7 @@ ENDPROC(ia32_sysenter_target)
+ */
+ ENTRY(ia32_cstar_target)
+ CFI_STARTPROC32 simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,PDA_STACKOFFSET
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
+@@ -315,6 +317,7 @@ ia32_badarg:
+
+ ENTRY(ia32_syscall)
+ CFI_STARTPROC simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,SS+8-RIP
+ /*CFI_REL_OFFSET ss,SS-RIP*/
+ CFI_REL_OFFSET rsp,RSP-RIP
+@@ -397,6 +400,7 @@ ENTRY(ia32_ptregs_common)
+ popq %r11
+ CFI_ENDPROC
+ CFI_STARTPROC32 simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,SS+8-ARGOFFSET
+ CFI_REL_OFFSET rax,RAX-ARGOFFSET
+ CFI_REL_OFFSET rcx,RCX-ARGOFFSET
+@@ -730,8 +734,8 @@ ia32_sys_call_table:
+ .quad sys_readlinkat /* 305 */
+ .quad sys_fchmodat
+ .quad sys_faccessat
+- .quad quiet_ni_syscall /* pselect6 for now */
+- .quad quiet_ni_syscall /* ppoll for now */
++ .quad compat_sys_pselect6
++ .quad compat_sys_ppoll
+ .quad sys_unshare /* 310 */
+ .quad compat_sys_set_robust_list
+ .quad compat_sys_get_robust_list
+@@ -740,4 +744,5 @@ ia32_sys_call_table:
+ .quad sys_tee
+ .quad compat_sys_vmsplice
+ .quad compat_sys_move_pages
++ .quad sys_getcpu
+ ia32_syscall_end:
+Index: 10.3-2007-11-26/arch/x86_64/kernel/Makefile
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/Makefile 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/Makefile 2007-10-22 13:53:08.000000000 +0200
+@@ -4,7 +4,7 @@
+
+ extra-y := head.o head64.o init_task.o vmlinux.lds
+ EXTRA_AFLAGS := -traditional
+-obj-y := process.o signal.o entry.o traps.o irq.o \
++obj-y := process.o signal.o entry.o traps.o irq.o nmi.o \
+ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
+ x8664_ksyms.o i387.o syscall.o vsyscall.o \
+ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \
+@@ -21,9 +21,9 @@ obj-$(CONFIG_X86_MSR) += msr.o
+ obj-$(CONFIG_MICROCODE) += microcode.o
+ obj-$(CONFIG_X86_CPUID) += cpuid.o
+ obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o
+-obj-y += apic.o nmi.o
++obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
+ obj-$(CONFIG_X86_XEN_GENAPIC) += genapic.o genapic_xen.o
+-obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o
++obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o genapic.o genapic_flat.o
+ obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
+ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+ obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
+@@ -68,7 +68,7 @@ pci-dma-y += ../../i386/kernel/pci-dma
+ microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o
+ quirks-y := ../../i386/kernel/quirks-xen.o
+
+-n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
++n-obj-xen := early-quirks.o i8259.o reboot.o i8237.o smpboot.o trampoline.o
+
+ include $(srctree)/scripts/Makefile.xen
+
+Index: 10.3-2007-11-26/arch/x86_64/kernel/acpi/Makefile
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/acpi/Makefile 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/acpi/Makefile 2007-10-22 13:53:08.000000000 +0200
+@@ -8,3 +8,4 @@ processor-y := ../../../i386/kernel/acp
+ endif
+
+ boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o
++processor-$(CONFIG_XEN) := ../../../i386/kernel/acpi/processor.o
+Index: 10.3-2007-11-26/arch/x86_64/kernel/apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/apic-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/apic-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -43,7 +43,7 @@ int apic_verbosity;
+ */
+ void ack_bad_irq(unsigned int irq)
+ {
+- printk("unexpected IRQ trap at vector %02x\n", irq);
++ printk("unexpected IRQ trap at irq %02x\n", irq);
+ /*
+ * Currently unexpected vectors happen only on SMP and APIC.
+ * We _must_ ack these because every local APIC has only N
+@@ -62,19 +62,19 @@ int setup_profiling_timer(unsigned int m
+ return -EINVAL;
+ }
+
+-void smp_local_timer_interrupt(struct pt_regs *regs)
++void smp_local_timer_interrupt(void)
+ {
+- profile_tick(CPU_PROFILING, regs);
++ profile_tick(CPU_PROFILING);
+ #ifndef CONFIG_XEN
+ #ifdef CONFIG_SMP
+- update_process_times(user_mode(regs));
++ update_process_times(user_mode(get_irq_regs()));
+ #endif
+ #endif
+ /*
+ * We take the 'long' return path, and there every subsystem
+ * grabs the appropriate locks (kernel lock/ irq lock).
+ *
+- * we might want to decouple profiling from the 'long path',
++ * We might want to decouple profiling from the 'long path',
+ * and do the profiling totally in assembly.
+ *
+ * Currently this isn't too much of an issue (performance wise),
+@@ -92,6 +92,8 @@ void smp_local_timer_interrupt(struct pt
+ */
+ void smp_apic_timer_interrupt(struct pt_regs *regs)
+ {
++ struct pt_regs *old_regs = set_irq_regs(regs);
++
+ /*
+ * the NMI deadlock-detector uses this.
+ */
+@@ -109,8 +111,9 @@ void smp_apic_timer_interrupt(struct pt_
+ */
+ exit_idle();
+ irq_enter();
+- smp_local_timer_interrupt(regs);
++ smp_local_timer_interrupt();
+ irq_exit();
++ set_irq_regs(old_regs);
+ }
+
+ /*
+@@ -188,9 +191,8 @@ int disable_apic;
+ int __init APIC_init_uniprocessor (void)
+ {
+ #ifdef CONFIG_X86_IO_APIC
+- if (smp_found_config)
+- if (!skip_ioapic_setup && nr_ioapics)
+- setup_IO_APIC();
++ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
++ setup_IO_APIC();
+ #endif
+
+ return 1;
+Index: 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/e820-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -16,6 +16,7 @@
+ #include <linux/string.h>
+ #include <linux/kexec.h>
+ #include <linux/module.h>
++#include <linux/mm.h>
+
+ #include <asm/pgtable.h>
+ #include <asm/page.h>
+@@ -25,6 +26,11 @@
+ #include <asm/sections.h>
+ #include <xen/interface/memory.h>
+
++struct e820map e820 __initdata;
++#ifdef CONFIG_XEN
++struct e820map machine_e820 __initdata;
++#endif
++
+ /*
+ * PFN of last memory page.
+ */
+@@ -41,7 +47,7 @@ unsigned long end_pfn_map;
+ /*
+ * Last pfn which the user wants to use.
+ */
+-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
++static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
+
+ extern struct resource code_resource, data_resource;
+
+@@ -53,13 +59,13 @@ static inline int bad_addr(unsigned long
+ #ifndef CONFIG_XEN
+ /* various gunk below that needed for SMP startup */
+ if (addr < 0x8000) {
+- *addrp = 0x8000;
++ *addrp = PAGE_ALIGN(0x8000);
+ return 1;
+ }
+
+ /* direct mapping tables of the kernel */
+ if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
+- *addrp = table_end << PAGE_SHIFT;
++ *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
+ return 1;
+ }
+
+@@ -67,23 +73,18 @@ static inline int bad_addr(unsigned long
+ #ifdef CONFIG_BLK_DEV_INITRD
+ if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
+ addr < INITRD_START+INITRD_SIZE) {
+- *addrp = INITRD_START + INITRD_SIZE;
++ *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
+ return 1;
+ }
+ #endif
+- /* kernel code + 640k memory hole (later should not be needed, but
+- be paranoid for now) */
+- if (last >= 640*1024 && addr < 1024*1024) {
+- *addrp = 1024*1024;
+- return 1;
+- }
+- if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
+- *addrp = __pa_symbol(&_end);
++ /* kernel code */
++ if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
++ *addrp = PAGE_ALIGN(__pa_symbol(&_end));
+ return 1;
+ }
+
+ if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
+- *addrp = ebda_addr + ebda_size;
++ *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
+ return 1;
+ }
+
+@@ -132,8 +133,6 @@ int __init e820_all_mapped(unsigned long
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ #else
+- extern struct e820map machine_e820;
+-
+ if (!is_initial_xendomain())
+ return 0;
+ for (i = 0; i < machine_e820.nr_map; i++) {
+@@ -175,7 +174,7 @@ unsigned long __init find_e820_area(unsi
+ continue;
+ while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
+ ;
+- last = addr + size;
++ last = PAGE_ALIGN(addr) + size;
+ if (last > ei->addr + ei->size)
+ continue;
+ if (last > end)
+@@ -185,59 +184,14 @@ unsigned long __init find_e820_area(unsi
+ return -1UL;
+ }
+
+-/*
+- * Free bootmem based on the e820 table for a node.
+- */
+-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
+-{
+- int i;
+- for (i = 0; i < e820.nr_map; i++) {
+- struct e820entry *ei = &e820.map[i];
+- unsigned long last, addr;
+-
+- if (ei->type != E820_RAM ||
+- ei->addr+ei->size <= start ||
+- ei->addr >= end)
+- continue;
+-
+- addr = round_up(ei->addr, PAGE_SIZE);
+- if (addr < start)
+- addr = start;
+-
+- last = round_down(ei->addr + ei->size, PAGE_SIZE);
+- if (last >= end)
+- last = end;
+-
+- if (last > addr && last-addr >= PAGE_SIZE)
+- free_bootmem_node(pgdat, addr, last-addr);
+- }
+-}
+-
+ /*
+ * Find the highest page frame number we have available
+ */
+ unsigned long __init e820_end_of_ram(void)
+ {
+- int i;
+ unsigned long end_pfn = 0;
++ end_pfn = find_max_pfn_with_active_regions();
+
+- for (i = 0; i < e820.nr_map; i++) {
+- struct e820entry *ei = &e820.map[i];
+- unsigned long start, end;
+-
+- start = round_up(ei->addr, PAGE_SIZE);
+- end = round_down(ei->addr + ei->size, PAGE_SIZE);
+- if (start >= end)
+- continue;
+- if (ei->type == E820_RAM) {
+- if (end > end_pfn<<PAGE_SHIFT)
+- end_pfn = end>>PAGE_SHIFT;
+- } else {
+- if (end > end_pfn_map<<PAGE_SHIFT)
+- end_pfn_map = end>>PAGE_SHIFT;
+- }
+- }
+-
+ if (end_pfn > end_pfn_map)
+ end_pfn_map = end_pfn;
+ if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
+@@ -247,43 +201,10 @@ unsigned long __init e820_end_of_ram(voi
+ if (end_pfn > end_pfn_map)
+ end_pfn = end_pfn_map;
+
++ printk("end_pfn_map = %lu\n", end_pfn_map);
+ return end_pfn;
+ }
+
+-/*
+- * Compute how much memory is missing in a range.
+- * Unlike the other functions in this file the arguments are in page numbers.
+- */
+-unsigned long __init
+-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
+-{
+- unsigned long ram = 0;
+- unsigned long start = start_pfn << PAGE_SHIFT;
+- unsigned long end = end_pfn << PAGE_SHIFT;
+- int i;
+- for (i = 0; i < e820.nr_map; i++) {
+- struct e820entry *ei = &e820.map[i];
+- unsigned long last, addr;
+-
+- if (ei->type != E820_RAM ||
+- ei->addr+ei->size <= start ||
+- ei->addr >= end)
+- continue;
+-
+- addr = round_up(ei->addr, PAGE_SIZE);
+- if (addr < start)
+- addr = start;
+-
+- last = round_down(ei->addr + ei->size, PAGE_SIZE);
+- if (last >= end)
+- last = end;
+-
+- if (last > addr)
+- ram += last - addr;
+- }
+- return ((end - start) - ram) >> PAGE_SHIFT;
+-}
+-
+ /*
+ * Mark e820 reserved areas as busy for the resource manager.
+ */
+@@ -324,6 +245,98 @@ void __init e820_reserve_resources(struc
+ }
+ }
+
++#ifndef CONFIG_XEN
++/* Mark pages corresponding to given address range as nosave */
++static void __init
++e820_mark_nosave_range(unsigned long start, unsigned long end)
++{
++ unsigned long pfn, max_pfn;
++
++ if (start >= end)
++ return;
++
++ printk("Nosave address range: %016lx - %016lx\n", start, end);
++ max_pfn = end >> PAGE_SHIFT;
++ for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
++ if (pfn_valid(pfn))
++ SetPageNosave(pfn_to_page(pfn));
++}
++
++/*
++ * Find the ranges of physical addresses that do not correspond to
++ * e820 RAM areas and mark the corresponding pages as nosave for software
++ * suspend and suspend to RAM.
++ *
++ * This function requires the e820 map to be sorted and without any
++ * overlapping entries and assumes the first e820 area to be RAM.
++ */
++void __init e820_mark_nosave_regions(void)
++{
++ int i;
++ unsigned long paddr;
++
++ paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
++ for (i = 1; i < e820.nr_map; i++) {
++ struct e820entry *ei = &e820.map[i];
++
++ if (paddr < ei->addr)
++ e820_mark_nosave_range(paddr,
++ round_up(ei->addr, PAGE_SIZE));
++
++ paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
++ if (ei->type != E820_RAM)
++ e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
++ paddr);
++
++ if (paddr >= (end_pfn << PAGE_SHIFT))
++ break;
++ }
++}
++#endif
++
++/* Walk the e820 map and register active regions within a node */
++void __init
++e820_register_active_regions(int nid, unsigned long start_pfn,
++ unsigned long end_pfn)
++{
++ int i;
++ unsigned long ei_startpfn, ei_endpfn;
++ for (i = 0; i < e820.nr_map; i++) {
++ struct e820entry *ei = &e820.map[i];
++ ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
++ ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
++ >> PAGE_SHIFT;
++
++ /* Skip map entries smaller than a page */
++ if (ei_startpfn >= ei_endpfn)
++ continue;
++
++ /* Check if end_pfn_map should be updated */
++ if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
++ end_pfn_map = ei_endpfn;
++
++ /* Skip if map is outside the node */
++ if (ei->type != E820_RAM ||
++ ei_endpfn <= start_pfn ||
++ ei_startpfn >= end_pfn)
++ continue;
++
++ /* Check for overlaps */
++ if (ei_startpfn < start_pfn)
++ ei_startpfn = start_pfn;
++ if (ei_endpfn > end_pfn)
++ ei_endpfn = end_pfn;
++
++ /* Obey end_user_pfn to save on memmap */
++ if (ei_startpfn >= end_user_pfn)
++ continue;
++ if (ei_endpfn > end_user_pfn)
++ ei_endpfn = end_user_pfn;
++
++ add_active_range(nid, ei_startpfn, ei_endpfn);
++ }
++}
++
+ /*
+ * Add a memory region to the kernel e820 map.
+ */
+@@ -544,13 +557,6 @@ static int __init sanitize_e820_map(stru
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+- *
+- * We check to see that the memory map contains at least 2 elements
+- * before we'll use it, because the detection code in setup.S may
+- * not be perfect and most every PC known to man has two memory
+- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
+- * thinkpad 560x, for example, does not cooperate with the memory
+- * detection code.)
+ */
+ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+ {
+@@ -572,37 +578,20 @@ static int __init copy_e820_map(struct e
+ if (start > end)
+ return -1;
+
+-#ifndef CONFIG_XEN
+- /*
+- * Some BIOSes claim RAM in the 640k - 1M region.
+- * Not right. Fix it up.
+- *
+- * This should be removed on Hammer which is supposed to not
+- * have non e820 covered ISA mappings there, but I had some strange
+- * problems so it stays for now. -AK
+- */
+- if (type == E820_RAM) {
+- if (start < 0x100000ULL && end > 0xA0000ULL) {
+- if (start < 0xA0000ULL)
+- add_memory_region(start, 0xA0000ULL-start, type);
+- if (end <= 0x100000ULL)
+- continue;
+- start = 0x100000ULL;
+- size = end - start;
+- }
+- }
+-#endif
+-
+ add_memory_region(start, size, type);
+ } while (biosmap++,--nr_map);
+ return 0;
+ }
+
++void early_panic(char *msg)
++{
++ early_printk(msg);
++ panic(msg);
++}
++
+ #ifndef CONFIG_XEN
+ void __init setup_memory_region(void)
+ {
+- char *who = "BIOS-e820";
+-
+ /*
+ * Try to copy the BIOS-supplied E820-map.
+ *
+@@ -610,24 +599,10 @@ void __init setup_memory_region(void)
+ * the next section from 1mb->appropriate_mem_k
+ */
+ sanitize_e820_map(E820_MAP, &E820_MAP_NR);
+- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
+- unsigned long mem_size;
+-
+- /* compare results from other methods and take the greater */
+- if (ALT_MEM_K < EXT_MEM_K) {
+- mem_size = EXT_MEM_K;
+- who = "BIOS-88";
+- } else {
+- mem_size = ALT_MEM_K;
+- who = "BIOS-e801";
+- }
+-
+- e820.nr_map = 0;
+- add_memory_region(0, LOWMEMSIZE(), E820_RAM);
+- add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
+- }
++ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
++ early_panic("Cannot find a valid memory map");
+ printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+- e820_print_map(who);
++ e820_print_map("BIOS-e820");
+ }
+
+ #else /* CONFIG_XEN */
+@@ -659,20 +634,23 @@ void __init setup_memory_region(void)
+
+ sanitize_e820_map(map, (char *)&memmap.nr_entries);
+
+- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
++ if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
++ early_panic("Cannot find a valid memory map");
+
+ printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+ e820_print_map("Xen");
+ }
+ #endif
+
+-void __init parse_memopt(char *p, char **from)
+-{
++static int __init parse_memopt(char *p)
++{
+ int i;
+ unsigned long current_end;
+ unsigned long end;
+
+- end_user_pfn = memparse(p, from);
++ if (!p)
++ return -EINVAL;
++ end_user_pfn = memparse(p, &p);
+ end_user_pfn >>= PAGE_SHIFT;
+
+ end = end_user_pfn<<PAGE_SHIFT;
+@@ -689,27 +667,61 @@ void __init parse_memopt(char *p, char *
+ else
+ add_memory_region(current_end, end - current_end, E820_RAM);
+ }
++
++ return 0;
+ }
++early_param("mem", parse_memopt);
++
++static int userdef __initdata;
+
+-void __init parse_memmapopt(char *p, char **from)
++static int __init parse_memmap_opt(char *p)
+ {
++ char *oldp;
+ unsigned long long start_at, mem_size;
+
+- mem_size = memparse(p, from);
+- p = *from;
++ if (!strcmp(p, "exactmap")) {
++#ifdef CONFIG_CRASH_DUMP
++		/* If we are doing a crash dump, we
++		 * still need to know the real memory
++		 * size before the original memory
++		 * map is reset.
++		 */
++ e820_register_active_regions(0, 0, -1UL);
++ saved_max_pfn = e820_end_of_ram();
++ remove_all_active_ranges();
++#endif
++ end_pfn_map = 0;
++ e820.nr_map = 0;
++ userdef = 1;
++ return 0;
++ }
++
++ oldp = p;
++ mem_size = memparse(p, &p);
++ if (p == oldp)
++ return -EINVAL;
+ if (*p == '@') {
+- start_at = memparse(p+1, from);
++ start_at = memparse(p+1, &p);
+ add_memory_region(start_at, mem_size, E820_RAM);
+ } else if (*p == '#') {
+- start_at = memparse(p+1, from);
++ start_at = memparse(p+1, &p);
+ add_memory_region(start_at, mem_size, E820_ACPI);
+ } else if (*p == '$') {
+- start_at = memparse(p+1, from);
++ start_at = memparse(p+1, &p);
+ add_memory_region(start_at, mem_size, E820_RESERVED);
+ } else {
+ end_user_pfn = (mem_size >> PAGE_SHIFT);
+ }
+- p = *from;
++ return *p == '\0' ? 0 : -EINVAL;
++}
++early_param("memmap", parse_memmap_opt);
++
++void finish_e820_parsing(void)
++{
++ if (userdef) {
++ printk(KERN_INFO "user-defined physical RAM map:\n");
++ e820_print_map("user");
++ }
+ }
+
+ unsigned long pci_mem_start = 0xaeedbabe;
+Index: 10.3-2007-11-26/arch/x86_64/kernel/early_printk-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/early_printk-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/early_printk-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...)
+
+ static int __initdata keep_early;
+
+-int __init setup_early_printk(char *opt)
++static int __init setup_early_printk(char *buf)
+ {
+- char *space;
+- char buf[256];
++ if (!buf)
++ return 0;
+
+ if (early_console_initialized)
+- return 1;
+-
+- strlcpy(buf,opt,sizeof(buf));
+- space = strchr(buf, ' ');
+- if (space)
+- *space = 0;
++ return 0;
++ early_console_initialized = 1;
+
+- if (strstr(buf,"keep"))
++ if (strstr(buf, "keep"))
+ keep_early = 1;
+
+ if (!strncmp(buf, "serial", 6)) {
+@@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt)
+ early_console = &simnow_console;
+ keep_early = 1;
+ }
+- early_console_initialized = 1;
+ register_console(early_console);
+ return 0;
+ }
+
++early_param("earlyprintk", setup_early_printk);
++
+ void __init disable_early_printk(void)
+ {
+ if (!early_console_initialized || !early_console)
+@@ -299,4 +296,3 @@ void __init disable_early_printk(void)
+ }
+ }
+
+-__setup("earlyprintk=", setup_early_printk);
+Index: 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/entry-xen.S 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S 2007-10-22 13:53:08.000000000 +0200
+@@ -4,9 +4,6 @@
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+- *
+- * $Id$
+- *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Asit Mallick <asit.k.mallick@intel.com>
+ * Modified for Xen
+@@ -26,15 +23,25 @@
+ * at the top of the kernel process stack.
+ * - partial stack frame: partially saved registers upto R11.
+ * - full stack frame: Like partial stack frame, but all register saved.
+- *
+- * TODO:
+- * - schedule it carefully for the final hardware.
++ *
++ * Some macro usage:
++ * - CFI macros are used to generate dwarf2 unwind information for better
++ * backtraces. They don't change any code.
++ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
++ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
++ * There are unfortunately lots of special cases where some registers
++ *   There are unfortunately lots of special cases where some registers
++ *   are not touched. The macro is a big mess that should be cleaned up.
++ * Gives a full stack frame.
++ * - ENTRY/END Define functions in the symbol table.
++ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
++ * frame that is otherwise undefined after a SYSCALL
++ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
++ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
+ */
+
+-#define ASSEMBLY 1
+ #include <linux/linkage.h>
+ #include <asm/segment.h>
+-#include <asm/smp.h>
+ #include <asm/cache.h>
+ #include <asm/errno.h>
+ #include <asm/dwarf2.h>
+@@ -119,6 +126,7 @@ NMI_MASK = 0x80000000
+ .macro CFI_DEFAULT_STACK start=1,adj=0
+ .if \start
+ CFI_STARTPROC simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET)
+ .else
+ CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
+@@ -180,6 +188,10 @@ NMI_MASK = 0x80000000
+ /* rdi: prev */
+ ENTRY(ret_from_fork)
+ CFI_DEFAULT_STACK
++ push kernel_eflags(%rip)
++ CFI_ADJUST_CFA_OFFSET 4
++ popf # reset kernel eflags
++ CFI_ADJUST_CFA_OFFSET -4
+ call schedule_tail
+ GET_THREAD_INFO(%rcx)
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+@@ -205,6 +217,7 @@ END(ret_from_fork)
+ */
+ .macro _frame ref
+ CFI_STARTPROC simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,SS+8-\ref
+ /*CFI_REL_OFFSET ss,SS-\ref*/
+ CFI_REL_OFFSET rsp,RSP-\ref
+@@ -337,6 +350,8 @@ tracesys:
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ cmpq $__NR_syscall_max,%rax
++ movq $-ENOSYS,%rcx
++ cmova %rcx,%rax
+ ja 1f
+ movq %r10,%rcx /* fixup for C */
+ call *sys_call_table(,%rax,8)
+@@ -352,6 +367,7 @@ END(system_call)
+ */
+ ENTRY(int_ret_from_sys_call)
+ CFI_STARTPROC simple
++ CFI_SIGNAL_FRAME
+ CFI_DEF_CFA rsp,SS+8-ARGOFFSET
+ /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
+ CFI_REL_OFFSET rsp,RSP-ARGOFFSET
+@@ -586,8 +602,7 @@ retint_signal:
+ #ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption */
+ /* rcx: threadinfo. interrupts off. */
+- .p2align
+-retint_kernel:
++ENTRY(retint_kernel)
+ cmpl $0,threadinfo_preempt_count(%rcx)
+ jnz retint_restore_args
+ bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
+@@ -647,7 +662,6 @@ ENTRY(call_function_interrupt)
+ END(call_function_interrupt)
+ #endif
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+ ENTRY(apic_timer_interrupt)
+ apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
+ END(apic_timer_interrupt)
+@@ -659,7 +673,6 @@ END(error_interrupt)
+ ENTRY(spurious_interrupt)
+ apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
+ END(spurious_interrupt)
+-#endif
+ #endif /* !CONFIG_XEN */
+
+ /*
+@@ -758,7 +771,9 @@ paranoid_exit\trace:
+ testl $3,CS(%rsp)
+ jnz paranoid_userspace\trace
+ paranoid_swapgs\trace:
++ .if \trace
+ TRACE_IRQS_IRETQ 0
++ .endif
+ swapgs
+ paranoid_restore\trace:
+ RESTORE_ALL 8
+@@ -805,7 +820,7 @@ paranoid_schedule\trace:
+ * Exception entry point. This expects an error code/orig_rax on the stack
+ * and the exception handler in %rax.
+ */
+-ENTRY(error_entry)
++KPROBE_ENTRY(error_entry)
+ _frame RDI
+ CFI_REL_OFFSET rax,0
+ /* rdi slot contains rax, oldrax contains error code */
+@@ -899,7 +914,7 @@ error_kernelspace:
+ jmp error_sti
+ #endif
+ CFI_ENDPROC
+-END(error_entry)
++KPROBE_END(error_entry)
+
+ ENTRY(hypervisor_callback)
+ zeroentry do_hypervisor_callback
+@@ -939,26 +954,6 @@ ENTRY(do_hypervisor_callback) # do_hyp
+ CFI_ENDPROC
+ END(do_hypervisor_callback)
+
+-#ifdef CONFIG_X86_LOCAL_APIC
+-KPROBE_ENTRY(nmi)
+- zeroentry do_nmi_callback
+-ENTRY(do_nmi_callback)
+- CFI_STARTPROC
+- addq $8, %rsp
+- CFI_ENDPROC
+- CFI_DEFAULT_STACK
+- call do_nmi
+- orl $NMI_MASK,EFLAGS(%rsp)
+- RESTORE_REST
+- XEN_BLOCK_EVENTS(%rsi)
+- TRACE_IRQS_OFF
+- GET_THREAD_INFO(%rcx)
+- jmp retint_restore_args
+- CFI_ENDPROC
+- .previous .text
+-END(nmi)
+-#endif
+-
+ ALIGN
+ restore_all_enable_events:
+ CFI_DEFAULT_STACK adj=1
+@@ -1124,7 +1119,7 @@ ENDPROC(child_rip)
+ * do_sys_execve asm fallback arguments:
+ * rdi: name, rsi: argv, rdx: envp, fake frame on the stack
+ */
+-ENTRY(execve)
++ENTRY(kernel_execve)
+ CFI_STARTPROC
+ FAKE_STACK_FRAME $0
+ SAVE_ALL
+@@ -1138,12 +1133,11 @@ ENTRY(execve)
+ UNFAKE_STACK_FRAME
+ ret
+ CFI_ENDPROC
+-ENDPROC(execve)
++ENDPROC(kernel_execve)
+
+ KPROBE_ENTRY(page_fault)
+ errorentry do_page_fault
+-END(page_fault)
+- .previous .text
++KPROBE_END(page_fault)
+
+ ENTRY(coprocessor_error)
+ zeroentry do_coprocessor_error
+@@ -1165,25 +1159,25 @@ KPROBE_ENTRY(debug)
+ zeroentry do_debug
+ /* paranoidexit
+ CFI_ENDPROC */
+-END(debug)
+- .previous .text
++KPROBE_END(debug)
+
+-#if 0
+- /* runs on exception stack */
+ KPROBE_ENTRY(nmi)
+- INTR_FRAME
+- pushq $-1
+- CFI_ADJUST_CFA_OFFSET 8
+- paranoidentry do_nmi, 0, 0
+-#ifdef CONFIG_TRACE_IRQFLAGS
+- paranoidexit 0
+-#else
+- jmp paranoid_exit1
+- CFI_ENDPROC
+-#endif
+-END(nmi)
+- .previous .text
+-#endif
++ zeroentry do_nmi_callback
++KPROBE_END(nmi)
++do_nmi_callback:
++ CFI_STARTPROC
++ addq $8, %rsp
++ CFI_ENDPROC
++ CFI_DEFAULT_STACK
++ call do_nmi
++ orl $NMI_MASK,EFLAGS(%rsp)
++ RESTORE_REST
++ XEN_BLOCK_EVENTS(%rsi)
++ TRACE_IRQS_OFF
++ GET_THREAD_INFO(%rcx)
++ jmp retint_restore_args
++ CFI_ENDPROC
++END(do_nmi_callback)
+
+ KPROBE_ENTRY(int3)
+ /* INTR_FRAME
+@@ -1192,8 +1186,7 @@ KPROBE_ENTRY(int3)
+ zeroentry do_int3
+ /* jmp paranoid_exit1
+ CFI_ENDPROC */
+-END(int3)
+- .previous .text
++KPROBE_END(int3)
+
+ ENTRY(overflow)
+ zeroentry do_overflow
+@@ -1244,8 +1237,7 @@ END(stack_segment)
+
+ KPROBE_ENTRY(general_protection)
+ errorentry do_general_protection
+-END(general_protection)
+- .previous .text
++KPROBE_END(general_protection)
+
+ ENTRY(alignment_check)
+ errorentry do_alignment_check
+Index: 10.3-2007-11-26/arch/x86_64/kernel/genapic_xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/genapic_xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/genapic_xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -71,6 +71,13 @@ static cpumask_t xen_target_cpus(void)
+ return cpu_online_map;
+ }
+
++static cpumask_t xen_vector_allocation_domain(int cpu)
++{
++ cpumask_t domain = CPU_MASK_NONE;
++ cpu_set(cpu, domain);
++ return domain;
++}
++
+ /*
+ * Set up the logical destination ID.
+ * Do nothing, not called now.
+@@ -147,8 +154,8 @@ struct genapic apic_xen = {
+ .int_delivery_mode = dest_LowestPrio,
+ #endif
+ .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+- .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+ .target_cpus = xen_target_cpus,
++ .vector_allocation_domain = xen_vector_allocation_domain,
+ #ifdef CONFIG_XEN_PRIVILEGED_GUEST
+ .apic_id_registered = xen_apic_id_registered,
+ #endif
+Index: 10.3-2007-11-26/arch/x86_64/kernel/head-xen.S
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head-xen.S 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/head-xen.S 2007-10-22 13:53:08.000000000 +0200
+@@ -5,9 +5,6 @@
+ * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
+ * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
+- *
+- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
+- *
+ * Jun Nakajima <jun.nakajima@intel.com>
+ * Modified for Xen
+ */
+@@ -138,7 +135,7 @@ ENTRY(cpu_gdt_table)
+ .quad 0,0 /* TSS */
+ .quad 0,0 /* LDT */
+ .quad 0,0,0 /* three TLS descriptors */
+- .quad 0 /* unused */
++ .quad 0x0000f40000000000 /* node/CPU stored in limit */
+ gdt_end:
+ /* asm/segment.h:GDT_ENTRIES must match this */
+ /* This should be a multiple of the cache line size */
+Index: 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head64-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -54,11 +54,9 @@ static void __init copy_bootdata(char *r
+ new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
+ if (!new_data) {
+ if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
+- printk("so old bootloader that it does not support commandline?!\n");
+ return;
+ }
+ new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
+- printk("old bootloader convention, maybe loadlin?\n");
+ }
+ command_line = (char *) ((u64)(new_data));
+ memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
+@@ -70,25 +68,6 @@ static void __init copy_bootdata(char *r
+ memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
+ saved_command_line[max_cmdline-1] = '\0';
+ #endif
+- printk("Bootdata ok (command line is %s)\n", saved_command_line);
+-}
+-
+-static void __init setup_boot_cpu_data(void)
+-{
+- unsigned int dummy, eax;
+-
+- /* get vendor info */
+- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
+- (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
+- (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
+- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
+-
+- /* get cpu type */
+- cpuid(1, &eax, &dummy, &dummy,
+- (unsigned int *) &boot_cpu_data.x86_capability);
+- boot_cpu_data.x86 = (eax >> 8) & 0xf;
+- boot_cpu_data.x86_model = (eax >> 4) & 0xf;
+- boot_cpu_data.x86_mask = eax & 0xf;
+ }
+
+ #include <xen/interface/memory.h>
+@@ -101,7 +80,6 @@ void __init x86_64_start_kernel(char * r
+ {
+ struct xen_machphys_mapping mapping;
+ unsigned long machine_to_phys_nr_ents;
+- char *s;
+ int i;
+
+ setup_xen_features();
+@@ -128,10 +106,7 @@ void __init x86_64_start_kernel(char * r
+ asm volatile("lidt %0" :: "m" (idt_descr));
+ #endif
+
+- /*
+- * This must be called really, really early:
+- */
+- lockdep_init();
++ early_printk("Kernel alive\n");
+
+ for (i = 0; i < NR_CPUS; i++)
+ cpu_pda(i) = &boot_cpu_pda[i];
+@@ -141,22 +116,5 @@ void __init x86_64_start_kernel(char * r
+ #ifdef CONFIG_SMP
+ cpu_set(0, cpu_online_map);
+ #endif
+- s = strstr(saved_command_line, "earlyprintk=");
+- if (s != NULL)
+- setup_early_printk(strchr(s, '=') + 1);
+-#ifdef CONFIG_NUMA
+- s = strstr(saved_command_line, "numa=");
+- if (s != NULL)
+- numa_setup(s+5);
+-#endif
+-#ifdef CONFIG_X86_IO_APIC
+- if (strstr(saved_command_line, "disableapic"))
+- disable_apic = 1;
+-#endif
+- /* You need early console to see that */
+- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
+- panic("Kernel too big for kernel mapping\n");
+-
+- setup_boot_cpu_data();
+ start_kernel();
+ }
+Index: 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/io_apic-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -26,9 +26,12 @@
+ #include <linux/delay.h>
+ #include <linux/sched.h>
+ #include <linux/smp_lock.h>
++#include <linux/pci.h>
+ #include <linux/mc146818rtc.h>
+ #include <linux/acpi.h>
+ #include <linux/sysdev.h>
++#include <linux/msi.h>
++#include <linux/htirq.h>
+ #ifdef CONFIG_ACPI
+ #include <acpi/acpi_bus.h>
+ #endif
+@@ -41,6 +44,10 @@
+ #include <asm/acpi.h>
+ #include <asm/dma.h>
+ #include <asm/nmi.h>
++#include <asm/msidef.h>
++#include <asm/hypertransport.h>
++
++static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
+
+ #define __apicdebuginit __init
+
+@@ -48,17 +55,29 @@ int sis_apic_bug; /* not actually suppor
+
+ static int no_timer_check;
+
+-int disable_timer_pin_1 __initdata;
++static int disable_timer_pin_1 __initdata;
+
+-#ifndef CONFIG_XEN
+-int timer_over_8254 __initdata = 0;
++#ifdef CONFIG_XEN
++#include <xen/interface/xen.h>
++#include <xen/interface/physdev.h>
++
++/* Fake i8259 */
++#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
++#define disable_8259A_irq(_irq) ((void)0)
++#define i8259A_irq_pending(_irq) (0)
++
++unsigned long io_apic_irqs;
++
++#define clear_IO_APIC() ((void)0)
++#else
++int timer_over_8254 __initdata = 1;
+
+ /* Where if anywhere is the i8259 connect in external int mode */
+ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+ #endif
+
+ static DEFINE_SPINLOCK(ioapic_lock);
+-static DEFINE_SPINLOCK(vector_lock);
++DEFINE_SPINLOCK(vector_lock);
+
+ /*
+ * # of IRQ routing registers
+@@ -83,28 +102,27 @@ static struct irq_pin_list {
+ short apic, pin, next;
+ } irq_2_pin[PIN_MAP_SIZE];
+
+-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
+-#ifdef CONFIG_PCI_MSI
+-#define vector_to_irq(vector) \
+- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
+-#else
+-#define vector_to_irq(vector) (vector)
+-#endif
+-
+-#ifdef CONFIG_XEN
+-
+-#include <xen/interface/xen.h>
+-#include <xen/interface/physdev.h>
+-
+-/* Fake i8259 */
+-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
+-#define disable_8259A_irq(_irq) ((void)0)
+-#define i8259A_irq_pending(_irq) (0)
++#ifndef CONFIG_XEN
++struct io_apic {
++ unsigned int index;
++ unsigned int unused[3];
++ unsigned int data;
++};
+
+-unsigned long io_apic_irqs;
++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
++{
++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
++ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
++}
++#endif
+
+-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+ {
++#ifndef CONFIG_XEN
++ struct io_apic __iomem *io_apic = io_apic_base(apic);
++ writel(reg, &io_apic->index);
++ return readl(&io_apic->data);
++#else
+ struct physdev_apic apic_op;
+ int ret;
+
+@@ -114,31 +132,131 @@ static inline unsigned int xen_io_apic_r
+ if (ret)
+ return ret;
+ return apic_op.value;
++#endif
+ }
+
+-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+ {
++#ifndef CONFIG_XEN
++ struct io_apic __iomem *io_apic = io_apic_base(apic);
++ writel(reg, &io_apic->index);
++ writel(value, &io_apic->data);
++#else
+ struct physdev_apic apic_op;
+
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
+ apic_op.reg = reg;
+ apic_op.value = value;
+ HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op);
++#endif
++}
++
++#ifndef CONFIG_XEN
++/*
++ * Re-write a value: to be used for read-modify-write
++ * cycles where the read already set up the index register.
++ */
++static inline void io_apic_modify(unsigned int apic, unsigned int value)
++{
++ struct io_apic __iomem *io_apic = io_apic_base(apic);
++ writel(value, &io_apic->data);
+ }
++#else
++#define io_apic_modify io_apic_write
++#endif
+
+-#define io_apic_read(a,r) xen_io_apic_read(a,r)
+-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
++/*
++ * Synchronize the IO-APIC and the CPU by doing
++ * a dummy read from the IO-APIC
++ */
++static inline void io_apic_sync(unsigned int apic)
++{
++#ifndef CONFIG_XEN
++ struct io_apic __iomem *io_apic = io_apic_base(apic);
++ readl(&io_apic->data);
++#endif
++}
+
+-#define clear_IO_APIC() ((void)0)
++union entry_union {
++ struct { u32 w1, w2; };
++ struct IO_APIC_route_entry entry;
++};
+
+-#else
++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
++{
++ union entry_union eu;
++ unsigned long flags;
++ spin_lock_irqsave(&ioapic_lock, flags);
++ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++ return eu.entry;
++}
++
++/*
++ * When we write a new IO APIC routing entry, we need to write the high
++ * word first! If the mask bit in the low word is clear, we will enable
++ * the interrupt, and we need to make sure the entry is fully populated
++ * before that happens.
++ */
++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
++{
++ unsigned long flags;
++ union entry_union eu;
++ eu.entry = e;
++ spin_lock_irqsave(&ioapic_lock, flags);
++ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++}
++
++#ifndef CONFIG_XEN
++/*
++ * When we mask an IO APIC routing entry, we need to write the low
++ * word first, in order to set the mask bit before we change the
++ * high bits!
++ */
++static void ioapic_mask_entry(int apic, int pin)
++{
++ unsigned long flags;
++ union entry_union eu = { .entry.mask = 1 };
++
++ spin_lock_irqsave(&ioapic_lock, flags);
++ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
++ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
++ spin_unlock_irqrestore(&ioapic_lock, flags);
++}
+
+ #ifdef CONFIG_SMP
++static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
++{
++ int apic, pin;
++ struct irq_pin_list *entry = irq_2_pin + irq;
++
++ BUG_ON(irq >= NR_IRQS);
++ for (;;) {
++ unsigned int reg;
++ apic = entry->apic;
++ pin = entry->pin;
++ if (pin == -1)
++ break;
++ io_apic_write(apic, 0x11 + pin*2, dest);
++ reg = io_apic_read(apic, 0x10 + pin*2);
++ reg &= ~0x000000ff;
++ reg |= vector;
++ io_apic_modify(apic, reg);
++ if (!entry->next)
++ break;
++ entry = irq_2_pin + entry->next;
++ }
++}
++
+ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+ {
+ unsigned long flags;
+ unsigned int dest;
+ cpumask_t tmp;
++ int vector;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+@@ -146,7 +264,11 @@ static void set_ioapic_affinity_irq(unsi
+
+ cpus_and(mask, tmp, CPU_MASK_ALL);
+
+- dest = cpu_mask_to_apicid(mask);
++ vector = assign_irq_vector(irq, mask, &tmp);
++ if (vector < 0)
++ return;
++
++ dest = cpu_mask_to_apicid(tmp);
+
+ /*
+ * Only the high 8 bits are valid.
+@@ -154,13 +276,12 @@ static void set_ioapic_affinity_irq(unsi
+ dest = SET_APIC_LOGICAL_ID(dest);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+- __DO_ACTION(1, = dest, )
+- set_irq_info(irq, mask);
++ __target_IO_APIC_irq(irq, dest, vector);
++ set_native_irq_info(irq, mask);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+ #endif
+-
+-#endif /* !CONFIG_XEN */
++#endif
+
+ /*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+@@ -240,24 +361,15 @@ static void unmask_IO_APIC_irq (unsigned
+ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+ {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry = ioapic_read_entry(apic, pin);
+ if (entry.delivery_mode == dest_SMI)
+ return;
+ /*
+ * Disable it in the IO-APIC irq-routing table:
+ */
+- memset(&entry, 0, sizeof(entry));
+- entry.mask = 1;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
+- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_mask_entry(apic, pin);
+ }
+
+ static void clear_IO_APIC (void)
+@@ -271,16 +383,6 @@ static void clear_IO_APIC (void)
+
+ #endif /* !CONFIG_XEN */
+
+-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+-
+-/*
+- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+- * specific CPU-side IRQs.
+- */
+-
+-#define MAX_PIRQS 8
+-static int pirq_entries [MAX_PIRQS];
+-static int pirqs_enabled;
+ int skip_ioapic_setup;
+ int ioapic_force;
+
+@@ -289,18 +391,17 @@ int ioapic_force;
+ static int __init disable_ioapic_setup(char *str)
+ {
+ skip_ioapic_setup = 1;
+- return 1;
++ return 0;
+ }
++early_param("noapic", disable_ioapic_setup);
+
+-static int __init enable_ioapic_setup(char *str)
++/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
++static int __init disable_timer_pin_setup(char *arg)
+ {
+- ioapic_force = 1;
+- skip_ioapic_setup = 0;
++ disable_timer_pin_1 = 1;
+ return 1;
+ }
+-
+-__setup("noapic", disable_ioapic_setup);
+-__setup("apic", enable_ioapic_setup);
++__setup("disable_timer_pin_1", disable_timer_pin_setup);
+
+ #ifndef CONFIG_XEN
+ static int __init setup_disable_8254_timer(char *s)
+@@ -318,137 +419,6 @@ __setup("disable_8254_timer", setup_disa
+ __setup("enable_8254_timer", setup_enable_8254_timer);
+ #endif /* !CONFIG_XEN */
+
+-#include <asm/pci-direct.h>
+-#include <linux/pci_ids.h>
+-#include <linux/pci.h>
+-
+-
+-#ifdef CONFIG_ACPI
+-
+-static int nvidia_hpet_detected __initdata;
+-
+-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
+-{
+- nvidia_hpet_detected = 1;
+- return 0;
+-}
+-#endif
+-
+-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
+- off. Check for an Nvidia or VIA PCI bridge and turn it off.
+- Use pci direct infrastructure because this runs before the PCI subsystem.
+-
+- Can be overwritten with "apic"
+-
+- And another hack to disable the IOMMU on VIA chipsets.
+-
+- ... and others. Really should move this somewhere else.
+-
+- Kludge-O-Rama. */
+-void __init check_ioapic(void)
+-{
+- int num,slot,func;
+- /* Poor man's PCI discovery */
+- for (num = 0; num < 32; num++) {
+- for (slot = 0; slot < 32; slot++) {
+- for (func = 0; func < 8; func++) {
+- u32 class;
+- u32 vendor;
+- u8 type;
+- class = read_pci_config(num,slot,func,
+- PCI_CLASS_REVISION);
+- if (class == 0xffffffff)
+- break;
+-
+- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
+- continue;
+-
+- vendor = read_pci_config(num, slot, func,
+- PCI_VENDOR_ID);
+- vendor &= 0xffff;
+- switch (vendor) {
+- case PCI_VENDOR_ID_VIA:
+-#ifdef CONFIG_IOMMU
+- if ((end_pfn > MAX_DMA32_PFN ||
+- force_iommu) &&
+- !iommu_aperture_allowed) {
+- printk(KERN_INFO
+- "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
+- iommu_aperture_disabled = 1;
+- }
+-#endif
+- return;
+- case PCI_VENDOR_ID_NVIDIA:
+-#ifdef CONFIG_ACPI
+- /*
+- * All timer overrides on Nvidia are
+- * wrong unless HPET is enabled.
+- */
+- nvidia_hpet_detected = 0;
+- acpi_table_parse(ACPI_HPET,
+- nvidia_hpet_check);
+- if (nvidia_hpet_detected == 0) {
+- acpi_skip_timer_override = 1;
+- printk(KERN_INFO "Nvidia board "
+- "detected. Ignoring ACPI "
+- "timer override.\n");
+- }
+-#endif
+- /* RED-PEN skip them on mptables too? */
+- return;
+- case PCI_VENDOR_ID_ATI:
+-
+- /* This should be actually default, but
+- for 2.6.16 let's do it for ATI only where
+- it's really needed. */
+-#ifndef CONFIG_XEN
+- if (timer_over_8254 == 1) {
+- timer_over_8254 = 0;
+- printk(KERN_INFO
+- "ATI board detected. Disabling timer routing over 8254.\n");
+- }
+-#endif
+- return;
+- }
+-
+-
+- /* No multi-function device? */
+- type = read_pci_config_byte(num,slot,func,
+- PCI_HEADER_TYPE);
+- if (!(type & 0x80))
+- break;
+- }
+- }
+- }
+-}
+-
+-static int __init ioapic_pirq_setup(char *str)
+-{
+- int i, max;
+- int ints[MAX_PIRQS+1];
+-
+- get_options(str, ARRAY_SIZE(ints), ints);
+-
+- for (i = 0; i < MAX_PIRQS; i++)
+- pirq_entries[i] = -1;
+-
+- pirqs_enabled = 1;
+- apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
+- max = MAX_PIRQS;
+- if (ints[0] < MAX_PIRQS)
+- max = ints[0];
+-
+- for (i = 0; i < max; i++) {
+- apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+- /*
+- * PIRQs are mapped upside down, usually.
+- */
+- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+- }
+- return 1;
+-}
+-
+-__setup("pirq=", ioapic_pirq_setup);
+
+ /*
+ * Find the IRQ entry number of a certain pin.
+@@ -478,9 +448,7 @@ static int __init find_isa_irq_pin(int i
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
++ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+
+@@ -496,9 +464,7 @@ static int __init find_isa_irq_apic(int
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
++ if (test_bit(lbus, mp_bus_not_pci) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+ break;
+@@ -539,7 +505,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
+ mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+ break;
+
+- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
++ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].mpc_irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
+@@ -562,27 +528,6 @@ int IO_APIC_get_PCI_irq_vector(int bus,
+ return best_guess;
+ }
+
+-/*
+- * EISA Edge/Level control register, ELCR
+- */
+-static int EISA_ELCR(unsigned int irq)
+-{
+- if (irq < 16) {
+- unsigned int port = 0x4d0 + (irq >> 3);
+- return (inb(port) >> (irq & 7)) & 1;
+- }
+- apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
+- return 0;
+-}
+-
+-/* EISA interrupts are always polarity zero and can be edge or level
+- * trigger depending on the ELCR value. If an interrupt is listed as
+- * EISA conforming in the MP table, that means its trigger type must
+- * be read in from the ELCR */
+-
+-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+-#define default_EISA_polarity(idx) (0)
+-
+ /* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+@@ -595,12 +540,6 @@ static int EISA_ELCR(unsigned int irq)
+ #define default_PCI_trigger(idx) (1)
+ #define default_PCI_polarity(idx) (1)
+
+-/* MCA interrupts are always polarity zero level triggered,
+- * when listed as conforming in the MP table. */
+-
+-#define default_MCA_trigger(idx) (1)
+-#define default_MCA_polarity(idx) (0)
+-
+ static int __init MPBIOS_polarity(int idx)
+ {
+ int bus = mp_irqs[idx].mpc_srcbus;
+@@ -612,38 +551,11 @@ static int __init MPBIOS_polarity(int id
+ switch (mp_irqs[idx].mpc_irqflag & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent polarity */
+- {
+- switch (mp_bus_id_to_type[bus])
+- {
+- case MP_BUS_ISA: /* ISA pin */
+- {
+- polarity = default_ISA_polarity(idx);
+- break;
+- }
+- case MP_BUS_EISA: /* EISA pin */
+- {
+- polarity = default_EISA_polarity(idx);
+- break;
+- }
+- case MP_BUS_PCI: /* PCI pin */
+- {
+- polarity = default_PCI_polarity(idx);
+- break;
+- }
+- case MP_BUS_MCA: /* MCA pin */
+- {
+- polarity = default_MCA_polarity(idx);
+- break;
+- }
+- default:
+- {
+- printk(KERN_WARNING "broken BIOS!!\n");
+- polarity = 1;
+- break;
+- }
+- }
++ if (test_bit(bus, mp_bus_not_pci))
++ polarity = default_ISA_polarity(idx);
++ else
++ polarity = default_PCI_polarity(idx);
+ break;
+- }
+ case 1: /* high active */
+ {
+ polarity = 0;
+@@ -681,38 +593,11 @@ static int MPBIOS_trigger(int idx)
+ switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+ {
+ case 0: /* conforms, ie. bus-type dependent */
+- {
+- switch (mp_bus_id_to_type[bus])
+- {
+- case MP_BUS_ISA: /* ISA pin */
+- {
+- trigger = default_ISA_trigger(idx);
+- break;
+- }
+- case MP_BUS_EISA: /* EISA pin */
+- {
+- trigger = default_EISA_trigger(idx);
+- break;
+- }
+- case MP_BUS_PCI: /* PCI pin */
+- {
+- trigger = default_PCI_trigger(idx);
+- break;
+- }
+- case MP_BUS_MCA: /* MCA pin */
+- {
+- trigger = default_MCA_trigger(idx);
+- break;
+- }
+- default:
+- {
+- printk(KERN_WARNING "broken BIOS!!\n");
+- trigger = 1;
+- break;
+- }
+- }
++ if (test_bit(bus, mp_bus_not_pci))
++ trigger = default_ISA_trigger(idx);
++ else
++ trigger = default_PCI_trigger(idx);
+ break;
+- }
+ case 1: /* edge */
+ {
+ trigger = 0;
+@@ -749,64 +634,6 @@ static inline int irq_trigger(int idx)
+ return MPBIOS_trigger(idx);
+ }
+
+-static int next_irq = 16;
+-
+-/*
+- * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
+- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+- * from ACPI, which can reach 800 in large boxen.
+- *
+- * Compact the sparse GSI space into a sequential IRQ series and reuse
+- * vectors if possible.
+- */
+-int gsi_irq_sharing(int gsi)
+-{
+- int i, tries, vector;
+-
+- BUG_ON(gsi >= NR_IRQ_VECTORS);
+-
+- if (platform_legacy_irq(gsi))
+- return gsi;
+-
+- if (gsi_2_irq[gsi] != 0xFF)
+- return (int)gsi_2_irq[gsi];
+-
+- tries = NR_IRQS;
+- try_again:
+- vector = assign_irq_vector(gsi);
+-
+- /*
+- * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+- * use of vector and if found, return that IRQ. However, we never want
+- * to share legacy IRQs, which usually have a different trigger mode
+- * than PCI.
+- */
+- for (i = 0; i < NR_IRQS; i++)
+- if (IO_APIC_VECTOR(i) == vector)
+- break;
+- if (platform_legacy_irq(i)) {
+- if (--tries >= 0) {
+- IO_APIC_VECTOR(i) = 0;
+- goto try_again;
+- }
+- panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+- }
+- if (i < NR_IRQS) {
+- gsi_2_irq[gsi] = i;
+- printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+- gsi, vector, i);
+- return i;
+- }
+-
+- i = next_irq++;
+- BUG_ON(i >= NR_IRQS);
+- gsi_2_irq[gsi] = i;
+- IO_APIC_VECTOR(i) = vector;
+- printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+- gsi, vector, i);
+- return i;
+-}
+-
+ static int pin_2_irq(int idx, int apic, int pin)
+ {
+ int irq, i;
+@@ -818,49 +645,16 @@ static int pin_2_irq(int idx, int apic,
+ if (mp_irqs[idx].mpc_dstirq != pin)
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
+
+- switch (mp_bus_id_to_type[bus])
+- {
+- case MP_BUS_ISA: /* ISA pin */
+- case MP_BUS_EISA:
+- case MP_BUS_MCA:
+- {
+- irq = mp_irqs[idx].mpc_srcbusirq;
+- break;
+- }
+- case MP_BUS_PCI: /* PCI pin */
+- {
+- /*
+- * PCI IRQs are mapped in order
+- */
+- i = irq = 0;
+- while (i < apic)
+- irq += nr_ioapic_registers[i++];
+- irq += pin;
+- irq = gsi_irq_sharing(irq);
+- break;
+- }
+- default:
+- {
+- printk(KERN_ERR "unknown bus type %d.\n",bus);
+- irq = 0;
+- break;
+- }
+- }
+- BUG_ON(irq >= NR_IRQS);
+-
+- /*
+- * PCI IRQ command line redirection. Yes, limits are hardcoded.
+- */
+- if ((pin >= 16) && (pin <= 23)) {
+- if (pirq_entries[pin-16] != -1) {
+- if (!pirq_entries[pin-16]) {
+- apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
+- } else {
+- irq = pirq_entries[pin-16];
+- apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
+- pin-16, irq);
+- }
+- }
++ if (test_bit(bus, mp_bus_not_pci)) {
++ irq = mp_irqs[idx].mpc_srcbusirq;
++ } else {
++ /*
++ * PCI IRQs are mapped in order
++ */
++ i = irq = 0;
++ while (i < apic)
++ irq += nr_ioapic_registers[i++];
++ irq += pin;
+ }
+ BUG_ON(irq >= NR_IRQS);
+ return irq;
+@@ -884,43 +678,68 @@ static inline int IO_APIC_irq_trigger(in
+ }
+
+ /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
+-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
++static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
+
+-int assign_irq_vector(int irq)
++static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
+ {
+- unsigned long flags;
+ int vector;
+ struct physdev_irq irq_op;
+
+- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
++ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
+
+- spin_lock_irqsave(&vector_lock, flags);
++ cpus_and(*result, mask, cpu_online_map);
+
+- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
+- spin_unlock_irqrestore(&vector_lock, flags);
+- return IO_APIC_VECTOR(irq);
+- }
++ if (irq_vector[irq] > 0)
++ return irq_vector[irq];
+
+ irq_op.irq = irq;
+- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
+- spin_unlock_irqrestore(&vector_lock, flags);
++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
+ return -ENOSPC;
+- }
+
+ vector = irq_op.vector;
+- vector_irq[vector] = irq;
+- if (irq != AUTO_ASSIGN)
+- IO_APIC_VECTOR(irq) = vector;
++ irq_vector[irq] = vector;
+
+- spin_unlock_irqrestore(&vector_lock, flags);
++ return vector;
++}
+
++static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
++{
++ int vector;
++ unsigned long flags;
++
++ spin_lock_irqsave(&vector_lock, flags);
++ vector = __assign_irq_vector(irq, mask, result);
++ spin_unlock_irqrestore(&vector_lock, flags);
+ return vector;
+ }
+
+-extern void (*interrupt[NR_IRQS])(void);
+ #ifndef CONFIG_XEN
+-static struct hw_interrupt_type ioapic_level_type;
+-static struct hw_interrupt_type ioapic_edge_type;
++void __setup_vector_irq(int cpu)
++{
++ /* Initialize vector_irq on a new cpu */
++ /* This function must be called with vector_lock held */
++ int irq, vector;
++
++ /* Mark the in-use vectors */
++ for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
++ if (!cpu_isset(cpu, irq_domain[irq]))
++ continue;
++ vector = irq_vector[irq];
++ per_cpu(vector_irq, cpu)[vector] = irq;
++ }
++ /* Mark the free vectors */
++ for (vector = 0; vector < NR_VECTORS; ++vector) {
++ irq = per_cpu(vector_irq, cpu)[vector];
++ if (irq < 0)
++ continue;
++ if (!cpu_isset(cpu, irq_domain[irq]))
++ per_cpu(vector_irq, cpu)[vector] = -1;
++ }
++}
++
++extern void (*interrupt[NR_IRQS])(void);
++
++static struct irq_chip ioapic_chip;
+
+ #define IOAPIC_AUTO -1
+ #define IOAPIC_EDGE 0
+@@ -928,16 +747,15 @@ static struct hw_interrupt_type ioapic_e
+
+ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
+ {
+- unsigned idx;
+-
+- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
+-
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+ trigger == IOAPIC_LEVEL)
+- irq_desc[idx].chip = &ioapic_level_type;
+- else
+- irq_desc[idx].chip = &ioapic_edge_type;
+- set_intr_gate(vector, interrupt[idx]);
++ set_irq_chip_and_handler_name(irq, &ioapic_chip,
++ handle_fasteoi_irq, "fasteoi");
++ else {
++ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
++ set_irq_chip_and_handler_name(irq, &ioapic_chip,
++ handle_edge_irq, "edge");
++ }
+ }
+ #else
+ #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
+@@ -990,16 +808,21 @@ static void __init setup_IO_APIC_irqs(vo
+ continue;
+
+ if (IO_APIC_IRQ(irq)) {
+- vector = assign_irq_vector(irq);
++ cpumask_t mask;
++ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
++ if (vector < 0)
++ continue;
++
++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+ entry.vector = vector;
+
+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
+ if (!apic && (irq < 16))
+ disable_8259A_irq(irq);
+ }
++ ioapic_write_entry(apic, pin, entry);
++
+ spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+@@ -1042,7 +865,7 @@ static void __init setup_ExtINT_IRQ0_pin
+ * The timer IRQ doesn't have to know that behind the
+ * scene we have a 8259A-master in AEOI mode ...
+ */
+- irq_desc[0].chip = &ioapic_edge_type;
++ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+
+ /*
+ * Add it to the IO-APIC irq-routing table:
+@@ -1138,10 +961,7 @@ void __apicdebuginit print_IO_APIC(void)
+ for (i = 0; i <= reg_01.bits.entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
+- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ entry = ioapic_read_entry(apic, i);
+
+ printk(KERN_DEBUG " %02x %03X %02X ",
+ i,
+@@ -1161,17 +981,12 @@ void __apicdebuginit print_IO_APIC(void)
+ );
+ }
+ }
+- if (use_pci_vector())
+- printk(KERN_INFO "Using vector-based indexing\n");
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
+ for (i = 0; i < NR_IRQS; i++) {
+ struct irq_pin_list *entry = irq_2_pin + i;
+ if (entry->pin < 0)
+ continue;
+- if (use_pci_vector() && !platform_legacy_irq(i))
+- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
+- else
+- printk(KERN_DEBUG "IRQ%d ", i);
++ printk(KERN_DEBUG "IRQ%d ", i);
+ for (;;) {
+ printk("-> %d:%d", entry->apic, entry->pin);
+ if (!entry->next)
+@@ -1342,9 +1157,6 @@ static void __init enable_IO_APIC(void)
+ irq_2_pin[i].pin = -1;
+ irq_2_pin[i].next = 0;
+ }
+- if (!pirqs_enabled)
+- for (i = 0; i < MAX_PIRQS; i++)
+- pirq_entries[i] = -1;
+
+ /*
+ * The number of IO-APIC IRQ registers (== #pins):
+@@ -1361,11 +1173,7 @@ static void __init enable_IO_APIC(void)
+ /* See if any of the pins is in ExtINT mode */
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ struct IO_APIC_route_entry entry;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
+-
++ entry = ioapic_read_entry(apic, pin);
+
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+@@ -1419,7 +1227,6 @@ void disable_IO_APIC(void)
+ */
+ if (ioapic_i8259.pin != -1) {
+ struct IO_APIC_route_entry entry;
+- unsigned long flags;
+
+ memset(&entry, 0, sizeof(entry));
+ entry.mask = 0; /* Enabled */
+@@ -1436,12 +1243,7 @@ void disable_IO_APIC(void)
+ /*
+ * Add it to the IO-APIC irq-routing table:
+ */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+- *(((int *)&entry)+1));
+- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+- *(((int *)&entry)+0));
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+ }
+
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+@@ -1449,76 +1251,6 @@ void disable_IO_APIC(void)
+ }
+
+ /*
+- * function to set the IO-APIC physical IDs based on the
+- * values stored in the MPC table.
+- *
+- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
+- */
+-
+-#ifndef CONFIG_XEN
+-static void __init setup_ioapic_ids_from_mpc (void)
+-{
+- union IO_APIC_reg_00 reg_00;
+- int apic;
+- int i;
+- unsigned char old_id;
+- unsigned long flags;
+-
+- /*
+- * Set the IOAPIC ID to the value stored in the MPC table.
+- */
+- for (apic = 0; apic < nr_ioapics; apic++) {
+-
+- /* Read the register 0 value */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- reg_00.raw = io_apic_read(apic, 0);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
+-
+- old_id = mp_ioapics[apic].mpc_apicid;
+-
+-
+- printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
+-
+-
+- /*
+- * We need to adjust the IRQ routing table
+- * if the ID changed.
+- */
+- if (old_id != mp_ioapics[apic].mpc_apicid)
+- for (i = 0; i < mp_irq_entries; i++)
+- if (mp_irqs[i].mpc_dstapic == old_id)
+- mp_irqs[i].mpc_dstapic
+- = mp_ioapics[apic].mpc_apicid;
+-
+- /*
+- * Read the right value from the MPC table and
+- * write it into the ID register.
+- */
+- apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
+- mp_ioapics[apic].mpc_apicid);
+-
+- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(apic, 0, reg_00.raw);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
+-
+- /*
+- * Sanity check
+- */
+- spin_lock_irqsave(&ioapic_lock, flags);
+- reg_00.raw = io_apic_read(apic, 0);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
+- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+- printk("could not set ID!\n");
+- else
+- apic_printk(APIC_VERBOSE," ok.\n");
+- }
+-}
+-#else
+-static void __init setup_ioapic_ids_from_mpc(void) { }
+-#endif
+-
+-/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+@@ -1572,7 +1304,7 @@ static int __init timer_irq_works(void)
+ * an edge even if it isn't on the 8259A...
+ */
+
+-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
++static unsigned int startup_ioapic_irq(unsigned int irq)
+ {
+ int was_pending = 0;
+ unsigned long flags;
+@@ -1589,107 +1321,19 @@ static unsigned int startup_edge_ioapic_
+ return was_pending;
+ }
+
+-/*
+- * Once we have recorded IRQ_PENDING already, we can mask the
+- * interrupt for real. This prevents IRQ storms from unhandled
+- * devices.
+- */
+-static void ack_edge_ioapic_irq(unsigned int irq)
+-{
+- move_irq(irq);
+- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
+- == (IRQ_PENDING | IRQ_DISABLED))
+- mask_IO_APIC_irq(irq);
+- ack_APIC_irq();
+-}
+-
+-/*
+- * Level triggered interrupts can just be masked,
+- * and shutting down and starting up the interrupt
+- * is the same as enabling and disabling them -- except
+- * with a startup need to return a "was pending" value.
+- *
+- * Level triggered interrupts are special because we
+- * do not touch any IO-APIC register while handling
+- * them. We ack the APIC in the end-IRQ handler, not
+- * in the start-IRQ-handler. Protection against reentrance
+- * from the same interrupt is still provided, both by the
+- * generic IRQ layer and by the fact that an unacked local
+- * APIC does not accept IRQs.
+- */
+-static unsigned int startup_level_ioapic_irq (unsigned int irq)
+-{
+- unmask_IO_APIC_irq(irq);
+-
+- return 0; /* don't check for pending */
+-}
+-
+-static void end_level_ioapic_irq (unsigned int irq)
+-{
+- move_irq(irq);
+- ack_APIC_irq();
+-}
+-
+-#ifdef CONFIG_PCI_MSI
+-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- return startup_edge_ioapic_irq(irq);
+-}
+-
+-static void ack_edge_ioapic_vector(unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- move_native_irq(vector);
+- ack_edge_ioapic_irq(irq);
+-}
+-
+-static unsigned int startup_level_ioapic_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- return startup_level_ioapic_irq (irq);
+-}
+-
+-static void end_level_ioapic_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- move_native_irq(vector);
+- end_level_ioapic_irq(irq);
+-}
+-
+-static void mask_IO_APIC_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- mask_IO_APIC_irq(irq);
+-}
+-
+-static void unmask_IO_APIC_vector (unsigned int vector)
+-{
+- int irq = vector_to_irq(vector);
+-
+- unmask_IO_APIC_irq(irq);
+-}
+-
+-#ifdef CONFIG_SMP
+-static void set_ioapic_affinity_vector (unsigned int vector,
+- cpumask_t cpu_mask)
++static int ioapic_retrigger_irq(unsigned int irq)
+ {
+- int irq = vector_to_irq(vector);
++ cpumask_t mask;
++ unsigned vector;
++ unsigned long flags;
+
+- set_native_irq_info(vector, cpu_mask);
+- set_ioapic_affinity_irq(irq, cpu_mask);
+-}
+-#endif // CONFIG_SMP
+-#endif // CONFIG_PCI_MSI
++ spin_lock_irqsave(&vector_lock, flags);
++ vector = irq_vector[irq];
++ cpus_clear(mask);
++ cpu_set(first_cpu(irq_domain[irq]), mask);
+
+-static int ioapic_retrigger(unsigned int irq)
+-{
+- send_IPI_self(IO_APIC_VECTOR(irq));
++ send_IPI_mask(mask, vector);
++ spin_unlock_irqrestore(&vector_lock, flags);
+
+ return 1;
+ }
+@@ -1703,32 +1347,47 @@ static int ioapic_retrigger(unsigned int
+ * races.
+ */
+
+-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+- .typename = "IO-APIC-edge",
+- .startup = startup_edge_ioapic,
+- .shutdown = shutdown_edge_ioapic,
+- .enable = enable_edge_ioapic,
+- .disable = disable_edge_ioapic,
+- .ack = ack_edge_ioapic,
+- .end = end_edge_ioapic,
+-#ifdef CONFIG_SMP
+- .set_affinity = set_ioapic_affinity,
++static void ack_apic_edge(unsigned int irq)
++{
++ move_native_irq(irq);
++ ack_APIC_irq();
++}
++
++static void ack_apic_level(unsigned int irq)
++{
++ int do_unmask_irq = 0;
++
++#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
++ /* If we are moving the irq we need to mask it */
++ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
++ do_unmask_irq = 1;
++ mask_IO_APIC_irq(irq);
++ }
+ #endif
+- .retrigger = ioapic_retrigger,
+-};
+
+-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+- .typename = "IO-APIC-level",
+- .startup = startup_level_ioapic,
+- .shutdown = shutdown_level_ioapic,
+- .enable = enable_level_ioapic,
+- .disable = disable_level_ioapic,
+- .ack = mask_and_ack_level_ioapic,
+- .end = end_level_ioapic,
++ /*
++ * We must acknowledge the irq before we move it or the acknowledgment
++ * will not propagate properly.
++ */
++ ack_APIC_irq();
++
++ /* Now we can move and re-enable the irq */
++ move_masked_irq(irq);
++ if (unlikely(do_unmask_irq))
++ unmask_IO_APIC_irq(irq);
++}
++
++static struct irq_chip ioapic_chip __read_mostly = {
++ .name = "IO-APIC",
++ .startup = startup_ioapic_irq,
++ .mask = mask_IO_APIC_irq,
++ .unmask = unmask_IO_APIC_irq,
++ .ack = ack_apic_edge,
++ .eoi = ack_apic_level,
+ #ifdef CONFIG_SMP
+- .set_affinity = set_ioapic_affinity,
++ .set_affinity = set_ioapic_affinity_irq,
+ #endif
+- .retrigger = ioapic_retrigger,
++ .retrigger = ioapic_retrigger_irq,
+ };
+ #endif /* !CONFIG_XEN */
+
+@@ -1749,12 +1408,7 @@ static inline void init_IO_APIC_traps(vo
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ int tmp = irq;
+- if (use_pci_vector()) {
+- if (!platform_legacy_irq(tmp))
+- if ((tmp = vector_to_irq(tmp)) == -1)
+- continue;
+- }
+- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
++ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+@@ -1765,7 +1419,7 @@ static inline void init_IO_APIC_traps(vo
+ #ifndef CONFIG_XEN
+ else
+ /* Strange. Oh, well.. */
+- irq_desc[irq].chip = &no_irq_type;
++ irq_desc[irq].chip = &no_irq_chip;
+ #endif
+ }
+ }
+@@ -1886,8 +1540,6 @@ static inline void unlock_ExtINT_logic(v
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+-int timer_uses_ioapic_pin_0;
+-
+ /*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
+@@ -1900,13 +1552,13 @@ static inline void check_timer(void)
+ {
+ int apic1, pin1, apic2, pin2;
+ int vector;
++ cpumask_t mask;
+
+ /*
+ * get/set the timer IRQ vector:
+ */
+ disable_8259A_irq(0);
+- vector = assign_irq_vector(0);
+- set_intr_gate(vector, interrupt[0]);
++ vector = assign_irq_vector(0, TARGET_CPUS, &mask);
+
+ /*
+ * Subtle, code in do_timer_interrupt() expects an AEOI
+@@ -1925,9 +1577,6 @@ static inline void check_timer(void)
+ pin2 = ioapic_i8259.pin;
+ apic2 = ioapic_i8259.apic;
+
+- if (pin1 == 0)
+- timer_uses_ioapic_pin_0 = 1;
+-
+ apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ vector, apic1, pin1, apic2, pin2);
+
+@@ -2042,11 +1691,6 @@ void __init setup_IO_APIC(void)
+
+ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+
+- /*
+- * Set up the IO-APIC IRQ routing table.
+- */
+- if (!acpi_ioapic)
+- setup_ioapic_ids_from_mpc();
+ #ifndef CONFIG_XEN
+ sync_Arb_IDs();
+ #endif /* !CONFIG_XEN */
+@@ -2067,17 +1711,12 @@ static int ioapic_suspend(struct sys_dev
+ {
+ struct IO_APIC_route_entry *entry;
+ struct sysfs_ioapic_data *data;
+- unsigned long flags;
+ int i;
+
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
+ entry = data->entry;
+- spin_lock_irqsave(&ioapic_lock, flags);
+- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
+- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
+- }
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
++ *entry = ioapic_read_entry(dev->id, i);
+
+ return 0;
+ }
+@@ -2099,11 +1738,9 @@ static int ioapic_resume(struct sys_devi
+ reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ io_apic_write(dev->id, 0, reg_00.raw);
+ }
+- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
+- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
+- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
+- }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
++ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
++ ioapic_write_entry(dev->id, i, entry[i]);
+
+ return 0;
+ }
+@@ -2149,26 +1786,254 @@ static int __init ioapic_init_sysfs(void
+
+ device_initcall(ioapic_init_sysfs);
+
+-/* --------------------------------------------------------------------------
+- ACPI-based IOAPIC Configuration
+- -------------------------------------------------------------------------- */
++#ifndef CONFIG_XEN
++/*
++ * Dynamic irq allocate and deallocation
++ */
++int create_irq(void)
++{
++ /* Allocate an unused irq */
++ int irq;
++ int new;
++ int vector = 0;
++ unsigned long flags;
++ cpumask_t mask;
+
+-#ifdef CONFIG_ACPI
++ irq = -ENOSPC;
++ spin_lock_irqsave(&vector_lock, flags);
++ for (new = (NR_IRQS - 1); new >= 0; new--) {
++ if (platform_legacy_irq(new))
++ continue;
++ if (irq_vector[new] != 0)
++ continue;
++ vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
++ if (likely(vector > 0))
++ irq = new;
++ break;
++ }
++ spin_unlock_irqrestore(&vector_lock, flags);
+
+-#define IO_APIC_MAX_ID 0xFE
++ if (irq >= 0) {
++ dynamic_irq_init(irq);
++ }
++ return irq;
++}
+
+-int __init io_apic_get_version (int ioapic)
++void destroy_irq(unsigned int irq)
+ {
+- union IO_APIC_reg_01 reg_01;
+ unsigned long flags;
+
+- spin_lock_irqsave(&ioapic_lock, flags);
+- reg_01.raw = io_apic_read(ioapic, 1);
+- spin_unlock_irqrestore(&ioapic_lock, flags);
++ dynamic_irq_cleanup(irq);
++
++ spin_lock_irqsave(&vector_lock, flags);
++ irq_vector[irq] = 0;
++ spin_unlock_irqrestore(&vector_lock, flags);
++}
++#endif
++
++/*
++ * MSI message composition
++ */
++#ifdef CONFIG_PCI_MSI
++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
++{
++ int vector;
++ unsigned dest;
++ cpumask_t tmp;
++
++ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
++ if (vector >= 0) {
++ dest = cpu_mask_to_apicid(tmp);
++
++ msg->address_hi = MSI_ADDR_BASE_HI;
++ msg->address_lo =
++ MSI_ADDR_BASE_LO |
++ ((INT_DEST_MODE == 0) ?
++ MSI_ADDR_DEST_MODE_PHYSICAL:
++ MSI_ADDR_DEST_MODE_LOGICAL) |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ MSI_ADDR_REDIRECTION_CPU:
++ MSI_ADDR_REDIRECTION_LOWPRI) |
++ MSI_ADDR_DEST_ID(dest);
++
++ msg->data =
++ MSI_DATA_TRIGGER_EDGE |
++ MSI_DATA_LEVEL_ASSERT |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ MSI_DATA_DELIVERY_FIXED:
++ MSI_DATA_DELIVERY_LOWPRI) |
++ MSI_DATA_VECTOR(vector);
++ }
++ return vector;
++}
++
++#ifdef CONFIG_SMP
++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++ struct msi_msg msg;
++ unsigned int dest;
++ cpumask_t tmp;
++ int vector;
++
++ cpus_and(tmp, mask, cpu_online_map);
++ if (cpus_empty(tmp))
++ tmp = TARGET_CPUS;
++
++ cpus_and(mask, tmp, CPU_MASK_ALL);
++
++ vector = assign_irq_vector(irq, mask, &tmp);
++ if (vector < 0)
++ return;
++
++ dest = cpu_mask_to_apicid(tmp);
++
++ read_msi_msg(irq, &msg);
++
++ msg.data &= ~MSI_DATA_VECTOR_MASK;
++ msg.data |= MSI_DATA_VECTOR(vector);
++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
++ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
++
++ write_msi_msg(irq, &msg);
++ set_native_irq_info(irq, mask);
++}
++#endif /* CONFIG_SMP */
++
++/*
++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
++ * which implement the MSI or MSI-X Capability Structure.
++ */
++static struct irq_chip msi_chip = {
++ .name = "PCI-MSI",
++ .unmask = unmask_msi_irq,
++ .mask = mask_msi_irq,
++ .ack = ack_apic_edge,
++#ifdef CONFIG_SMP
++ .set_affinity = set_msi_irq_affinity,
++#endif
++ .retrigger = ioapic_retrigger_irq,
++};
++
++int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
++{
++ struct msi_msg msg;
++ int ret;
++ ret = msi_compose_msg(dev, irq, &msg);
++ if (ret < 0)
++ return ret;
++
++ write_msi_msg(irq, &msg);
++
++ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
++
++ return 0;
++}
++
++void arch_teardown_msi_irq(unsigned int irq)
++{
++ return;
++}
++
++#endif /* CONFIG_PCI_MSI */
++
++/*
++ * HyperTransport interrupt support
++ */
++#ifdef CONFIG_HT_IRQ
++
++#ifdef CONFIG_SMP
++
++static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
++{
++ struct ht_irq_msg msg;
++ fetch_ht_irq_msg(irq, &msg);
++
++ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
++ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+- return reg_01.bits.version;
++ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
++ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
++
++ write_ht_irq_msg(irq, &msg);
+ }
+
++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++ unsigned int dest;
++ cpumask_t tmp;
++ int vector;
++
++ cpus_and(tmp, mask, cpu_online_map);
++ if (cpus_empty(tmp))
++ tmp = TARGET_CPUS;
++
++ cpus_and(mask, tmp, CPU_MASK_ALL);
++
++ vector = assign_irq_vector(irq, mask, &tmp);
++ if (vector < 0)
++ return;
++
++ dest = cpu_mask_to_apicid(tmp);
++
++ target_ht_irq(irq, dest, vector);
++ set_native_irq_info(irq, mask);
++}
++#endif
++
++static struct irq_chip ht_irq_chip = {
++ .name = "PCI-HT",
++ .mask = mask_ht_irq,
++ .unmask = unmask_ht_irq,
++ .ack = ack_apic_edge,
++#ifdef CONFIG_SMP
++ .set_affinity = set_ht_irq_affinity,
++#endif
++ .retrigger = ioapic_retrigger_irq,
++};
++
++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
++{
++ int vector;
++ cpumask_t tmp;
++
++ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
++ if (vector >= 0) {
++ struct ht_irq_msg msg;
++ unsigned dest;
++
++ dest = cpu_mask_to_apicid(tmp);
++
++ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
++
++ msg.address_lo =
++ HT_IRQ_LOW_BASE |
++ HT_IRQ_LOW_DEST_ID(dest) |
++ HT_IRQ_LOW_VECTOR(vector) |
++ ((INT_DEST_MODE == 0) ?
++ HT_IRQ_LOW_DM_PHYSICAL :
++ HT_IRQ_LOW_DM_LOGICAL) |
++ HT_IRQ_LOW_RQEOI_EDGE |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ HT_IRQ_LOW_MT_FIXED :
++ HT_IRQ_LOW_MT_ARBITRATED) |
++ HT_IRQ_LOW_IRQ_MASKED;
++
++ write_ht_irq_msg(irq, &msg);
++
++ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
++ handle_edge_irq, "edge");
++ }
++ return vector;
++}
++#endif /* CONFIG_HT_IRQ */
++
++/* --------------------------------------------------------------------------
++ ACPI-based IOAPIC Configuration
++ -------------------------------------------------------------------------- */
++
++#ifdef CONFIG_ACPI
++
++#define IO_APIC_MAX_ID 0xFE
+
+ int __init io_apic_get_redir_entries (int ioapic)
+ {
+@@ -2187,6 +2052,8 @@ int io_apic_set_pci_routing (int ioapic,
+ {
+ struct IO_APIC_route_entry entry;
+ unsigned long flags;
++ int vector;
++ cpumask_t mask;
+
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+@@ -2195,6 +2062,17 @@ int io_apic_set_pci_routing (int ioapic,
+ }
+
+ /*
++ * IRQs < 16 are already in the irq_2_pin[] map
++ */
++ if (irq >= 16)
++ add_pin_to_irq(irq, ioapic, pin);
++
++
++ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
++ if (vector < 0)
++ return vector;
++
++ /*
+ * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
+ * Note that we mask (disable) IRQs now -- these get enabled when the
+ * corresponding device driver registers for this IRQ.
+@@ -2204,19 +2082,11 @@ int io_apic_set_pci_routing (int ioapic,
+
+ entry.delivery_mode = INT_DELIVERY_MODE;
+ entry.dest_mode = INT_DEST_MODE;
+- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
+ entry.trigger = edge_level;
+ entry.polarity = active_high_low;
+ entry.mask = 1; /* Disabled (masked) */
+-
+- irq = gsi_irq_sharing(irq);
+- /*
+- * IRQs < 16 are already in the irq_2_pin[] map
+- */
+- if (irq >= 16)
+- add_pin_to_irq(irq, ioapic, pin);
+-
+- entry.vector = assign_irq_vector(irq);
++ entry.vector = vector & 0xff;
+
+ apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
+ "IRQ %d Mode:%i Active:%i)\n", ioapic,
+@@ -2228,10 +2098,10 @@ int io_apic_set_pci_routing (int ioapic,
+ if (!ioapic && (irq < 16))
+ disable_8259A_irq(irq);
+
++ ioapic_write_entry(ioapic, pin, entry);
++
+ spin_lock_irqsave(&ioapic_lock, flags);
+- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
+- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
++ set_native_irq_info(irq, TARGET_CPUS);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return 0;
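+
+A pattern worth noting in the hunks above: open-coded io_apic_read()/
+io_apic_write() register pairs (plus their locking) are repeatedly collapsed
+into ioapic_read_entry()/ioapic_write_entry() calls. Those helpers are
+defined elsewhere in the 2.6.19 io_apic.c; a minimal sketch of what they
+encapsulate (the entry_union layout is an assumption here, not quoted from
+this patch):
+
+        union entry_union {
+                struct { u32 w1, w2; };
+                struct IO_APIC_route_entry entry;
+        };
+
+        static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+        {
+                union entry_union eu;
+                unsigned long flags;
+
+                /* read both 32-bit halves of the RTE under the lock */
+                spin_lock_irqsave(&ioapic_lock, flags);
+                eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+                eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+                spin_unlock_irqrestore(&ioapic_lock, flags);
+                return eu.entry;
+        }
+
+ioapic_write_entry() is the symmetric store; judging from the removed
+open-coded hunks it writes register 0x11 before 0x10, so the low word
+carrying the mask and vector bits takes effect last.
+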
+Index: 10.3-2007-11-26/arch/x86_64/kernel/ioport-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/ioport-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/ioport-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long
+
+ memset(bitmap, 0xff, IO_BITMAP_BYTES);
+ t->io_bitmap_ptr = bitmap;
++ set_thread_flag(TIF_IO_BITMAP);
+
+ set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
+ set_iobitmap.nr_ports = IO_BITMAP_BITS;
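+
+The added set_thread_flag(TIF_IO_BITMAP) pairs with the process-xen.c hunks
+later in this patch, where exit_thread() gains the matching
+clear_thread_flag(TIF_IO_BITMAP). The invariant being established
+(an informal sketch, not kernel code):
+
+        /*
+         * After this patch: t->io_bitmap_ptr != NULL <=> TIF_IO_BITMAP set.
+         * sys_ioperm() establishes both; exit_thread() clears both, so the
+         * context-switch path can test a cheap flag word instead of
+         * dereferencing the thread struct.
+         */
+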
+Index: 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/irq-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -20,11 +20,6 @@
+ #include <asm/idle.h>
+
+ atomic_t irq_err_count;
+-#ifdef CONFIG_X86_IO_APIC
+-#ifdef APIC_MISMATCH_DEBUG
+-atomic_t irq_mis_count;
+-#endif
+-#endif
+
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+ /*
+@@ -79,7 +74,8 @@ int show_interrupts(struct seq_file *p,
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+- seq_printf(p, " %14s", irq_desc[i].chip->typename);
++ seq_printf(p, " %8s", irq_desc[i].chip->name);
++ seq_printf(p, "-%-8s", irq_desc[i].name);
+
+ seq_printf(p, " %s", action->name);
+ for (action=action->next; action; action = action->next)
+@@ -99,11 +95,6 @@ skip:
+ seq_putc(p, '\n');
+ #endif
+ seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+-#ifdef CONFIG_X86_IO_APIC
+-#ifdef APIC_MISMATCH_DEBUG
+- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+-#endif
+-#endif
+ }
+ return 0;
+ }
+@@ -114,24 +105,28 @@ skip:
+ * handlers).
+ */
+ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
+-{
++{
++ struct pt_regs *old_regs = set_irq_regs(regs);
++
+ /* high bit used in ret_from_ code */
+ unsigned irq = ~regs->orig_rax;
+
+- if (unlikely(irq >= NR_IRQS)) {
+- printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
+- __FUNCTION__, irq);
+- BUG();
+- }
+-
+ exit_idle();
+ irq_enter();
++
+ #ifdef CONFIG_DEBUG_STACKOVERFLOW
+ stack_overflow_check(regs);
+ #endif
+- __do_IRQ(irq, regs);
++
++ if (likely(irq < NR_IRQS))
++ generic_handle_irq(irq);
++ else
++ printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n",
++ __func__, smp_processor_id(), irq);
++
+ irq_exit();
+
++ set_irq_regs(old_regs);
+ return 1;
+ }
+
+@@ -192,6 +187,6 @@ EXPORT_SYMBOL(do_softirq);
+ */
+ void ack_bad_irq(unsigned int irq)
+ {
+- printk("unexpected IRQ trap at vector %02x\n", irq);
++ printk("unexpected IRQ trap at irq %02x\n", irq);
+ }
+ #endif
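+
+The do_IRQ() rework above follows the 2.6.19 genirq conversion: handlers no
+longer receive a pt_regs argument, so the entry point parks the frame in a
+per-CPU slot via set_irq_regs() and restores the previous value on exit.
+Roughly what the asm-generic helper does (a sketch from memory, not part of
+this patch):
+
+        static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
+        {
+                struct pt_regs *old_regs = __get_cpu_var(__irq_regs);
+
+                /* swap in the new frame; hand back the old one so a
+                 * nested interrupt unwinds correctly */
+                __get_cpu_var(__irq_regs) = new_regs;
+                return old_regs;
+        }
+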
+Index: 10.3-2007-11-26/arch/x86_64/kernel/machine_kexec.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/machine_kexec.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/machine_kexec.c 2007-10-22 13:53:08.000000000 +0200
+@@ -286,6 +286,7 @@ NORET_TYPE void machine_kexec(struct kim
+ */
+ static int __init setup_crashkernel(char *arg)
+ {
++#ifndef CONFIG_XEN
+ unsigned long size, base;
+ char *p;
+ if (!arg)
+@@ -301,6 +302,10 @@ static int __init setup_crashkernel(char
+ crashk_res.start = base;
+ crashk_res.end = base + size - 1;
+ }
++#else
++ printk("Ignoring crashkernel command line, "
++ "parameter will be supplied by xen\n");
++#endif
+ return 0;
+ }
+ early_param("crashkernel", setup_crashkernel);
+Index: 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/mpparse-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -41,8 +41,7 @@ int acpi_found_madt;
+ * Various Linux-internal data structures created from the
+ * MP-table.
+ */
+-unsigned char apic_version [MAX_APICS];
+-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+ int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
+
+ static int mp_current_pci_id = 0;
+@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
+ int mp_irq_entries;
+
+ int nr_ioapics;
+-int pic_mode;
+ unsigned long mp_lapic_addr = 0;
+
+
+@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
+ /* Bitmask of physically existing CPUs */
+ physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
+
+-/* ACPI MADT entry parsing functions */
+-#ifdef CONFIG_ACPI
+-extern struct acpi_boot_flags acpi_boot;
+-#ifdef CONFIG_X86_LOCAL_APIC
+-extern int acpi_parse_lapic (acpi_table_entry_header *header);
+-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
+-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
+-#endif /*CONFIG_X86_LOCAL_APIC*/
+-#ifdef CONFIG_X86_IO_APIC
+-extern int acpi_parse_ioapic (acpi_table_entry_header *header);
+-#endif /*CONFIG_X86_IO_APIC*/
+-#endif /*CONFIG_ACPI*/
+-
+ u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+
+@@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned
+ static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+ {
+ int cpu;
+- unsigned char ver;
+ cpumask_t tmp_map;
++ char *bootup_cpu = "";
+
+ if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+ disabled_cpus++;
+ return;
+ }
+-
+- printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
+- m->mpc_apicid,
+- (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
+- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
+- m->mpc_apicver);
+-
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+- Dprintk(" Bootup CPU\n");
++ bootup_cpu = " (Bootup-CPU)";
+ boot_cpu_id = m->mpc_apicid;
+ }
++
++ printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
++
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+@@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+
+-#if MAX_APICS < 255
+- if ((int)m->mpc_apicid > MAX_APICS) {
+- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
+- m->mpc_apicid, MAX_APICS);
+- return;
+- }
+-#endif
+- ver = m->mpc_apicver;
+-
+ physid_set(m->mpc_apicid, phys_cpu_present_map);
+- /*
+- * Validate version
+- */
+- if (ver == 0x0) {
+- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
+- ver = 0x10;
+- }
+- apic_version[m->mpc_apicid] = ver;
+ if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
+ /*
+ * bios_cpu_apicid is required to have processors listed
+@@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+
+ if (strncmp(str, "ISA", 3) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
+- } else if (strncmp(str, "EISA", 4) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
++ set_bit(m->mpc_busid, mp_bus_not_pci);
+ } else if (strncmp(str, "PCI", 3) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
++ clear_bit(m->mpc_busid, mp_bus_not_pci);
+ mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
+ mp_current_pci_id++;
+- } else if (strncmp(str, "MCA", 3) == 0) {
+- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
+ } else {
+ printk(KERN_ERR "Unknown bustype %s\n", str);
+ }
+ }
+
++static int bad_ioapic(unsigned long address)
++{
++ if (nr_ioapics >= MAX_IO_APICS) {
++ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
++ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
++ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
++ }
++ if (!address) {
++ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
++ " found in table, skipping!\n");
++ return 1;
++ }
++ return 0;
++}
++
+ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+ {
+ if (!(m->mpc_flags & MPC_APIC_USABLE))
+ return;
+
+- printk("I/O APIC #%d Version %d at 0x%X.\n",
+- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+- if (nr_ioapics >= MAX_IO_APICS) {
+- printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
+- MAX_IO_APICS, nr_ioapics);
+- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
+- }
+- if (!m->mpc_apicaddr) {
+- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
+- " found in MP table, skipping!\n");
++ printk("I/O APIC #%d at 0x%X.\n",
++ m->mpc_apicid, m->mpc_apicaddr);
++
++ if (bad_ioapic(m->mpc_apicaddr))
+ return;
+- }
++
+ mp_ioapics[nr_ioapics] = *m;
+ nr_ioapics++;
+ }
+@@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+- /*
+- * Well it seems all SMP boards in existence
+- * use ExtINT/LVT1 == LINT0 and
+- * NMI/LVT2 == LINT1 - the following check
+- * will show us if this assumptions is false.
+- * Until then we do not have to add baggage.
+- */
+- if ((m->mpc_irqtype == mp_ExtINT) &&
+- (m->mpc_destapiclint != 0))
+- BUG();
+- if ((m->mpc_irqtype == mp_NMI) &&
+- (m->mpc_destapiclint != 1))
+- BUG();
+ }
+
+ /*
+@@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp
+ unsigned char *mpt=((unsigned char *)mpc)+count;
+
+ if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
+- printk("SMP mptable: bad signature [%c%c%c%c]!\n",
++ printk("MPTABLE: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0],
+ mpc->mpc_signature[1],
+ mpc->mpc_signature[2],
+@@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp
+ return 0;
+ }
+ if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
+- printk("SMP mptable: checksum error!\n");
++ printk("MPTABLE: checksum error!\n");
+ return 0;
+ }
+ if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
+- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
++ printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
+ mpc->mpc_spec);
+ return 0;
+ }
+ if (!mpc->mpc_lapic) {
+- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
++ printk(KERN_ERR "MPTABLE: null local APIC address!\n");
+ return 0;
+ }
+ memcpy(str,mpc->mpc_oem,8);
+- str[8]=0;
+- printk(KERN_INFO "OEM ID: %s ",str);
++ str[8] = 0;
++ printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
+
+ memcpy(str,mpc->mpc_productid,12);
+- str[12]=0;
+- printk("Product ID: %s ",str);
++ str[12] = 0;
++ printk("MPTABLE: Product ID: %s ",str);
+
+- printk("APIC at: 0x%X\n",mpc->mpc_lapic);
++ printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
+
+ /* save the local APIC address, it might be non-default */
+ if (!acpi_lapic)
+- mp_lapic_addr = mpc->mpc_lapic;
++ mp_lapic_addr = mpc->mpc_lapic;
+
+ /*
+ * Now process the configuration blocks.
+@@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp
+ struct mpc_config_processor *m=
+ (struct mpc_config_processor *)mpt;
+ if (!acpi_lapic)
+- MP_processor_info(m);
++ MP_processor_info(m);
+ mpt += sizeof(*m);
+ count += sizeof(*m);
+ break;
+@@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp
+ struct mpc_config_ioapic *m=
+ (struct mpc_config_ioapic *)mpt;
+ MP_ioapic_info(m);
+- mpt+=sizeof(*m);
+- count+=sizeof(*m);
++ mpt += sizeof(*m);
++ count += sizeof(*m);
+ break;
+ }
+ case MP_INTSRC:
+@@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp
+ (struct mpc_config_intsrc *)mpt;
+
+ MP_intsrc_info(m);
+- mpt+=sizeof(*m);
+- count+=sizeof(*m);
++ mpt += sizeof(*m);
++ count += sizeof(*m);
+ break;
+ }
+ case MP_LINTSRC:
+@@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp
+ struct mpc_config_lintsrc *m=
+ (struct mpc_config_lintsrc *)mpt;
+ MP_lintsrc_info(m);
+- mpt+=sizeof(*m);
+- count+=sizeof(*m);
++ mpt += sizeof(*m);
++ count += sizeof(*m);
+ break;
+ }
+ }
+ }
+ clustered_apic_check();
+ if (!num_processors)
+- printk(KERN_ERR "SMP mptable: no processors registered!\n");
++ printk(KERN_ERR "MPTABLE: no processors registered!\n");
+ return num_processors;
+ }
+
+@@ -451,13 +407,10 @@ static inline void __init construct_defa
+ * 2 CPUs, numbered 0 & 1.
+ */
+ processor.mpc_type = MP_PROCESSOR;
+- /* Either an integrated APIC or a discrete 82489DX. */
+- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++ processor.mpc_apicver = 0;
+ processor.mpc_cpuflag = CPU_ENABLED;
+- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+- (boot_cpu_data.x86_model << 4) |
+- boot_cpu_data.x86_mask;
+- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
++ processor.mpc_cpufeature = 0;
++ processor.mpc_featureflag = 0;
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ for (i = 0; i < 2; i++) {
+@@ -476,14 +429,6 @@ static inline void __init construct_defa
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+- case 2:
+- case 6:
+- case 3:
+- memcpy(bus.mpc_bustype, "EISA ", 6);
+- break;
+- case 4:
+- case 7:
+- memcpy(bus.mpc_bustype, "MCA ", 6);
+ }
+ MP_bus_info(&bus);
+ if (mpc_default_type > 4) {
+@@ -494,7 +439,7 @@ static inline void __init construct_defa
+
+ ioapic.mpc_type = MP_IOAPIC;
+ ioapic.mpc_apicid = 2;
+- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
++ ioapic.mpc_apicver = 0;
+ ioapic.mpc_flags = MPC_APIC_USABLE;
+ ioapic.mpc_apicaddr = 0xFEC00000;
+ MP_ioapic_info(&ioapic);
+@@ -537,13 +482,6 @@ void __init get_smp_config (void)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
+
+ printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
+- if (mpf->mpf_feature2 & (1<<7)) {
+- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
+- pic_mode = 1;
+- } else {
+- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
+- pic_mode = 0;
+- }
+
+ /*
+ * Now see if we need to read further.
+@@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig
+ return 0;
+ }
+
+-void __init find_intel_smp (void)
++void __init find_smp_config(void)
+ {
+ unsigned int address;
+
+@@ -637,9 +575,7 @@ void __init find_intel_smp (void)
+ smp_scan_config(0xF0000,0x10000))
+ return;
+ /*
+- * If it is an SMP machine we should know now, unless the
+- * configuration is in an EISA/MCA bus machine with an
+- * extended bios data area.
++ * If it is an SMP machine we should know now.
+ *
+ * there is a real-mode segmented pointer pointing to the
+ * 4K EBDA area at 0x40E, calculate and scan it here.
+@@ -660,64 +596,38 @@ void __init find_intel_smp (void)
+ printk(KERN_INFO "No mptable found.\n");
+ }
+
+-/*
+- * - Intel MP Configuration Table
+- */
+-void __init find_smp_config (void)
+-{
+-#ifdef CONFIG_X86_LOCAL_APIC
+- find_intel_smp();
+-#endif
+-}
+-
+-
+ /* --------------------------------------------------------------------------
+ ACPI-based MP Configuration
+ -------------------------------------------------------------------------- */
+
+ #ifdef CONFIG_ACPI
+
+-void __init mp_register_lapic_address (
+- u64 address)
++void __init mp_register_lapic_address(u64 address)
+ {
+ #ifndef CONFIG_XEN
+ mp_lapic_addr = (unsigned long) address;
+-
+ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+-
+ if (boot_cpu_id == -1U)
+ boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+-
+- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+ #endif
+ }
+
+-
+-void __cpuinit mp_register_lapic (
+- u8 id,
+- u8 enabled)
++void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+ {
+ struct mpc_config_processor processor;
+ int boot_cpu = 0;
+
+- if (id >= MAX_APICS) {
+- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
+- id, MAX_APICS);
+- return;
+- }
+-
+- if (id == boot_cpu_physical_apicid)
++ if (id == boot_cpu_id)
+ boot_cpu = 1;
+
+ #ifndef CONFIG_XEN
+ processor.mpc_type = MP_PROCESSOR;
+ processor.mpc_apicid = id;
+- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
++ processor.mpc_apicver = 0;
+ processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
+ processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
+- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
++ processor.mpc_cpufeature = 0;
++ processor.mpc_featureflag = 0;
+ processor.mpc_reserved[0] = 0;
+ processor.mpc_reserved[1] = 0;
+ #endif
+@@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic (
+ MP_processor_info(&processor);
+ }
+
+-#ifdef CONFIG_X86_IO_APIC
+-
+ #define MP_ISA_BUS 0
+ #define MP_MAX_IOAPIC_PIN 127
+
+@@ -737,11 +645,9 @@ static struct mp_ioapic_routing {
+ u32 pin_programmed[4];
+ } mp_ioapic_routing[MAX_IO_APICS];
+
+-
+-static int mp_find_ioapic (
+- int gsi)
++static int mp_find_ioapic(int gsi)
+ {
+- int i = 0;
++ int i = 0;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+@@ -751,28 +657,15 @@ static int mp_find_ioapic (
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+-
+ return -1;
+ }
+-
+
+-void __init mp_register_ioapic (
+- u8 id,
+- u32 address,
+- u32 gsi_base)
++void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
+ {
+- int idx = 0;
++ int idx = 0;
+
+- if (nr_ioapics >= MAX_IO_APICS) {
+- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+- }
+- if (!address) {
+- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+- " found in MADT table, skipping!\n");
++ if (bad_ioapic(address))
+ return;
+- }
+
+ idx = nr_ioapics++;
+
+@@ -784,7 +677,7 @@ void __init mp_register_ioapic (
+ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+ #endif
+ mp_ioapics[idx].mpc_apicid = id;
+- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
++ mp_ioapics[idx].mpc_apicver = 0;
+
+ /*
+ * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
+@@ -795,21 +688,15 @@ void __init mp_register_ioapic (
+ mp_ioapic_routing[idx].gsi_end = gsi_base +
+ io_apic_get_redir_entries(idx);
+
+- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
++ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
+- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
++ mp_ioapics[idx].mpc_apicaddr,
+ mp_ioapic_routing[idx].gsi_start,
+ mp_ioapic_routing[idx].gsi_end);
+-
+- return;
+ }
+
+-
+-void __init mp_override_legacy_irq (
+- u8 bus_irq,
+- u8 polarity,
+- u8 trigger,
+- u32 gsi)
++void __init
++mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+ {
+ struct mpc_config_intsrc intsrc;
+ int ioapic = -1;
+@@ -847,22 +734,18 @@ void __init mp_override_legacy_irq (
+ mp_irqs[mp_irq_entries] = intsrc;
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+-
+- return;
+ }
+
+-
+-void __init mp_config_acpi_legacy_irqs (void)
++void __init mp_config_acpi_legacy_irqs(void)
+ {
+ struct mpc_config_intsrc intsrc;
+- int i = 0;
+- int ioapic = -1;
++ int i = 0;
++ int ioapic = -1;
+
+ /*
+ * Fabricate the legacy ISA bus (bus #31).
+ */
+- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
++ set_bit(MP_ISA_BUS, mp_bus_not_pci);
+
+ /*
+ * Locate the IOAPIC that manages the ISA IRQs (0-15).
+@@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs (
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!\n");
+ }
+-
+- return;
+ }
+
+-#define MAX_GSI_NUM 4096
+-
+ int mp_register_gsi(u32 gsi, int triggering, int polarity)
+ {
+- int ioapic = -1;
+- int ioapic_pin = 0;
+- int idx, bit = 0;
+- static int pci_irq = 16;
+- /*
+- * Mapping between Global System Interrupts, which
+- * represent all possible interrupts, to the IRQs
+- * assigned to actual devices.
+- */
+- static int gsi_to_irq[MAX_GSI_NUM];
++ int ioapic = -1;
++ int ioapic_pin = 0;
++ int idx, bit = 0;
+
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return gsi;
+@@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger
+ if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+ mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+- return gsi_to_irq[gsi];
++ return gsi;
+ }
+
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
+
+- if (triggering == ACPI_LEVEL_SENSITIVE) {
+- /*
+- * For PCI devices assign IRQs in order, avoiding gaps
+- * due to unused I/O APIC pins.
+- */
+- int irq = gsi;
+- if (gsi < MAX_GSI_NUM) {
+- /*
+- * Retain the VIA chipset work-around (gsi > 15), but
+- * avoid a problem where the 8254 timer (IRQ0) is setup
+- * via an override (so it's not on pin 0 of the ioapic),
+- * and at the same time, the pin 0 interrupt is a PCI
+- * type. The gsi > 15 test could cause these two pins
+- * to be shared as IRQ0, and they are not shareable.
+- * So test for this condition, and if necessary, avoid
+- * the pin collision.
+- */
+- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
+- gsi = pci_irq++;
+- /*
+- * Don't assign IRQ used by ACPI SCI
+- */
+- if (gsi == acpi_fadt.sci_int)
+- gsi = pci_irq++;
+- gsi_to_irq[irq] = gsi;
+- } else {
+- printk(KERN_ERR "GSI %u is too high\n", gsi);
+- return gsi;
+- }
+- }
+-
+ io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+ return gsi;
+ }
+-
+-#endif /*CONFIG_X86_IO_APIC*/
+ #endif /*CONFIG_ACPI*/
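+
+The rewritten mp_register_gsi() above drops the old gsi_to_irq[]
+translation table and returns the GSI unchanged; duplicate
+registrations are caught purely by the per-I/O-APIC pin_programmed
+bitmap. A minimal user-space sketch of that bitmap bookkeeping (the
+names and sizes here are illustrative, not the kernel's exact layout):
+
+	#include <stdio.h>
+
+	#define NR_PINS 64
+	static unsigned int pin_programmed[NR_PINS / 32];
+
+	/* Returns 1 the first time a pin is seen, 0 on any repeat. */
+	static int program_pin_once(int pin)
+	{
+		int idx = pin / 32;	/* which 32-bit word */
+		int bit = pin % 32;	/* which bit within it */
+
+		if (pin_programmed[idx] & (1u << bit))
+			return 0;	/* already routed, skip reprogramming */
+		pin_programmed[idx] |= 1u << bit;
+		return 1;		/* caller sets up the redirection entry */
+	}
+
+	int main(void)
+	{
+		printf("first: %d\n", program_pin_once(10));	/* 1: programs the pin */
+		printf("again: %d\n", program_pin_once(10));	/* 0: already programmed */
+		return 0;
+	}
+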
+Index: 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-05-31 14:39:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -3,7 +3,8 @@
+ #include <linux/pci.h>
+ #include <linux/cache.h>
+ #include <linux/module.h>
+-#include <asm/dma-mapping.h>
++#include <linux/dma-mapping.h>
++
+ #include <asm/proto.h>
+ #include <asm/swiotlb.h>
+ #include <asm/dma.h>
+Index: 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/process-xen.c 2007-05-31 14:39:08.000000000 +0200
++++ 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not
+ }
+ EXPORT_SYMBOL(idle_notifier_unregister);
+
+-enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
+-static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
+-
+ void enter_idle(void)
+ {
+- __get_cpu_var(idle_state) = CPU_IDLE;
++ write_pda(isidle, 1);
+ atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ }
+
+ static void __exit_idle(void)
+ {
+- __get_cpu_var(idle_state) = CPU_NOT_IDLE;
++ if (test_and_clear_bit_pda(0, isidle) == 0)
++ return;
+ atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ }
+
+ /* Called from interrupts to signify idle end */
+ void exit_idle(void)
+ {
+- if (current->pid | read_pda(irqcount))
++ /* idle loop has pid 0 */
++ if (current->pid)
+ return;
+ __exit_idle();
+ }
+@@ -184,6 +183,9 @@ void cpu_idle (void)
+ play_dead();
+ enter_idle();
+ idle();
++ /* In many cases the interrupt that ended idle
++ has already called exit_idle. But some idle
++ loops can be woken up without interrupt. */
+ __exit_idle();
+ }
+
+@@ -196,7 +198,7 @@ void cpu_idle (void)
+ void cpu_idle_wait(void)
+ {
+ unsigned int cpu, this_cpu = get_cpu();
+- cpumask_t map;
++ cpumask_t map, tmp = current->cpus_allowed;
+
+ set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
+ put_cpu();
+@@ -219,6 +221,8 @@ void cpu_idle_wait(void)
+ }
+ cpus_and(map, map, cpu_online_map);
+ } while (!cpus_empty(map));
++
++ set_cpus_allowed(current, tmp);
+ }
+ EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+@@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs)
+ print_modules();
+ printk("Pid: %d, comm: %.20s %s %s %.*s\n",
+ current->pid, current->comm, print_tainted(),
+- system_utsname.release,
+- (int)strcspn(system_utsname.version, " "),
+- system_utsname.version);
++ init_utsname()->release,
++ (int)strcspn(init_utsname()->version, " "),
++ init_utsname()->version);
+ printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
+ printk_address(regs->rip);
+ printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
+@@ -310,6 +314,7 @@ void exit_thread(void)
+
+ kfree(t->io_bitmap_ptr);
+ t->io_bitmap_ptr = NULL;
++ clear_thread_flag(TIF_IO_BITMAP);
+ /*
+ * Careful, clear this in the TSS too:
+ */
+@@ -339,6 +344,7 @@ void flush_thread(void)
+ if (t->flags & _TIF_IA32)
+ current_thread_info()->status |= TS_COMPAT;
+ }
++ t->flags &= ~_TIF_DEBUG;
+
+ tsk->thread.debugreg0 = 0;
+ tsk->thread.debugreg1 = 0;
+@@ -431,7 +437,7 @@ int copy_thread(int nr, unsigned long cl
+ asm("mov %%es,%0" : "=m" (p->thread.es));
+ asm("mov %%ds,%0" : "=m" (p->thread.ds));
+
+- if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
++ if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
+ p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+ if (!p->thread.io_bitmap_ptr) {
+ p->thread.io_bitmap_max = 0;
+@@ -439,6 +445,7 @@ int copy_thread(int nr, unsigned long cl
+ }
+ memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+ IO_BITMAP_BYTES);
++ set_tsk_thread_flag(p, TIF_IO_BITMAP);
+ }
+
+ /*
+@@ -466,6 +473,30 @@ out:
+ }
+
+ /*
++ * This special macro can be used to load a debugging register
++ */
++#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
++
++static inline void __switch_to_xtra(struct task_struct *prev_p,
++ struct task_struct *next_p)
++{
++ struct thread_struct *prev, *next;
++
++	prev = &prev_p->thread;
++ next = &next_p->thread;
++
++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
++ loaddebug(next, 0);
++ loaddebug(next, 1);
++ loaddebug(next, 2);
++ loaddebug(next, 3);
++ /* no 4 and 5 */
++ loaddebug(next, 6);
++ loaddebug(next, 7);
++ }
++}
++
++/*
+ * switch_to(x,y) should switch tasks from x to y.
+ *
+ * This could still be optimized:
+@@ -487,6 +518,10 @@ __switch_to(struct task_struct *prev_p,
+ struct physdev_set_iobitmap iobmp_op;
+ multicall_entry_t _mcl[8], *mcl = _mcl;
+
++ /* we're going to use this soon, after a few expensive things */
++ if (next_p->fpu_counter>5)
++ prefetch(&next->i387.fxsave);
++
+ /*
+ * This is basically '__unlazy_fpu', except that we queue a
+ * multicall to indicate FPU task switch, rather than
+@@ -499,7 +534,8 @@ __switch_to(struct task_struct *prev_p,
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+ mcl++;
+- }
++ } else
++ prev_p->fpu_counter = 0;
+
+ /*
+ * Reload esp0, LDT and the page table pointer:
+@@ -576,21 +612,29 @@ __switch_to(struct task_struct *prev_p,
+ write_pda(oldrsp, next->userrsp);
+ write_pda(pcurrent, next_p);
+ write_pda(kernelstack,
+- task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
++ (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
++#ifdef CONFIG_CC_STACKPROTECTOR
++ write_pda(stack_canary, next_p->stack_canary);
++
++ /*
++ * Build time only check to make sure the stack_canary is at
++ * offset 40 in the pda; this is a gcc ABI requirement
++ */
++ BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
++#endif
+
+ /*
+ * Now maybe reload the debug registers
+ */
+- if (unlikely(next->debugreg7)) {
+- set_debugreg(next->debugreg0, 0);
+- set_debugreg(next->debugreg1, 1);
+- set_debugreg(next->debugreg2, 2);
+- set_debugreg(next->debugreg3, 3);
+- /* no 4 and 5 */
+- set_debugreg(next->debugreg6, 6);
+- set_debugreg(next->debugreg7, 7);
+- }
++ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
++ __switch_to_xtra(prev_p, next_p);
+
++ /* If the task has used fpu the last 5 timeslices, just do a full
++ * restore of the math state immediately to avoid the trap; the
++ * chances of needing FPU soon are obviously high now
++ */
++ if (next_p->fpu_counter>5)
++ math_state_restore();
+ return prev_p;
+ }
+
+@@ -810,7 +854,7 @@ int dump_task_regs(struct task_struct *t
+
+ unsigned long arch_align_stack(unsigned long sp)
+ {
+- if (randomize_va_space)
++ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() % 8192;
+ return sp & ~0xf;
+ }
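+
+The arch_align_stack() hunk just above makes stack randomization honour
+the ADDR_NO_RANDOMIZE personality bit. A user-space model of the
+resulting policy, with rand() standing in for the kernel's
+get_random_int() (the personality constant matches
+<linux/personality.h>):
+
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	#define ADDR_NO_RANDOMIZE 0x0040000	/* value from <linux/personality.h> */
+
+	static unsigned long align_stack(unsigned long sp, unsigned long personality,
+					 int randomize_va_space)
+	{
+		if (!(personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+			sp -= (unsigned long)(rand() % 8192);	/* kernel: get_random_int() */
+		return sp & ~0xfUL;				/* keep 16-byte alignment */
+	}
+
+	int main(void)
+	{
+		printf("randomized: %#lx\n", align_stack(0x7fff0000UL, 0, 1));
+		printf("fixed:      %#lx\n",
+		       align_stack(0x7fff0000UL, ADDR_NO_RANDOMIZE, 1));
+		return 0;
+	}
+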
+Index: 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -118,16 +118,6 @@ EXPORT_SYMBOL(boot_cpu_data);
+
+ unsigned long mmu_cr4_features;
+
+-int acpi_disabled;
+-EXPORT_SYMBOL(acpi_disabled);
+-#ifdef CONFIG_ACPI
+-extern int __initdata acpi_ht;
+-extern acpi_interrupt_flags acpi_sci_flags;
+-int __initdata acpi_force = 0;
+-#endif
+-
+-int acpi_numa __initdata;
+-
+ /* Boot loader ID as an integer, for the benefit of proc_dointvec */
+ int bootloader_type;
+
+@@ -151,10 +141,6 @@ struct sys_desc_table_struct {
+
+ struct edid_info edid_info;
+ EXPORT_SYMBOL_GPL(edid_info);
+-struct e820map e820;
+-#ifdef CONFIG_XEN
+-struct e820map machine_e820;
+-#endif
+
+ extern int root_mountflags;
+
+@@ -181,9 +167,6 @@ struct resource standard_io_resources[]
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO }
+ };
+
+-#define STANDARD_IO_RESOURCES \
+- (sizeof standard_io_resources / sizeof standard_io_resources[0])
+-
+ #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
+
+ struct resource data_resource = {
+@@ -230,9 +213,6 @@ static struct resource adapter_rom_resou
+ .flags = IORESOURCE_ROM }
+ };
+
+-#define ADAPTER_ROM_RESOURCES \
+- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
+-
+ static struct resource video_rom_resource = {
+ .name = "Video ROM",
+ .start = 0xc0000,
+@@ -309,7 +289,8 @@ static void __init probe_roms(void)
+ }
+
+ /* check for adapter roms on 2k boundaries */
+- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
++ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
++ start += 2048) {
+ rom = isa_bus_to_virt(start);
+ if (!romsignature(rom))
+ continue;
+@@ -329,186 +310,21 @@ static void __init probe_roms(void)
+ }
+ }
+
+-/* Check for full argument with no trailing characters */
+-static int fullarg(char *p, char *arg)
++#ifdef CONFIG_PROC_VMCORE
++/* elfcorehdr= specifies the location of elf core header
++ * stored by the crashed kernel. This option will be passed
++ * by kexec loader to the capture kernel.
++ */
++static int __init setup_elfcorehdr(char *arg)
+ {
+- int l = strlen(arg);
+- return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
++ char *end;
++ if (!arg)
++ return -EINVAL;
++ elfcorehdr_addr = memparse(arg, &end);
++ return end > arg ? 0 : -EINVAL;
+ }
+-
+-static __init void parse_cmdline_early (char ** cmdline_p)
+-{
+- char c = ' ', *to = command_line, *from = COMMAND_LINE;
+- int len = 0;
+- int userdef = 0;
+-
+- for (;;) {
+- if (c != ' ')
+- goto next_char;
+-
+-#ifdef CONFIG_SMP
+- /*
+- * If the BIOS enumerates physical processors before logical,
+- * maxcpus=N at enumeration-time can be used to disable HT.
+- */
+- else if (!memcmp(from, "maxcpus=", 8)) {
+- extern unsigned int maxcpus;
+-
+- maxcpus = simple_strtoul(from + 8, NULL, 0);
+- }
+-#endif
+-#ifdef CONFIG_ACPI
+- /* "acpi=off" disables both ACPI table parsing and interpreter init */
+- if (fullarg(from,"acpi=off"))
+- disable_acpi();
+-
+- if (fullarg(from, "acpi=force")) {
+- /* add later when we do DMI horrors: */
+- acpi_force = 1;
+- acpi_disabled = 0;
+- }
+-
+- /* acpi=ht just means: do ACPI MADT parsing
+- at bootup, but don't enable the full ACPI interpreter */
+- if (fullarg(from, "acpi=ht")) {
+- if (!acpi_force)
+- disable_acpi();
+- acpi_ht = 1;
+- }
+- else if (fullarg(from, "pci=noacpi"))
+- acpi_disable_pci();
+- else if (fullarg(from, "acpi=noirq"))
+- acpi_noirq_set();
+-
+- else if (fullarg(from, "acpi_sci=edge"))
+- acpi_sci_flags.trigger = 1;
+- else if (fullarg(from, "acpi_sci=level"))
+- acpi_sci_flags.trigger = 3;
+- else if (fullarg(from, "acpi_sci=high"))
+- acpi_sci_flags.polarity = 1;
+- else if (fullarg(from, "acpi_sci=low"))
+- acpi_sci_flags.polarity = 3;
+-
+- /* acpi=strict disables out-of-spec workarounds */
+- else if (fullarg(from, "acpi=strict")) {
+- acpi_strict = 1;
+- }
+-#ifdef CONFIG_X86_IO_APIC
+- else if (fullarg(from, "acpi_skip_timer_override"))
+- acpi_skip_timer_override = 1;
+-#endif
+-#endif
+-
+-#ifndef CONFIG_XEN
+- if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
+- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+- disable_apic = 1;
+- }
+-
+- if (fullarg(from, "noapic"))
+- skip_ioapic_setup = 1;
+-
+- if (fullarg(from,"apic")) {
+- skip_ioapic_setup = 0;
+- ioapic_force = 1;
+- }
++early_param("elfcorehdr", setup_elfcorehdr);
+ #endif
+-
+- if (!memcmp(from, "mem=", 4))
+- parse_memopt(from+4, &from);
+-
+- if (!memcmp(from, "memmap=", 7)) {
+- /* exactmap option is for used defined memory */
+- if (!memcmp(from+7, "exactmap", 8)) {
+-#ifdef CONFIG_CRASH_DUMP
+- /* If we are doing a crash dump, we
+- * still need to know the real mem
+- * size before original memory map is
+- * reset.
+- */
+- saved_max_pfn = e820_end_of_ram();
+-#endif
+- from += 8+7;
+- end_pfn_map = 0;
+- e820.nr_map = 0;
+- userdef = 1;
+- }
+- else {
+- parse_memmapopt(from+7, &from);
+- userdef = 1;
+- }
+- }
+-
+-#ifdef CONFIG_NUMA
+- if (!memcmp(from, "numa=", 5))
+- numa_setup(from+5);
+-#endif
+-
+- if (!memcmp(from,"iommu=",6)) {
+- iommu_setup(from+6);
+- }
+-
+- if (fullarg(from,"oops=panic"))
+- panic_on_oops = 1;
+-
+- if (!memcmp(from, "noexec=", 7))
+- nonx_setup(from + 7);
+-
+-#ifdef CONFIG_KEXEC
+- /* crashkernel=size@addr specifies the location to reserve for
+- * a crash kernel. By reserving this memory we guarantee
+- * that linux never set's it up as a DMA target.
+- * Useful for holding code to do something appropriate
+- * after a kernel panic.
+- */
+- else if (!memcmp(from, "crashkernel=", 12)) {
+-#ifndef CONFIG_XEN
+- unsigned long size, base;
+- size = memparse(from+12, &from);
+- if (*from == '@') {
+- base = memparse(from+1, &from);
+- /* FIXME: Do I want a sanity check
+- * to validate the memory range?
+- */
+- crashk_res.start = base;
+- crashk_res.end = base + size - 1;
+- }
+-#else
+- printk("Ignoring crashkernel command line, "
+- "parameter will be supplied by xen\n");
+-#endif
+- }
+-#endif
+-
+-#ifdef CONFIG_PROC_VMCORE
+- /* elfcorehdr= specifies the location of elf core header
+- * stored by the crashed kernel. This option will be passed
+- * by kexec loader to the capture kernel.
+- */
+- else if(!memcmp(from, "elfcorehdr=", 11))
+- elfcorehdr_addr = memparse(from+11, &from);
+-#endif
+-
+-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
+- else if (!memcmp(from, "additional_cpus=", 16))
+- setup_additional_cpus(from+16);
+-#endif
+-
+- next_char:
+- c = *(from++);
+- if (!c)
+- break;
+- if (COMMAND_LINE_SIZE <= ++len)
+- break;
+- *(to++) = c;
+- }
+- if (userdef) {
+- printk(KERN_INFO "user-defined physical RAM map:\n");
+- e820_print_map("user");
+- }
+- *to = '\0';
+- *cmdline_p = command_line;
+-}
+
+ #ifndef CONFIG_NUMA
+ static void __init
+@@ -521,10 +337,11 @@ contig_initmem_init(unsigned long start_
+ if (bootmap == -1L)
+ panic("Cannot find bootmem map of size %ld\n",bootmap_size);
+ bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
++ e820_register_active_regions(0, start_pfn, end_pfn);
+ #ifdef CONFIG_XEN
+- e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT);
++ free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
+ #else
+- e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
++ free_bootmem_with_active_regions(0, end_pfn);
+ #endif
+ reserve_bootmem(bootmap, bootmap_size);
+ }
+@@ -587,6 +404,10 @@ static void discover_ebda(void)
+ void __init setup_arch(char **cmdline_p)
+ {
+ #ifdef CONFIG_XEN
++ extern struct e820map machine_e820;
++
++ printk(KERN_INFO "Command line: %s\n", saved_command_line);
++
+ /* Register a call for panic conditions. */
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
+
+@@ -633,6 +454,8 @@ void __init setup_arch(char **cmdline_p)
+
+ ARCH_SETUP
+ #else
++ printk(KERN_INFO "Command line: %s\n", saved_command_line);
++
+ ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
+ screen_info = SCREEN_INFO;
+ edid_info = EDID_INFO;
+@@ -660,16 +483,22 @@ void __init setup_arch(char **cmdline_p)
+ data_resource.start = virt_to_phys(&_etext);
+ data_resource.end = virt_to_phys(&_edata)-1;
+
+- parse_cmdline_early(cmdline_p);
+-
+ early_identify_cpu(&boot_cpu_data);
+
++ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
++ *cmdline_p = command_line;
++
++ parse_early_param();
++
++ finish_e820_parsing();
++
++ e820_register_active_regions(0, 0, -1UL);
+ /*
+ * partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+ end_pfn = e820_end_of_ram();
+- num_physpages = end_pfn; /* for pfn_valid */
++ num_physpages = end_pfn;
+
+ check_efer();
+
+@@ -680,6 +509,14 @@ void __init setup_arch(char **cmdline_p)
+ if (is_initial_xendomain())
+ dmi_scan_machine();
+
++ /* How many end-of-memory variables you have, grandma! */
++ max_low_pfn = end_pfn;
++ max_pfn = end_pfn;
++ high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
++
++ /* Remove active ranges so rediscovery with NUMA-awareness happens */
++ remove_all_active_ranges();
++
+ #ifdef CONFIG_ACPI_NUMA
+ /*
+ * Parse SRAT to discover nodes.
+@@ -838,16 +675,16 @@ void __init setup_arch(char **cmdline_p)
+
+ }
+
++#ifdef CONFIG_ACPI
+ if (!is_initial_xendomain()) {
+ acpi_disabled = 1;
+-#ifdef CONFIG_ACPI
+ acpi_ht = 0;
+-#endif
+ }
+ #endif
++#endif
+
+-#ifndef CONFIG_XEN
+- check_ioapic();
++#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
++ early_quirks();
+ #endif
+
+ zap_low_mappings(0);
+@@ -907,6 +744,7 @@ void __init setup_arch(char **cmdline_p)
+ }
+ #else
+ e820_reserve_resources(e820.map, e820.nr_map);
++ e820_mark_nosave_regions();
+ #endif
+
+ request_resource(&iomem_resource, &video_ram_resource);
+@@ -914,7 +752,7 @@ void __init setup_arch(char **cmdline_p)
+ {
+ unsigned i;
+ /* request I/O space for devices used on all i[345]86 PCs */
+- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
++ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
+ request_resource(&ioport_resource, &standard_io_resources[i]);
+ }
+
+@@ -1099,7 +937,7 @@ static void __init amd_detect_cmp(struct
+ #endif
+ }
+
+-static void __init init_amd(struct cpuinfo_x86 *c)
++static void __cpuinit init_amd(struct cpuinfo_x86 *c)
+ {
+ unsigned level;
+
+@@ -1155,6 +993,12 @@ static void __init init_amd(struct cpuin
+
+ /* Fix cpuid4 emulation for more */
+ num_cache_leaves = 3;
++
++ /* When there is only one core no need to synchronize RDTSC */
++ if (num_possible_cpus() == 1)
++ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
++ else
++ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ }
+
+ static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
+@@ -1236,8 +1080,7 @@ static void srat_detect_node(void)
+ node = first_node(node_online_map);
+ numa_set_node(cpu, node);
+
+- if (acpi_numa > 0)
+- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
++ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
+ #endif
+ }
+
+@@ -1271,6 +1114,8 @@ static void __cpuinit init_intel(struct
+ if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
+ (c->x86 == 0x6 && c->x86_model >= 0x0e))
+ set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
++ if (c->x86 == 6)
++ set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
+ c->x86_max_cores = intel_num_cpu_cores(c);
+
+@@ -1489,8 +1334,8 @@ static int show_cpuinfo(struct seq_file
+
+ /* Intel-defined (#2) */
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
+- "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
+- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
++ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
++ NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+
+ /* VIA/Cyrix/Centaur-defined */
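+
+Most of the deleted parse_cmdline_early() above is replaced by
+per-option early_param() callbacks; setup_elfcorehdr() shows the
+pattern, and like several of the other handlers it leans on memparse()
+for K/M/G suffixes. A user-space sketch of that suffix handling, close
+to (but simpler than) the kernel's lib/cmdline.c version:
+
+	#include <stdio.h>
+	#include <stdlib.h>
+
+	static unsigned long long memparse_sketch(const char *s, char **end)
+	{
+		unsigned long long v = strtoull(s, end, 0);
+
+		switch (**end) {
+		case 'G': case 'g': v <<= 10;	/* fall through */
+		case 'M': case 'm': v <<= 10;	/* fall through */
+		case 'K': case 'k': v <<= 10;
+			(*end)++;
+		default:
+			break;
+		}
+		return v;
+	}
+
+	int main(void)
+	{
+		char *end;
+
+		printf("%llu\n", memparse_sketch("16M", &end));	/* 16777216 */
+		return 0;
+	}
+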
+Index: 10.3-2007-11-26/arch/x86_64/kernel/setup64-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup64-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/setup64-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -31,7 +31,7 @@
+ #include <asm/hypervisor.h>
+ #endif
+
+-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
++char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
+
+ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
+
+@@ -55,8 +55,10 @@ Control non executable mappings for 64bi
+ on Enable(default)
+ off Disable
+ */
+-int __init nonx_setup(char *str)
++static int __init nonx_setup(char *str)
+ {
++ if (!str)
++ return -EINVAL;
+ if (!strncmp(str, "on", 2)) {
+ __supported_pte_mask |= _PAGE_NX;
+ do_not_nx = 0;
+@@ -64,9 +66,9 @@ int __init nonx_setup(char *str)
+ do_not_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+- return 1;
++ return 0;
+ }
+-__setup("noexec=", nonx_setup); /* parsed early actually */
++early_param("noexec", nonx_setup);
+
+ int force_personality32 = 0;
+
+@@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void)
+ #endif
+
+ /* Copy section for each CPU (we discard the original) */
+- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
+-#ifdef CONFIG_MODULES
+- if (size < PERCPU_ENOUGH_ROOM)
+- size = PERCPU_ENOUGH_ROOM;
+-#endif
++ size = PERCPU_ENOUGH_ROOM;
+
++ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
+ for_each_cpu_mask (i, cpu_possible_map) {
+ char *ptr;
+
+@@ -169,7 +168,10 @@ void pda_init(int cpu)
+ /* Setup up data that may be needed in __get_free_pages early */
+ asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
+ #ifndef CONFIG_XEN
++	/* Memory clobbers used to order PDA accesses */
++ mb();
+ wrmsrl(MSR_GS_BASE, pda);
++ mb();
+ #else
+ HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (unsigned long)pda);
+ #endif
+@@ -234,6 +236,8 @@ void __cpuinit check_efer(void)
+ }
+ }
+
++unsigned long kernel_eflags;
++
+ /*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+@@ -298,28 +302,17 @@ void __cpuinit cpu_init (void)
+ * set up and load the per-CPU TSS
+ */
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
++ static const unsigned int order[N_EXCEPTION_STACKS] = {
++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
++ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
++ };
+ if (cpu) {
+- static const unsigned int order[N_EXCEPTION_STACKS] = {
+- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+- };
+-
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
+ if (!estacks)
+ panic("Cannot allocate exception stack %ld %d\n",
+ v, cpu);
+ }
+- switch (v + 1) {
+-#if DEBUG_STKSZ > EXCEPTION_STKSZ
+- case DEBUG_STACK:
+- cpu_pda(cpu)->debugstack = (unsigned long)estacks;
+- estacks += DEBUG_STKSZ;
+- break;
+-#endif
+- default:
+- estacks += EXCEPTION_STKSZ;
+- break;
+- }
++ estacks += PAGE_SIZE << order[v];
+ orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
+ }
+
+@@ -358,4 +351,6 @@ void __cpuinit cpu_init (void)
+ set_debugreg(0UL, 7);
+
+ fpu_init();
++
++ raw_local_save_flags(kernel_eflags);
+ }
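+
+cpu_init() above now sizes the exception stacks from a single order[]
+table that uses GCC's range-designated initializers: every stack
+defaults to EXCEPTION_STACK_ORDER and only DEBUG_STACK overrides it. A
+compact stand-alone demo of that initializer style (the stack count and
+orders here are illustrative):
+
+	#include <stdio.h>
+
+	#define N_STACKS    5
+	#define DEBUG_STACK 4		/* 1-based index, as in the kernel */
+
+	static const unsigned int order[N_STACKS] = {
+		[0 ... N_STACKS - 1] = 0,	/* one page each by default */
+		[DEBUG_STACK - 1]    = 1,	/* two pages for the debug stack */
+	};
+
+	int main(void)
+	{
+		for (int v = 0; v < N_STACKS; v++)
+			printf("stack %d: %lu bytes\n", v, 4096UL << order[v]);
+		return 0;
+	}
+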
+Index: 10.3-2007-11-26/arch/x86_64/kernel/smp-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/smp-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/smp-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -394,9 +394,8 @@ int smp_call_function_single (int cpu, v
+ /* prevent preemption and reschedule on another processor */
+ int me = get_cpu();
+ if (cpu == me) {
+- WARN_ON(1);
+ put_cpu();
+- return -EBUSY;
++ return 0;
+ }
+ spin_lock_bh(&call_lock);
+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
+@@ -526,7 +525,7 @@ void smp_send_stop(void)
+ #ifndef CONFIG_XEN
+ asmlinkage void smp_reschedule_interrupt(void)
+ #else
+-asmlinkage irqreturn_t smp_reschedule_interrupt(void)
++asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
+ #endif
+ {
+ #ifndef CONFIG_XEN
+@@ -539,7 +538,7 @@ asmlinkage irqreturn_t smp_reschedule_in
+ #ifndef CONFIG_XEN
+ asmlinkage void smp_call_function_interrupt(void)
+ #else
+-asmlinkage irqreturn_t smp_call_function_interrupt(void)
++asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
+ #endif
+ {
+ void (*func) (void *info) = call_data->func;
+@@ -570,31 +569,3 @@ asmlinkage irqreturn_t smp_call_function
+ return IRQ_HANDLED;
+ #endif
+ }
+-
+-int safe_smp_processor_id(void)
+-{
+-#ifdef CONFIG_XEN
+- return smp_processor_id();
+-#else
+- unsigned apicid, i;
+-
+- if (disable_apic)
+- return 0;
+-
+- apicid = hard_smp_processor_id();
+- if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
+- return apicid;
+-
+- for (i = 0; i < NR_CPUS; ++i) {
+- if (x86_cpu_to_apicid[i] == apicid)
+- return i;
+- }
+-
+- /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
+- * or called too early. Either way, we must be CPU 0. */
+- if (x86_cpu_to_apicid[0] == BAD_APICID)
+- return 0;
+-
+- return 0; /* Should not happen */
+-#endif
+-}
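+
+Two interface shifts meet in this file: smp_call_function_single() now
+quietly succeeds on a self-call instead of WARNing and returning
+-EBUSY (note that, as the hunk shows, the function is still not
+invoked locally in this version), and the Xen event-channel handlers
+take the new 2.6.19 two-argument interrupt shape, in which the
+struct pt_regs * parameter is gone and get_irq_regs() recovers it when
+needed. A kernel-style sketch of the converted handler shape;
+struct example_dev and example_int are hypothetical names, not from
+this patch:
+
+	static irqreturn_t example_int(int irq, void *dev_id)
+	{
+		struct example_dev *dev = dev_id;	/* cookie from request_irq() */
+
+		/* acknowledge the device and queue any deferred work here */
+		(void)dev;
+		return IRQ_HANDLED;
+	}
+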
+Index: 10.3-2007-11-26/arch/x86_64/kernel/traps-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/traps-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/traps-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -23,6 +23,7 @@
+ #include <linux/delay.h>
+ #include <linux/spinlock.h>
+ #include <linux/interrupt.h>
++#include <linux/kallsyms.h>
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+ #include <linux/nmi.h>
+@@ -45,6 +46,7 @@
+ #include <asm/pda.h>
+ #include <asm/proto.h>
+ #include <asm/nmi.h>
++#include <asm/stacktrace.h>
+
+ asmlinkage void divide_error(void);
+ asmlinkage void debug(void);
+@@ -114,7 +116,6 @@ static int call_trace = 1;
+ #endif
+
+ #ifdef CONFIG_KALLSYMS
+-# include <linux/kallsyms.h>
+ void printk_address(unsigned long address)
+ {
+ unsigned long offset = 0, symsize;
+@@ -142,7 +143,7 @@ void printk_address(unsigned long addres
+ #endif
+
+ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
+- unsigned *usedp, const char **idp)
++ unsigned *usedp, char **idp)
+ {
+ #ifndef CONFIG_X86_NO_TSS
+ static char ids[][8] = {
+@@ -162,26 +163,7 @@ static unsigned long *in_exception_stack
+ * 'stack' is in one of them:
+ */
+ for (k = 0; k < N_EXCEPTION_STACKS; k++) {
+- unsigned long end;
+-
+- /*
+- * set 'end' to the end of the exception stack.
+- */
+- switch (k + 1) {
+- /*
+- * TODO: this block is not needed i think, because
+- * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
+- * properly too.
+- */
+-#if DEBUG_STKSZ > EXCEPTION_STKSZ
+- case DEBUG_STACK:
+- end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
+- break;
+-#endif
+- default:
+- end = per_cpu(orig_ist, cpu).ist[k];
+- break;
+- }
++ unsigned long end = per_cpu(orig_ist, cpu).ist[k];
+ /*
+ * Is 'stack' above this exception frame's end?
+ * If yes then skip to the next frame.
+@@ -236,13 +218,19 @@ static unsigned long *in_exception_stack
+ return NULL;
+ }
+
+-static int show_trace_unwind(struct unwind_frame_info *info, void *context)
++struct ops_and_data {
++ struct stacktrace_ops *ops;
++ void *data;
++};
++
++static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
+ {
++ struct ops_and_data *oad = (struct ops_and_data *)context;
+ int n = 0;
+
+ while (unwind(info) == 0 && UNW_PC(info)) {
+ n++;
+- printk_address(UNW_PC(info));
++ oad->ops->address(oad->data, UNW_PC(info));
+ if (arch_unw_user_mode(info))
+ break;
+ }
+@@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi
+ * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
+ */
+
+-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
++static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+- const unsigned cpu = safe_smp_processor_id();
++ void *t = (void *)tinfo;
++ return p > t && p < t + THREAD_SIZE - 3;
++}
++
++void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
++ struct stacktrace_ops *ops, void *data)
++{
++ const unsigned cpu = smp_processor_id();
+ unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
+ unsigned used = 0;
+-
+- printk("\nCall Trace:\n");
++ struct thread_info *tinfo;
+
+ if (!tsk)
+ tsk = current;
+@@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk,
+ if (call_trace >= 0) {
+ int unw_ret = 0;
+ struct unwind_frame_info info;
++ struct ops_and_data oad = { .ops = ops, .data = data };
+
+ if (regs) {
+ if (unwind_init_frame_info(&info, tsk, regs) == 0)
+- unw_ret = show_trace_unwind(&info, NULL);
++ unw_ret = dump_trace_unwind(&info, &oad);
+ } else if (tsk == current)
+- unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
++ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
+ else {
+ if (unwind_init_blocked(&info, tsk) == 0)
+- unw_ret = show_trace_unwind(&info, NULL);
++ unw_ret = dump_trace_unwind(&info, &oad);
+ }
+ if (unw_ret > 0) {
+ if (call_trace == 1 && !arch_unw_user_mode(&info)) {
+- print_symbol("DWARF2 unwinder stuck at %s\n",
++ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
+ UNW_PC(&info));
+ if ((long)UNW_SP(&info) < 0) {
+- printk("Leftover inexact backtrace:\n");
++ ops->warning(data, "Leftover inexact backtrace:\n");
+ stack = (unsigned long *)UNW_SP(&info);
++ if (!stack)
++ return;
+ } else
+- printk("Full inexact backtrace again:\n");
++ ops->warning(data, "Full inexact backtrace again:\n");
+ } else if (call_trace >= 1)
+ return;
+ else
+- printk("Full inexact backtrace again:\n");
++ ops->warning(data, "Full inexact backtrace again:\n");
+ } else
+- printk("Inexact backtrace:\n");
++ ops->warning(data, "Inexact backtrace:\n");
++ }
++ if (!stack) {
++ unsigned long dummy;
++ stack = &dummy;
++ if (tsk && tsk != current)
++ stack = (unsigned long *)tsk->thread.rsp;
+ }
++ /*
++ * Align the stack pointer on word boundary, later loops
++ * rely on that (and corruption / debug info bugs can cause
++ * unaligned values here):
++ */
++ stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
+
+ /*
+ * Print function call entries within a stack. 'cond' is the
+@@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk,
+ #define HANDLE_STACK(cond) \
+ do while (cond) { \
+ unsigned long addr = *stack++; \
+- if (kernel_text_address(addr)) { \
++ if (oops_in_progress ? \
++ __kernel_text_address(addr) : \
++ kernel_text_address(addr)) { \
+ /* \
+ * If the address is either in the text segment of the \
+ * kernel, or in the region which contains vmalloc'ed \
+@@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk,
+ * down the cause of the crash will be able to figure \
+ * out the call path that was taken. \
+ */ \
+- printk_address(addr); \
++ ops->address(data, addr); \
+ } \
+ } while (0)
+
+@@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk,
+ * current stack address. If the stacks consist of nested
+ * exceptions
+ */
+- for ( ; ; ) {
+- const char *id;
++ for (;;) {
++ char *id;
+ unsigned long *estack_end;
+ estack_end = in_exception_stack(cpu, (unsigned long)stack,
+ &used, &id);
+
+ if (estack_end) {
+- printk(" <%s>", id);
++ if (ops->stack(data, id) < 0)
++ break;
+ HANDLE_STACK (stack < estack_end);
+- printk(" <EOE>");
++ ops->stack(data, "<EOE>");
+ /*
+ * We link to the next stack via the
+ * second-to-last pointer (index -2 to end) in the
+@@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk,
+ (IRQSTACKSIZE - 64) / sizeof(*irqstack);
+
+ if (stack >= irqstack && stack < irqstack_end) {
+- printk(" <IRQ>");
++ if (ops->stack(data, "IRQ") < 0)
++ break;
+ HANDLE_STACK (stack < irqstack_end);
+ /*
+ * We link to the next stack (which would be
+@@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk,
+ */
+ stack = (unsigned long *) (irqstack_end[-1]);
+ irqstack_end = NULL;
+- printk(" <EOI>");
++ ops->stack(data, "EOI");
+ continue;
+ }
+ }
+@@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk,
+ }
+
+ /*
+- * This prints the process stack:
++ * This handles the process stack:
+ */
+- HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
++ tinfo = current_thread_info();
++ HANDLE_STACK (valid_stack_ptr(tinfo, stack));
+ #undef HANDLE_STACK
++}
++EXPORT_SYMBOL(dump_trace);
++
++static void
++print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
++{
++ print_symbol(msg, symbol);
++ printk("\n");
++}
++
++static void print_trace_warning(void *data, char *msg)
++{
++ printk("%s\n", msg);
++}
++
++static int print_trace_stack(void *data, char *name)
++{
++ printk(" <%s> ", name);
++ return 0;
++}
++
++static void print_trace_address(void *data, unsigned long addr)
++{
++ printk_address(addr);
++}
++
++static struct stacktrace_ops print_trace_ops = {
++ .warning = print_trace_warning,
++ .warning_symbol = print_trace_warning_symbol,
++ .stack = print_trace_stack,
++ .address = print_trace_address,
++};
+
++void
++show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
++{
++ printk("\nCall Trace:\n");
++ dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
+ printk("\n");
+ }
+
+-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
++static void
++_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
+ {
+ unsigned long *stack;
+ int i;
+- const int cpu = safe_smp_processor_id();
++ const int cpu = smp_processor_id();
+ unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
+ unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
+
+@@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs
+ int i;
+ int in_kernel = !user_mode(regs);
+ unsigned long rsp;
+- const int cpu = safe_smp_processor_id();
++ const int cpu = smp_processor_id();
+ struct task_struct *cur = cpu_pda(cpu)->pcurrent;
+
+ rsp = regs->rsp;
+@@ -505,9 +557,11 @@ static unsigned int die_nest_count;
+
+ unsigned __kprobes long oops_begin(void)
+ {
+- int cpu = safe_smp_processor_id();
++ int cpu = smp_processor_id();
+ unsigned long flags;
+
++ oops_enter();
++
+ /* racy, but better than risking deadlock. */
+ local_irq_save(flags);
+ if (!spin_trylock(&die_lock)) {
+@@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl
+ spin_unlock_irqrestore(&die_lock, flags);
+ if (panic_on_oops)
+ panic("Fatal exception");
++ oops_exit();
+ }
+
+ void __kprobes __die(const char * str, struct pt_regs * regs, long err)
+@@ -573,7 +628,7 @@ void die(const char * str, struct pt_reg
+ }
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+-void __kprobes die_nmi(char *str, struct pt_regs *regs)
++void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
+ {
+ unsigned long flags = oops_begin();
+
+@@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct
+ * We are in trouble anyway, lets at least try
+ * to get a message out.
+ */
+- printk(str, safe_smp_processor_id());
++ printk(str, smp_processor_id());
+ show_registers(regs);
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
+- if (panic_on_timeout || panic_on_oops)
+- panic("nmi watchdog");
+- printk("console shuts up ...\n");
++ if (do_panic || panic_on_oops)
++ panic("Non maskable interrupt");
+ oops_end(flags);
+ nmi_exit();
+ local_irq_enable();
+@@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro
+ static __kprobes void
+ mem_parity_error(unsigned char reason, struct pt_regs * regs)
+ {
+- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
+- printk("You probably have a hardware problem with your RAM chips\n");
++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
++ reason);
++ printk(KERN_EMERG "You probably have a hardware problem with your "
++ "RAM chips\n");
++
++ if (panic_on_unrecovered_nmi)
++ panic("NMI: Not continuing");
++
++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+
+ #if 0 /* XEN */
+ /* Clear and disable the memory parity error line. */
+@@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str
+
+ static __kprobes void
+ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+-{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
+- printk("Dazed and confused, but trying to continue\n");
+- printk("Do you have a strange power saving mode enabled?\n");
++{
++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
++ reason);
++ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
++
++ if (panic_on_unrecovered_nmi)
++ panic("NMI: Not continuing");
++
++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ }
+
+ /* Runs on IST stack. This code must keep interrupts off all the time.
+@@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi
+ * Ok, so this is none of the documented NMI sources,
+ * so it must be the NMI watchdog.
+ */
+- if (nmi_watchdog > 0) {
+- nmi_watchdog_tick(regs,reason);
++ if (nmi_watchdog_tick(regs,reason))
+ return;
+- }
+ #endif
+- unknown_nmi_error(reason, regs);
++ if (!do_nmi_callback(regs,cpu))
++ unknown_nmi_error(reason, regs);
++
+ return;
+ }
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+@@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void)
+ init_fpu(me);
+ restore_fpu_checking(&me->thread.i387.fxsave);
+ task_thread_info(me)->status |= TS_USEDFPU;
++ me->fpu_counter++;
+ }
+
+
+@@ -1143,24 +1211,30 @@ void smp_trap_init(trap_info_t *trap_ctx
+ }
+
+
+-/* Actual parsing is done early in setup.c. */
+-static int __init oops_dummy(char *s)
++static int __init oops_setup(char *s)
+ {
+- panic_on_oops = 1;
+- return 1;
++ if (!s)
++ return -EINVAL;
++ if (!strcmp(s, "panic"))
++ panic_on_oops = 1;
++ return 0;
+ }
+-__setup("oops=", oops_dummy);
++early_param("oops", oops_setup);
+
+ static int __init kstack_setup(char *s)
+ {
++ if (!s)
++ return -EINVAL;
+ kstack_depth_to_print = simple_strtoul(s,NULL,0);
+- return 1;
++ return 0;
+ }
+-__setup("kstack=", kstack_setup);
++early_param("kstack", kstack_setup);
+
+ #ifdef CONFIG_STACK_UNWIND
+ static int __init call_trace_setup(char *s)
+ {
++ if (!s)
++ return -EINVAL;
+ if (strcmp(s, "old") == 0)
+ call_trace = -1;
+ else if (strcmp(s, "both") == 0)
+@@ -1169,7 +1243,7 @@ static int __init call_trace_setup(char
+ call_trace = 1;
+ else if (strcmp(s, "new") == 0)
+ call_trace = 2;
+- return 1;
++ return 0;
+ }
+-__setup("call_trace=", call_trace_setup);
++early_param("call_trace", call_trace_setup);
+ #endif
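+
+The dump_trace() rework above splits stack walking from printing: the
+walker drives a struct stacktrace_ops supplied by the caller, and
+show_trace() becomes just one consumer (print_trace_ops). A
+kernel-style sketch of another consumer that counts kernel text
+addresses instead of printing them; the count_* names are
+illustrative, while the callback signatures match the ops defined in
+this hunk:
+
+	static void count_warning(void *data, char *msg) { }
+	static void count_warning_symbol(void *data, char *msg, unsigned long sym) { }
+	static int count_stack(void *data, char *name) { return 0; }
+
+	static void count_address(void *data, unsigned long addr)
+	{
+		(*(unsigned int *)data)++;	/* one hit per text address */
+	}
+
+	static struct stacktrace_ops count_ops = {
+		.warning	= count_warning,
+		.warning_symbol	= count_warning_symbol,
+		.stack		= count_stack,
+		.address	= count_address,
+	};
+
+	/* usage: unsigned int n = 0; dump_trace(current, regs, NULL, &count_ops, &n); */
+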
+Index: 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/kernel/vsyscall-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -26,6 +26,10 @@
+ #include <linux/seqlock.h>
+ #include <linux/jiffies.h>
+ #include <linux/sysctl.h>
++#include <linux/getcpu.h>
++#include <linux/cpu.h>
++#include <linux/smp.h>
++#include <linux/notifier.h>
+
+ #include <asm/vsyscall.h>
+ #include <asm/pgtable.h>
+@@ -33,11 +37,15 @@
+ #include <asm/fixmap.h>
+ #include <asm/errno.h>
+ #include <asm/io.h>
++#include <asm/segment.h>
++#include <asm/desc.h>
++#include <asm/topology.h>
+
+ #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+
+ int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
+ seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
++int __vgetcpu_mode __section_vgetcpu_mode;
+
+ #include <asm/unistd.h>
+
+@@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo
+ sequence = read_seqbegin(&__xtime_lock);
+
+ sec = __xtime.tv_sec;
+- usec = (__xtime.tv_nsec / 1000) +
+- (__jiffies - __wall_jiffies) * (1000000 / HZ);
++ usec = __xtime.tv_nsec / 1000;
+
+ if (__vxtime.mode != VXTIME_HPET) {
+ t = get_cycles_sync();
+@@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo
+ __vxtime.tsc_quot) >> 32;
+ /* See comment in x86_64 do_gettimeofday. */
+ } else {
+- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
++ usec += ((readl((void __iomem *)
++ fix_to_virt(VSYSCALL_HPET) + 0xf0) -
+ __vxtime.last) * __vxtime.quot) >> 32;
+ }
+ } while (read_seqretry(&__xtime_lock, sequence));
+@@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t)
+ return __xtime.tv_sec;
+ }
+
+-long __vsyscall(2) venosys_0(void)
+-{
+- return -ENOSYS;
++/* Fast way to get current CPU and node.
++ This helps to do per node and per CPU caches in user space.
++ The result is not guaranteed without CPU affinity, but usually
++ works out because the scheduler tries to keep a thread on the same
++ CPU.
++
++ tcache must point to a two element sized long array.
++ All arguments can be NULL. */
++long __vsyscall(2)
++vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
++{
++ unsigned int dummy, p;
++ unsigned long j = 0;
++
++ /* Fast cache - only recompute value once per jiffies and avoid
++ relatively costly rdtscp/cpuid otherwise.
++ This works because the scheduler usually keeps the process
++ on the same CPU and this syscall doesn't guarantee its
++ results anyways.
++ We do this here because otherwise user space would do it on
++ its own in a likely inferior way (no access to jiffies).
++ If you don't like it pass NULL. */
++ if (tcache && tcache->blob[0] == (j = __jiffies)) {
++ p = tcache->blob[1];
++ } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
++ /* Load per CPU data from RDTSCP */
++ rdtscp(dummy, dummy, p);
++ } else {
++ /* Load per CPU data from GDT */
++ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
++ }
++ if (tcache) {
++ tcache->blob[0] = j;
++ tcache->blob[1] = p;
++ }
++ if (cpu)
++ *cpu = p & 0xfff;
++ if (node)
++ *node = p >> 12;
++ return 0;
+ }
+
+ long __vsyscall(3) venosys_1(void)
+@@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ extern u16 vsysc1, vsysc2;
+- u16 *map1, *map2;
++ u16 __iomem *map1;
++ u16 __iomem *map2;
+ int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ if (!write)
+ return ret;
+@@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta
+ goto out;
+ }
+ if (!sysctl_vsyscall) {
+- *map1 = SYSCALL;
+- *map2 = SYSCALL;
++ writew(SYSCALL, map1);
++ writew(SYSCALL, map2);
+ } else {
+- *map1 = NOP2;
+- *map2 = NOP2;
++ writew(NOP2, map1);
++ writew(NOP2, map2);
+ }
+ iounmap(map2);
+ out:
+@@ -200,6 +246,45 @@ static ctl_table kernel_root_table2[] =
+
+ #endif
+
++/* Assume __initcall executes before all user space. Hopefully kmod
++ doesn't violate that. We'll find out if it does. */
++static void __cpuinit vsyscall_set_cpu(int cpu)
++{
++ unsigned long d;
++ unsigned long node = 0;
++#ifdef CONFIG_NUMA
++ node = cpu_to_node[cpu];
++#endif
++ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
++ write_rdtscp_aux((node << 12) | cpu);
++
++ /* Store cpu number in limit so that it can be loaded quickly
++ in user space in vgetcpu.
++ 12 bits for the CPU and 8 bits for the node. */
++ d = 0x0f40000000000ULL;
++ d |= cpu;
++ d |= (node & 0xf) << 12;
++ d |= (node >> 4) << 48;
++ HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_PER_CPU]), d);
++}
++
++static void __cpuinit cpu_vsyscall_init(void *arg)
++{
++ /* preemption should be already off */
++ vsyscall_set_cpu(raw_smp_processor_id());
++}
++
++#ifdef CONFIG_HOTPLUG_CPU
++static int __cpuinit
++cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
++{
++ long cpu = (long)arg;
++ if (action == CPU_ONLINE)
++ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
++ return NOTIFY_DONE;
++}
++#endif
++
+ static void __init map_vsyscall(void)
+ {
+ extern char __vsyscall_0;
+@@ -225,14 +310,21 @@ static int __init vsyscall_init(void)
+ VSYSCALL_ADDR(__NR_vgettimeofday)));
+ BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
+ BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
++ BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
+ map_vsyscall();
+ #ifdef CONFIG_XEN
+ map_vsyscall_user();
+ sysctl_vsyscall = 0; /* disable vgettimeofay() */
++ if (boot_cpu_has(X86_FEATURE_RDTSCP))
++ vgetcpu_mode = VGETCPU_RDTSCP;
++ else
++ vgetcpu_mode = VGETCPU_LSL;
+ #endif
+ #ifdef CONFIG_SYSCTL
+ register_sysctl_table(kernel_root_table2, 0);
+ #endif
++ on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
++ hotcpu_notifier(cpu_vsyscall_notifier, 0);
+ return 0;
+ }
+
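+The new vgetcpu() vsyscall above packs the CPU number into the low 12
+bits of a value read from either the RDTSCP auxiliary register or a
+per-CPU GDT segment limit, with the NUMA node stored above it.
+Unpacking is pure masking and shifting; a user-space sketch of the
+decode, given a raw value p as the vsyscall reads it:
+
+	#include <stdio.h>
+
+	static void decode_cpu_node(unsigned int p, unsigned int *cpu,
+				    unsigned int *node)
+	{
+		*cpu  = p & 0xfff;	/* low 12 bits: CPU number */
+		*node = p >> 12;	/* remaining bits: NUMA node */
+	}
+
+	int main(void)
+	{
+		unsigned int cpu, node;
+
+		decode_cpu_node((3u << 12) | 7u, &cpu, &node);
+		printf("cpu %u node %u\n", cpu, node);	/* cpu 7 node 3 */
+		return 0;
+	}
+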
+Index: 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/fault-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -40,8 +40,7 @@
+ #define PF_RSVD (1<<3)
+ #define PF_INSTR (1<<4)
+
+-#ifdef CONFIG_KPROBES
+-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
++static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
+ /* Hook to register for page fault notifications */
+ int register_page_fault_notifier(struct notifier_block *nb)
+@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct
+ vmalloc_sync_all();
+ return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(register_page_fault_notifier);
+
+ int unregister_page_fault_notifier(struct notifier_block *nb)
+ {
+ return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
+ }
++EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
+
+ static inline int notify_page_fault(enum die_val val, const char *str,
+ struct pt_regs *regs, long err, int trap, int sig)
+@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
+ };
+ return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+ }
+-#else
+-static inline int notify_page_fault(enum die_val val, const char *str,
+- struct pt_regs *regs, long err, int trap, int sig)
+-{
+- return NOTIFY_DONE;
+-}
+-#endif
+
+ void bust_spinlocks(int yes)
+ {
+@@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
+ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
+ unsigned long error_code)
+ {
+- unsigned char *instr;
++ unsigned char __user *instr;
+ int scan_more = 1;
+ int prefetch = 0;
+ unsigned char *max_instr;
+@@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p
+ if (error_code & PF_INSTR)
+ return 0;
+
+- instr = (unsigned char *)convert_rip_to_linear(current, regs);
++ instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
+ max_instr = instr + 15;
+
+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+@@ -122,7 +116,7 @@ static noinline int is_prefetch(struct p
+ unsigned char instr_hi;
+ unsigned char instr_lo;
+
+- if (__get_user(opcode, instr))
++ if (__get_user(opcode, (char __user *)instr))
+ break;
+
+ instr_hi = opcode & 0xf0;
+@@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p
+ case 0x00:
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
+ scan_more = 0;
+- if (__get_user(opcode, instr))
++ if (__get_user(opcode, (char __user *)instr))
+ break;
+ prefetch = (instr_lo == 0xF) &&
+ (opcode == 0x0D || opcode == 0x18);
+@@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p
+ static int bad_address(void *p)
+ {
+ unsigned long dummy;
+- return __get_user(dummy, (unsigned long *)p);
++ return __get_user(dummy, (unsigned long __user *)p);
+ }
+
+ void dump_pagetable(unsigned long address)
+@@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r
+
+ int unhandled_signal(struct task_struct *tsk, int sig)
+ {
+- if (tsk->pid == 1)
++ if (is_init(tsk))
+ return 1;
+ if (tsk->ptrace & PT_PTRACED)
+ return 0;
+@@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+
+ /* Below here mismatches are bugs because these lower tables
+ are shared */
+@@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a
+ pud_ref = pud_offset(pgd_ref, address);
+ if (pud_none(*pud_ref))
+ return -1;
+- if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
++ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ BUG();
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+@@ -531,7 +525,7 @@ good_area:
+ case PF_PROT: /* read, present */
+ goto bad_area;
+ case 0: /* read, not present */
+- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
++ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+ goto bad_area;
+ }
+
+@@ -647,7 +641,7 @@ no_context:
+ */
+ out_of_memory:
+ up_read(&mm->mmap_sem);
+- if (current->pid == 1) {
++ if (is_init(current)) {
+ yield();
+ goto again;
+ }
+@@ -702,7 +696,7 @@ void vmalloc_sync_all(void)
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
+ spin_unlock(&pgd_lock);
+ set_bit(pgd_index(address), insync);
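+
+is_prefetch() above scans at most 15 opcode bytes to decide whether a
+faulting access came from an AMD prefetch instruction (encoded
+0x0F 0x0D or 0x0F 0x18), which may legitimately touch a bad address. A
+user-space sketch of the final byte-pair test at the heart of that
+scan:
+
+	#include <stdio.h>
+
+	static int is_prefetch_opcode(unsigned char b0, unsigned char b1)
+	{
+		return b0 == 0x0f && (b1 == 0x0d || b1 == 0x18);
+	}
+
+	int main(void)
+	{
+		printf("%d\n", is_prefetch_opcode(0x0f, 0x0d));	/* 1: prefetchw */
+		printf("%d\n", is_prefetch_opcode(0x0f, 0x1f));	/* 0: nop family */
+		return 0;
+	}
+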
+Index: 10.3-2007-11-26/arch/x86_64/mm/init-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/init-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/mm/init-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user);
+
+ extern unsigned long *contiguous_bitmap;
+
+-static unsigned long dma_reserve __initdata;
+-
+ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+ extern unsigned long start_pfn;
+
+@@ -440,7 +438,6 @@ __init void *early_ioremap(unsigned long
+
+ /* actually usually some more */
+ if (size >= LARGE_PAGE_SIZE) {
+- printk("SMBIOS area too long %lu\n", size);
+ return NULL;
+ }
+ set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+@@ -462,13 +459,15 @@ __init void early_iounmap(void *addr, un
+ #endif
+
+ static void __meminit
+-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
++phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+ {
+- int i, k;
++ int i = pmd_index(address);
+
+- for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
++ for (; i < PTRS_PER_PMD; i++) {
+ unsigned long pte_phys;
++ pmd_t *pmd = pmd_page + pmd_index(address);
+ pte_t *pte, *pte_save;
++ int k;
+
+ if (address >= end) {
+ if (!after_bootmem)
+@@ -476,6 +475,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long
+ set_pmd(pmd, __pmd(0));
+ break;
+ }
++
++ if (pmd_val(*pmd))
++ continue;
++
+ pte = alloc_static_page(&pte_phys);
+ pte_save = pte;
+ for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
+@@ -501,40 +504,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long
+ static void __meminit
+ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+ {
+- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+-
+- if (pmd_none(*pmd)) {
+- spin_lock(&init_mm.page_table_lock);
+- phys_pmd_init(pmd, address, end);
+- spin_unlock(&init_mm.page_table_lock);
+- __flush_tlb_all();
+- }
++ pmd_t *pmd = pmd_offset(pud,0);
++ spin_lock(&init_mm.page_table_lock);
++ phys_pmd_init(pmd, address, end);
++ spin_unlock(&init_mm.page_table_lock);
++ __flush_tlb_all();
+ }
+
+-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
++static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+ {
+- long i = pud_index(address);
+-
+- pud = pud + i;
+-
+- if (after_bootmem && pud_val(*pud)) {
+- phys_pmd_update(pud, address, end);
+- return;
+- }
++ int i = pud_index(addr);
+
+- for (; i < PTRS_PER_PUD; pud++, i++) {
+- unsigned long paddr, pmd_phys;
++ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
++ unsigned long pmd_phys;
++ pud_t *pud = pud_page + pud_index(addr);
+ pmd_t *pmd;
+
+- paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+- if (paddr >= end)
++ if (addr >= end)
+ break;
+
++ if (pud_val(*pud)) {
++ phys_pmd_update(pud, addr, end);
++ continue;
++ }
++
+ pmd = alloc_static_page(&pmd_phys);
+ early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
+ spin_lock(&init_mm.page_table_lock);
+ set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
+- phys_pmd_init(pmd, paddr, end);
++ phys_pmd_init(pmd, addr, end);
+ spin_unlock(&init_mm.page_table_lock);
+ }
+ __flush_tlb();
+@@ -797,77 +795,19 @@ void __cpuinit zap_low_mappings(int cpu)
+ #endif
+ }
+
+-/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+-__init void
+-size_zones(unsigned long *z, unsigned long *h,
+- unsigned long start_pfn, unsigned long end_pfn)
+-{
+- int i;
+-#ifndef CONFIG_XEN
+- unsigned long w;
+-#endif
+-
+- for (i = 0; i < MAX_NR_ZONES; i++)
+- z[i] = 0;
+-
+-#ifndef CONFIG_XEN
+- if (start_pfn < MAX_DMA_PFN)
+- z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+- if (start_pfn < MAX_DMA32_PFN) {
+- unsigned long dma32_pfn = MAX_DMA32_PFN;
+- if (dma32_pfn > end_pfn)
+- dma32_pfn = end_pfn;
+- z[ZONE_DMA32] = dma32_pfn - start_pfn;
+- }
+- z[ZONE_NORMAL] = end_pfn - start_pfn;
+-
+- /* Remove lower zones from higher ones. */
+- w = 0;
+- for (i = 0; i < MAX_NR_ZONES; i++) {
+- if (z[i])
+- z[i] -= w;
+- w += z[i];
+- }
+-
+- /* Compute holes */
+- w = start_pfn;
+- for (i = 0; i < MAX_NR_ZONES; i++) {
+- unsigned long s = w;
+- w += z[i];
+- h[i] = e820_hole_size(s, w);
+- }
+-
+- /* Add the space pace needed for mem_map to the holes too. */
+- for (i = 0; i < MAX_NR_ZONES; i++)
+- h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
+-
+- /* The 16MB DMA zone has the kernel and other misc mappings.
+- Account them too */
+- if (h[ZONE_DMA]) {
+- h[ZONE_DMA] += dma_reserve;
+- if (h[ZONE_DMA] >= z[ZONE_DMA]) {
+- printk(KERN_WARNING
+- "Kernel too large and filling up ZONE_DMA?\n");
+- h[ZONE_DMA] = z[ZONE_DMA];
+- }
+- }
+-#else
+- z[ZONE_DMA] = end_pfn;
+- for (i = 0; i < MAX_NR_ZONES; i++)
+- h[i] = 0;
+-#endif
+-}
+-
+ #ifndef CONFIG_NUMA
+ void __init paging_init(void)
+ {
+- unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
++ unsigned long max_zone_pfns[MAX_NR_ZONES];
++
++ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
++ max_zone_pfns[ZONE_DMA] = end_pfn;
++ max_zone_pfns[ZONE_DMA32] = end_pfn;
++ max_zone_pfns[ZONE_NORMAL] = end_pfn;
+
+ memory_present(0, 0, end_pfn);
+ sparse_init();
+- size_zones(zones, holes, 0, end_pfn);
+- free_area_init_node(0, NODE_DATA(0), zones,
+- __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
++ free_area_init_nodes(max_zone_pfns);
+
+ init_mm.context.pinned = 1;
+ }
+@@ -921,36 +861,23 @@ void online_page(struct page *page)
+
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
+- * via probe interface of sysfs. If acpi notifies hot-add event, then it
+- * can tell node id by searching dsdt. But, probe interface doesn't have
+- * node id. So, return 0 as node id at this time.
+- */
+-#ifdef CONFIG_NUMA
+-int memory_add_physaddr_to_nid(u64 start)
+-{
+- return 0;
+-}
+-#endif
+-
+-/*
+ * Memory is added always to NORMAL zone. This means you will never get
+ * additional DMA/DMA32 memory.
+ */
+ int arch_add_memory(int nid, u64 start, u64 size)
+ {
+ struct pglist_data *pgdat = NODE_DATA(nid);
+- struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
++ struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ int ret;
+
++ init_memory_mapping(start, (start + size -1));
++
+ ret = __add_pages(zone, start_pfn, nr_pages);
+ if (ret)
+ goto error;
+
+- init_memory_mapping(start, (start + size -1));
+-
+ return ret;
+ error:
+ printk("%s: Problem encountered in __add_pages!\n", __func__);
+@@ -964,7 +891,17 @@ int remove_memory(u64 start, u64 size)
+ }
+ EXPORT_SYMBOL_GPL(remove_memory);
+
+-#else /* CONFIG_MEMORY_HOTPLUG */
++#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
++int memory_add_physaddr_to_nid(u64 start)
++{
++ return 0;
++}
++EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
++#endif
++
++#endif /* CONFIG_MEMORY_HOTPLUG */
++
++#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
+ /*
+ * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
+ * just online the pages.
+@@ -990,7 +927,7 @@ int __add_pages(struct zone *z, unsigned
+ }
+ return err;
+ }
+-#endif /* CONFIG_MEMORY_HOTPLUG */
++#endif
+
+ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
+ kcore_vsyscall;
+@@ -1007,12 +944,6 @@ void __init mem_init(void)
+
+ pci_iommu_alloc();
+
+- /* How many end-of-memory variables you have, grandma! */
+- max_low_pfn = end_pfn;
+- max_pfn = end_pfn;
+- num_physpages = end_pfn;
+- high_memory = (void *) __va(end_pfn * PAGE_SIZE);
+-
+ /* clear the zero-page */
+ memset(empty_zero_page, 0, PAGE_SIZE);
+
+@@ -1030,7 +961,8 @@ void __init mem_init(void)
+ init_page_count(pfn_to_page(pfn));
+ totalram_pages++;
+ }
+- reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
++ reservedpages = end_pfn - totalram_pages -
++ absent_pages_in_range(0, end_pfn);
+
+ after_bootmem = 1;
+
+@@ -1137,15 +1069,34 @@ void free_initrd_mem(unsigned long start
+
+ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
+ {
+- /* Should check here against the e820 map to avoid double free */
+ #ifdef CONFIG_NUMA
+ int nid = phys_to_nid(phys);
++#endif
++ unsigned long pfn = phys >> PAGE_SHIFT;
++ if (pfn >= end_pfn) {
++ /* This can happen with kdump kernels when accessing firmware
++ tables. */
++ if (pfn < end_pfn_map)
++ return;
++ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
++ phys, len);
++ return;
++ }
++
++ /* Should check here against the e820 map to avoid double free */
++#ifdef CONFIG_NUMA
+ reserve_bootmem_node(NODE_DATA(nid), phys, len);
+ #else
+ reserve_bootmem(phys, len);
+ #endif
+- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
++#ifndef CONFIG_XEN
++ if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
++ static unsigned long dma_reserve __initdata;
++
+ dma_reserve += len / PAGE_SIZE;
++ set_dma_reserve(dma_reserve);
++ }
++#endif
+ }
+
+ int kern_addr_valid(unsigned long addr)
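
Two separate fixes land in reserve_bootmem_generic(). Out-of-range PFNs no longer corrupt bootmem: kdump kernels legitimately reserve firmware tables above their own end_pfn, and such requests are now ignored when still below end_pfn_map. And reservations wholly inside the DMA zone are reported to the core VM through set_dma_reserve(), which keeps those pages out of ZONE_DMA's watermark arithmetic. The accounting, condensed:

    /* Tally early reservations that fit inside the DMA zone. */
    if (phys + len <= MAX_DMA_PFN * PAGE_SIZE) {
            dma_reserve += len / PAGE_SIZE;
            set_dma_reserve(dma_reserve);   /* excluded from watermarks */
    }
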
+Index: 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/arch/x86_64/mm/pageattr-xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -302,8 +302,8 @@ static void revert_page(unsigned long ad
+ BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, address);
+ BUG_ON(pmd_val(*pmd) & _PAGE_PSE);
+- pgprot_val(ref_prot) |= _PAGE_PSE;
+ large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
++ large_pte = pte_mkhuge(large_pte);
+ set_pte((pte_t *)pmd, large_pte);
+ }
+
+@@ -313,32 +313,28 @@ __change_page_attr(unsigned long address
+ {
+ pte_t *kpte;
+ struct page *kpte_page;
+- unsigned kpte_flags;
+ pgprot_t ref_prot2;
+ kpte = lookup_address(address);
+ if (!kpte) return 0;
+ kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+- kpte_flags = pte_val(*kpte);
+ if (pgprot_val(prot) != pgprot_val(ref_prot)) {
+- if ((kpte_flags & _PAGE_PSE) == 0) {
++ if (!pte_huge(*kpte)) {
+ set_pte(kpte, pfn_pte(pfn, prot));
+ } else {
+ /*
+ * split_large_page will take the reference for this
+ * change_page_attr on the split page.
+ */
+-
+ struct page *split;
+- ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
+-
++ ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
+ split = split_large_page(address, prot, ref_prot2);
+ if (!split)
+ return -ENOMEM;
+- set_pte(kpte,mk_pte(split, ref_prot2));
++ set_pte(kpte, mk_pte(split, ref_prot2));
+ kpte_page = split;
+- }
++ }
+ page_private(kpte_page)++;
+- } else if ((kpte_flags & _PAGE_PSE) == 0) {
++ } else if (!pte_huge(*kpte)) {
+ set_pte(kpte, pfn_pte(pfn, ref_prot));
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
+@@ -395,10 +391,12 @@ int change_page_attr_addr(unsigned long
+ * lowmem */
+ if (__pa(address) < KERNEL_TEXT_SIZE) {
+ unsigned long addr2;
+- pgprot_t prot2 = prot;
++ pgprot_t prot2;
+ addr2 = __START_KERNEL_map + __pa(address);
+- pgprot_val(prot2) &= ~_PAGE_NX;
+- err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
++ /* Make sure the kernel mappings stay executable */
++ prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
++ err = __change_page_attr(addr2, pfn, prot2,
++ PAGE_KERNEL_EXEC);
+ }
+ }
+ up_write(&init_mm.mmap_sem);
+Index: 10.3-2007-11-26/drivers/char/tpm/tpm_xen.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/char/tpm/tpm_xen.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/char/tpm/tpm_xen.c 2007-10-22 13:53:08.000000000 +0200
+@@ -85,8 +85,7 @@ static struct tpm_private *my_priv;
+
+ /* local function prototypes */
+ static irqreturn_t tpmif_int(int irq,
+- void *tpm_priv,
+- struct pt_regs *ptregs);
++ void *tpm_priv);
+ static void tpmif_rx_action(unsigned long unused);
+ static int tpmif_connect(struct xenbus_device *dev,
+ struct tpm_private *tp,
+@@ -558,7 +557,7 @@ static void tpmif_rx_action(unsigned lon
+ }
+
+
+-static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
++static irqreturn_t tpmif_int(int irq, void *tpm_priv)
+ {
+ struct tpm_private *tp = tpm_priv;
+ unsigned long flags;
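
From here on, most hunks adapt to the 2.6.19 interrupt API change: handlers dropped their struct pt_regs argument, and the rare handler that still needs registers fetches them with get_irq_regs(). A minimal sketch with hypothetical names:

    #include <linux/interrupt.h>
    #include <asm/irq_regs.h>

    static irqreturn_t sketch_handler(int irq, void *dev_id)
    {
            struct pt_regs *regs = get_irq_regs();  /* only if truly needed */

            (void)regs;                     /* ... handle the event ... */
            return IRQ_HANDLED;
    }

    /* request_irq() takes the same two-argument irq_handler_t, e.g.:
     *      err = request_irq(irq, sketch_handler, IRQF_SHARED, "sketch", dev);
     */
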
+Index: 10.3-2007-11-26/drivers/pci/Kconfig
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/pci/Kconfig 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/pci/Kconfig 2007-10-22 13:53:08.000000000 +0200
+@@ -34,7 +34,7 @@ config PCI_DEBUG
+ config HT_IRQ
+ bool "Interrupts on hypertransport devices"
+ default y
+- depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
++ depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
+ help
+ This allows native hypertransport devices to use interrupts.
+
+Index: 10.3-2007-11-26/drivers/xen/Kconfig
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/Kconfig 2007-09-03 09:52:56.000000000 +0200
++++ 10.3-2007-11-26/drivers/xen/Kconfig 2007-10-22 13:53:08.000000000 +0200
+@@ -249,6 +249,9 @@ config HAVE_IRQ_IGNORE_UNHANDLED
+ bool
+ default y
+
++config GENERIC_HARDIRQS_NO__DO_IRQ
++ def_bool y
++
+ config NO_IDLE_HZ
+ bool
+ default y
+Index: 10.3-2007-11-26/drivers/xen/balloon/balloon.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/balloon/balloon.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/balloon/balloon.c 2007-10-22 13:53:08.000000000 +0200
+@@ -83,7 +83,13 @@ static unsigned long frame_list[PAGE_SIZ
+
+ /* VM /proc information for memory */
+ extern unsigned long totalram_pages;
++#ifdef CONFIG_HIGHMEM
+ extern unsigned long totalhigh_pages;
++#define totalhigh_pages(op) (totalhigh_pages op)
++#else
++#undef totalhigh_pages
++#define totalhigh_pages(op)
++#endif
+
+ /* List of ballooned pages, threaded through the mem_map array. */
+ static LIST_HEAD(ballooned_pages);
+@@ -119,7 +125,7 @@ static void balloon_append(struct page *
+ if (PageHighMem(page)) {
+ list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+ bs.balloon_high++;
+- totalhigh_pages--;
++ totalhigh_pages(--);
+ } else {
+ list_add(PAGE_TO_LIST(page), &ballooned_pages);
+ bs.balloon_low++;
+@@ -139,7 +145,7 @@ static struct page *balloon_retrieve(voi
+
+ if (PageHighMem(page)) {
+ bs.balloon_high--;
+- totalhigh_pages++;
++ totalhigh_pages(++);
+ }
+ else
+ bs.balloon_low--;
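
The totalhigh_pages(op) macro introduced above is a small trick worth spelling out: with CONFIG_HIGHMEM it places the operator after the variable, and without it the whole statement disappears, so the counting sites need no #ifdef. Expanded by hand:

    /* CONFIG_HIGHMEM set:    totalhigh_pages(--);  ->  (totalhigh_pages --);
     * CONFIG_HIGHMEM unset:  totalhigh_pages(--);  ->  ;  (empty statement)
     */
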
+Index: 10.3-2007-11-26/drivers/xen/blkback/blkback.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/blkback/blkback.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/blkback/blkback.c 2007-10-22 13:53:08.000000000 +0200
+@@ -287,7 +287,7 @@ static void blkif_notify_work(blkif_t *b
+ wake_up(&blkif->wq);
+ }
+
+-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t blkif_be_int(int irq, void *dev_id)
+ {
+ blkif_notify_work(dev_id);
+ return IRQ_HANDLED;
+Index: 10.3-2007-11-26/drivers/xen/blkback/common.h
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/blkback/common.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/blkback/common.h 2007-10-22 13:53:08.000000000 +0200
+@@ -130,7 +130,7 @@ void blkif_interface_init(void);
+
+ void blkif_xenbus_init(void);
+
+-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t blkif_be_int(int irq, void *dev_id);
+ int blkif_schedule(void *arg);
+
+ int blkback_barrier(struct xenbus_transaction xbt,
+Index: 10.3-2007-11-26/drivers/xen/blkfront/blkfront.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/blkfront/blkfront.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/blkfront/blkfront.c 2007-10-22 13:53:08.000000000 +0200
+@@ -69,7 +69,7 @@ static int setup_blkring(struct xenbus_d
+
+ static void kick_pending_request_queues(struct blkfront_info *);
+
+-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
++static irqreturn_t blkif_int(int irq, void *dev_id);
+ static void blkif_restart_queue(void *arg);
+ static void blkif_recover(struct blkfront_info *);
+ static void blkif_completion(struct blk_shadow *);
+@@ -688,7 +688,7 @@ void do_blkif_request(request_queue_t *r
+ }
+
+
+-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
++static irqreturn_t blkif_int(int irq, void *dev_id)
+ {
+ struct request *req;
+ blkif_response_t *bret;
+Index: 10.3-2007-11-26/drivers/xen/blktap/blktap.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/blktap/blktap.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/blktap/blktap.c 2007-10-22 13:53:08.000000000 +0200
+@@ -1059,7 +1059,7 @@ static void blkif_notify_work(blkif_t *b
+ wake_up(&blkif->wq);
+ }
+
+-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
+ {
+ blkif_notify_work(dev_id);
+ return IRQ_HANDLED;
+Index: 10.3-2007-11-26/drivers/xen/blktap/common.h
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/blktap/common.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/blktap/common.h 2007-10-22 13:53:08.000000000 +0200
+@@ -112,7 +112,7 @@ void tap_blkif_interface_init(void);
+
+ void tap_blkif_xenbus_init(void);
+
+-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
+ int tap_blkif_schedule(void *arg);
+
+ int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
+Index: 10.3-2007-11-26/drivers/xen/console/console.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/console/console.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/console/console.c 2007-10-22 13:53:08.000000000 +0200
+@@ -335,7 +335,7 @@ static struct tty_struct *xencons_tty;
+ static int xencons_priv_irq;
+ static char x_char;
+
+-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
++void xencons_rx(char *buf, unsigned len)
+ {
+ int i;
+ unsigned long flags;
+@@ -360,8 +360,7 @@ void xencons_rx(char *buf, unsigned len,
+ if (time_before(jiffies, sysrq_timeout)) {
+ spin_unlock_irqrestore(
+ &xencons_lock, flags);
+- handle_sysrq(
+- buf[i], regs, xencons_tty);
++ handle_sysrq(buf[i], xencons_tty);
+ spin_lock_irqsave(
+ &xencons_lock, flags);
+ continue;
+@@ -426,14 +425,13 @@ void xencons_tx(void)
+ }
+
+ /* Privileged receive callback and transmit kicker. */
+-static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
+- struct pt_regs *regs)
++static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
+ {
+ static char rbuf[16];
+ int l;
+
+ while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
+- xencons_rx(rbuf, l, regs);
++ xencons_rx(rbuf, l);
+
+ xencons_tx();
+
+Index: 10.3-2007-11-26/drivers/xen/console/xencons_ring.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/console/xencons_ring.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/console/xencons_ring.c 2007-10-22 13:53:08.000000000 +0200
+@@ -83,7 +83,7 @@ int xencons_ring_send(const char *data,
+ return sent;
+ }
+
+-static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
++static irqreturn_t handle_input(int irq, void *unused)
+ {
+ struct xencons_interface *intf = xencons_interface();
+ XENCONS_RING_IDX cons, prod;
+@@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq,
+ BUG_ON((prod - cons) > sizeof(intf->in));
+
+ while (cons != prod) {
+- xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
++ xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
+ cons++;
+ }
+
+Index: 10.3-2007-11-26/drivers/xen/core/evtchn.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/evtchn.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/core/evtchn.c 2007-10-22 13:53:08.000000000 +0200
+@@ -462,7 +462,7 @@ static void unbind_from_irq(unsigned int
+
+ int bind_caller_port_to_irqhandler(
+ unsigned int caller_port,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+@@ -485,7 +485,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir
+
+ int bind_listening_port_to_irqhandler(
+ unsigned int remote_domain,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+@@ -509,7 +509,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to
+ int bind_interdomain_evtchn_to_irqhandler(
+ unsigned int remote_domain,
+ unsigned int remote_port,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+@@ -533,7 +533,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch
+ int bind_virq_to_irqhandler(
+ unsigned int virq,
+ unsigned int cpu,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+@@ -557,7 +557,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle
+ int bind_ipi_to_irqhandler(
+ unsigned int ipi,
+ unsigned int cpu,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id)
+@@ -644,15 +644,7 @@ static unsigned int startup_dynirq(unsig
+ return 0;
+ }
+
+-static void shutdown_dynirq(unsigned int irq)
+-{
+- int evtchn = evtchn_from_irq(irq);
+-
+- if (VALID_EVTCHN(evtchn))
+- mask_evtchn(evtchn);
+-}
+-
+-static void enable_dynirq(unsigned int irq)
++static void unmask_dynirq(unsigned int irq)
+ {
+ int evtchn = evtchn_from_irq(irq);
+
+@@ -660,7 +652,7 @@ static void enable_dynirq(unsigned int i
+ unmask_evtchn(evtchn);
+ }
+
+-static void disable_dynirq(unsigned int irq)
++static void mask_dynirq(unsigned int irq)
+ {
+ int evtchn = evtchn_from_irq(irq);
+
+@@ -688,12 +680,12 @@ static void end_dynirq(unsigned int irq)
+ unmask_evtchn(evtchn);
+ }
+
+-static struct hw_interrupt_type dynirq_type = {
+- .typename = "Dynamic-irq",
++static struct irq_chip dynirq_chip = {
++ .name = "Dynamic-irq",
+ .startup = startup_dynirq,
+- .shutdown = shutdown_dynirq,
+- .enable = enable_dynirq,
+- .disable = disable_dynirq,
++ .mask = mask_dynirq,
++ .unmask = unmask_dynirq,
++ .mask_ack = ack_dynirq,
+ .ack = ack_dynirq,
+ .end = end_dynirq,
+ #ifdef CONFIG_SMP
+@@ -776,7 +768,7 @@ static void shutdown_pirq(unsigned int i
+ irq_info[irq] = IRQ_UNBOUND;
+ }
+
+-static void enable_pirq(unsigned int irq)
++static void unmask_pirq(unsigned int irq)
+ {
+ int evtchn = evtchn_from_irq(irq);
+
+@@ -786,7 +778,7 @@ static void enable_pirq(unsigned int irq
+ }
+ }
+
+-static void disable_pirq(unsigned int irq)
++static void mask_pirq(unsigned int irq)
+ {
+ int evtchn = evtchn_from_irq(irq);
+
+@@ -816,12 +808,14 @@ static void end_pirq(unsigned int irq)
+ }
+ }
+
+-static struct hw_interrupt_type pirq_type = {
++static struct irq_chip pirq_chip = {
++ .name = "Phys-irq",
+ .typename = "Phys-irq",
+ .startup = startup_pirq,
+ .shutdown = shutdown_pirq,
+- .enable = enable_pirq,
+- .disable = disable_pirq,
++ .mask = mask_pirq,
++ .unmask = unmask_pirq,
++ .mask_ack = ack_pirq,
+ .ack = ack_pirq,
+ .end = end_pirq,
+ #ifdef CONFIG_SMP
+@@ -994,7 +988,8 @@ void __init xen_init_IRQ(void)
+ irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED;
+ irq_desc[dynirq_to_irq(i)].action = NULL;
+ irq_desc[dynirq_to_irq(i)].depth = 1;
+- irq_desc[dynirq_to_irq(i)].chip = &dynirq_type;
++ set_irq_chip_and_handler_name(dynirq_to_irq(i), &dynirq_chip,
++ handle_level_irq, "level");
+ }
+
+ /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
+@@ -1010,6 +1005,7 @@ void __init xen_init_IRQ(void)
+ irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED;
+ irq_desc[pirq_to_irq(i)].action = NULL;
+ irq_desc[pirq_to_irq(i)].depth = 1;
+- irq_desc[pirq_to_irq(i)].chip = &pirq_type;
++ set_irq_chip_and_handler_name(pirq_to_irq(i), &pirq_chip,
++ handle_level_irq, "level");
+ }
+ }
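
These evtchn.c hunks follow the 2.6.19 genirq rework: struct hw_interrupt_type became struct irq_chip, the enable/disable callbacks became unmask/mask, and the flow handler is installed per IRQ with set_irq_chip_and_handler_name(). Reduced to its pattern, with a hypothetical chip:

    #include <linux/irq.h>

    static void sketch_mask(unsigned int irq)   { /* mask the event channel */ }
    static void sketch_unmask(unsigned int irq) { /* unmask it again */ }
    static void sketch_ack(unsigned int irq)    { /* mask and clear pending */ }

    static struct irq_chip sketch_chip = {
            .name     = "Sketch-irq",
            .mask     = sketch_mask,
            .unmask   = sketch_unmask,
            .mask_ack = sketch_ack,  /* single ack+mask for handle_level_irq */
            .ack      = sketch_ack,
    };

    /* Registration replaces poking irq_desc[].chip directly:
     *      set_irq_chip_and_handler_name(irq, &sketch_chip,
     *                                    handle_level_irq, "level");
     */
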
+Index: 10.3-2007-11-26/drivers/xen/core/reboot.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/reboot.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/core/reboot.c 2007-10-22 13:53:08.000000000 +0200
+@@ -180,7 +180,7 @@ static void sysrq_handler(struct xenbus_
+
+ #ifdef CONFIG_MAGIC_SYSRQ
+ if (sysrq_key != '\0')
+- handle_sysrq(sysrq_key, NULL, NULL);
++ handle_sysrq(sysrq_key, NULL);
+ #endif
+ }
+
+Index: 10.3-2007-11-26/drivers/xen/core/smpboot.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/core/smpboot.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/core/smpboot.c 2007-10-22 13:53:08.000000000 +0200
+@@ -25,8 +25,8 @@
+ #include <xen/cpu_hotplug.h>
+ #include <xen/xenbus.h>
+
+-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
++extern irqreturn_t smp_reschedule_interrupt(int, void *);
++extern irqreturn_t smp_call_function_interrupt(int, void *);
+
+ extern int local_setup_timer(unsigned int cpu);
+ extern void local_teardown_timer(unsigned int cpu);
+@@ -72,8 +72,6 @@ EXPORT_SYMBOL(cpu_core_map);
+ #if defined(__i386__)
+ u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
+ EXPORT_SYMBOL(x86_cpu_to_apicid);
+-#elif !defined(CONFIG_X86_IO_APIC)
+-unsigned int maxcpus = NR_CPUS;
+ #endif
+
+ void __init prefill_possible_map(void)
+Index: 10.3-2007-11-26/drivers/xen/fbfront/xenfb.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/fbfront/xenfb.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/fbfront/xenfb.c 2007-10-22 13:53:08.000000000 +0200
+@@ -417,8 +417,7 @@ static struct fb_ops xenfb_fb_ops = {
+ .fb_mmap = xenfb_mmap,
+ };
+
+-static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
+- struct pt_regs *regs)
++static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
+ {
+ /*
+ * No in events recognized, simply ignore them all.
+Index: 10.3-2007-11-26/drivers/xen/fbfront/xenkbd.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/fbfront/xenkbd.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/fbfront/xenkbd.c 2007-10-22 13:53:08.000000000 +0200
+@@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st
+ * to do that.
+ */
+
+-static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
++static irqreturn_t input_handler(int rq, void *dev_id)
+ {
+ struct xenkbd_info *info = dev_id;
+ struct xenkbd_page *page = info->page;
+Index: 10.3-2007-11-26/drivers/xen/gntdev/gntdev.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/gntdev/gntdev.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/gntdev/gntdev.c 2007-10-22 13:53:08.000000000 +0200
+@@ -701,9 +701,6 @@ static pte_t gntdev_clear_pte(struct vm_
+ BUG();
+ }
+
+- /* Copy the existing value of the PTE for returning. */
+- copy = *ptep;
+-
+ /* Calculate the grant relating to this PTE. */
+ slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+@@ -718,6 +715,10 @@ static pte_t gntdev_clear_pte(struct vm_
+ GNTDEV_INVALID_HANDLE &&
+ !xen_feature(XENFEAT_auto_translated_physmap)) {
+ /* NOT USING SHADOW PAGE TABLES. */
++
++ /* Copy the existing value of the PTE for returning. */
++ copy = *ptep;
++
+ gnttab_set_unmap_op(&op, virt_to_machine(ptep),
+ GNTMAP_contains_pte,
+ private_data->grants[slot_index]
+@@ -730,7 +731,7 @@ static pte_t gntdev_clear_pte(struct vm_
+ op.status);
+ } else {
+ /* USING SHADOW PAGE TABLES. */
+- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
++ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+ }
+
+ /* Finally, we unmap the grant from kernel space. */
+@@ -758,7 +759,7 @@ static pte_t gntdev_clear_pte(struct vm_
+ >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+
+ } else {
+- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
++ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+ }
+
+ return copy;
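
Worth flagging: the gntdev hunks are not just signature churn. The shadow-pagetable branches now use the atomic ptep_get_and_clear_full() instead of a separate read followed by pte_clear_full(), so the PTE value handed back cannot race with the clear, and the early snapshot is kept only for the direct (non-shadow) unmap path. In outline:

    /* Before: copy = *ptep; ... pte_clear_full(mm, addr, ptep, full);
     * After:  the read and the clear are one atomic operation:       */
    copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
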
+Index: 10.3-2007-11-26/drivers/xen/privcmd/privcmd.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/privcmd/privcmd.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/privcmd/privcmd.c 2007-10-22 13:53:08.000000000 +0200
+@@ -234,7 +234,7 @@ static int privcmd_mmap(struct file * fi
+ return -ENOSYS;
+
+ /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
++ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
+ vma->vm_ops = &privcmd_vm_ops;
+ vma->vm_private_data = NULL;
+
+Index: 10.3-2007-11-26/drivers/xen/netback/common.h
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netback/common.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/netback/common.h 2007-10-22 13:53:08.000000000 +0200
+@@ -140,7 +140,7 @@ void netif_deschedule_work(netif_t *neti
+
+ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+ struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t netif_be_int(int irq, void *dev_id);
+
+ static inline int netbk_can_queue(struct net_device *dev)
+ {
+Index: 10.3-2007-11-26/drivers/xen/netback/loopback.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netback/loopback.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/netback/loopback.c 2007-10-22 13:53:08.000000000 +0200
+@@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk
+ np->stats.rx_bytes += skb->len;
+ np->stats.rx_packets++;
+
+- if (skb->ip_summed == CHECKSUM_HW) {
++ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* Defer checksum calculation. */
+ skb->proto_csum_blank = 1;
+ /* Must be a local packet: assert its integrity. */
+Index: 10.3-2007-11-26/drivers/xen/netback/netback.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netback/netback.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/netback/netback.c 2007-10-22 13:53:08.000000000 +0200
+@@ -649,7 +649,7 @@ static void net_rx_action(unsigned long
+ id = meta[npo.meta_cons].id;
+ flags = nr_frags ? NETRXF_more_data : 0;
+
+- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
+ else if (skb->proto_data_valid) /* remote but checksummed? */
+ flags |= NETRXF_data_validated;
+@@ -1339,7 +1339,7 @@ static void netif_page_release(struct pa
+ netif_idx_release(netif_page_index(page));
+ }
+
+-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t netif_be_int(int irq, void *dev_id)
+ {
+ netif_t *netif = dev_id;
+
+@@ -1406,7 +1406,7 @@ static netif_rx_response_t *make_rx_resp
+ }
+
+ #ifdef NETBE_DEBUG_INTERRUPT
+-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
++static irqreturn_t netif_be_dbg(int irq, void *dev_id)
+ {
+ struct list_head *ent;
+ netif_t *netif;
+Index: 10.3-2007-11-26/drivers/xen/netfront/netfront.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/netfront/netfront.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/netfront/netfront.c 2007-10-22 13:53:08.000000000 +0200
+@@ -135,7 +135,7 @@ static inline int netif_needs_gso(struct
+ {
+ return skb_is_gso(skb) &&
+ (!skb_gso_ok(skb, dev->features) ||
+- unlikely(skb->ip_summed != CHECKSUM_HW));
++ unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
+ }
+ #else
+ #define netif_needs_gso(dev, skb) 0
+@@ -268,7 +268,7 @@ static void network_tx_buf_gc(struct net
+ static void network_alloc_rx_buffers(struct net_device *);
+ static int send_fake_arp(struct net_device *);
+
+-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
++static irqreturn_t netif_int(int irq, void *dev_id);
+
+ #ifdef CONFIG_SYSFS
+ static int xennet_sysfs_addif(struct net_device *netdev);
+@@ -978,7 +978,7 @@ static int network_start_xmit(struct sk_
+ tx->flags = 0;
+ extra = NULL;
+
+- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+ #ifdef CONFIG_XEN
+ if (skb->proto_data_valid) /* remote but checksummed? */
+@@ -1034,7 +1034,7 @@ static int network_start_xmit(struct sk_
+ return 0;
+ }
+
+-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
++static irqreturn_t netif_int(int irq, void *dev_id)
+ {
+ struct net_device *dev = dev_id;
+ struct netfront_info *np = netdev_priv(dev);
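
The CHECKSUM_HW conversions in the network drivers follow the 2.6.19 rename that split the old ambiguous value in two: CHECKSUM_PARTIAL on transmit (the stack left the checksum to be filled in later) and CHECKSUM_COMPLETE on receive (hardware already summed the data). Every site touched here is a transmit-side or local-packet test, hence CHECKSUM_PARTIAL throughout, e.g.:

    /* Has checksumming been deferred to us / the peer domain? */
    if (skb->ip_summed == CHECKSUM_PARTIAL)         /* was CHECKSUM_HW */
            flags |= NETRXF_csum_blank | NETRXF_data_validated;
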
+Index: 10.3-2007-11-26/drivers/xen/pciback/pciback.h
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/pciback/pciback.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/pciback/pciback.h 2007-10-22 13:53:08.000000000 +0200
+@@ -83,7 +83,7 @@ int pciback_publish_pci_roots(struct pci
+ void pciback_release_devices(struct pciback_device *pdev);
+
+ /* Handles events from front-end */
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t pciback_handle_event(int irq, void *dev_id);
+ void pciback_do_op(void *data);
+
+ int pciback_xenbus_register(void);
+Index: 10.3-2007-11-26/drivers/xen/pciback/pciback_ops.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/pciback/pciback_ops.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/pciback/pciback_ops.c 2007-10-22 13:53:08.000000000 +0200
+@@ -85,7 +85,7 @@ void pciback_do_op(void *data)
+ test_and_schedule_op(pdev);
+ }
+
+-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t pciback_handle_event(int irq, void *dev_id)
+ {
+ struct pciback_device *pdev = dev_id;
+
+Index: 10.3-2007-11-26/drivers/xen/tpmback/common.h
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/tpmback/common.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/tpmback/common.h 2007-10-22 13:53:08.000000000 +0200
+@@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm
+ void tpmif_xenbus_init(void);
+ void tpmif_xenbus_exit(void);
+ int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
+-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
++irqreturn_t tpmif_be_int(int irq, void *dev_id);
+
+ long int tpmback_get_instance(struct backend_info *bi);
+
+Index: 10.3-2007-11-26/drivers/xen/tpmback/tpmback.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/tpmback/tpmback.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/tpmback/tpmback.c 2007-10-22 13:53:08.000000000 +0200
+@@ -502,7 +502,7 @@ static ssize_t vtpm_op_read(struct file
+ list_del(&pak->next);
+ write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+- DPRINTK("size given by app: %d, available: %d\n", size, left);
++ DPRINTK("size given by app: %zu, available: %u\n", size, left);
+
+ ret_size = min_t(size_t, size, left);
+
+@@ -899,7 +899,7 @@ static void tpm_tx_action(unsigned long
+ }
+ }
+
+-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t tpmif_be_int(int irq, void *dev_id)
+ {
+ tpmif_t *tpmif = (tpmif_t *) dev_id;
+
+Index: 10.3-2007-11-26/drivers/xen/xenbus/xenbus_comms.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/xenbus/xenbus_comms.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/xenbus/xenbus_comms.c 2007-10-22 13:53:08.000000000 +0200
+@@ -54,7 +54,7 @@ static DECLARE_WORK(probe_work, xenbus_p
+
+ static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
+
+-static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
++static irqreturn_t wake_waiting(int irq, void *unused)
+ {
+ if (unlikely(xenstored_ready == 0)) {
+ xenstored_ready = 1;
+Index: 10.3-2007-11-26/drivers/xen/xenoprof/xenoprofile.c
+===================================================================
+--- 10.3-2007-11-26.orig/drivers/xen/xenoprof/xenoprofile.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/drivers/xen/xenoprof/xenoprofile.c 2007-10-22 13:53:08.000000000 +0200
+@@ -177,7 +177,7 @@ done:
+ }
+
+ static irqreturn_t
+-xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
++xenoprof_ovf_interrupt(int irq, void * dev_id)
+ {
+ struct xenoprof_buf * buf;
+ int cpu;
+Index: 10.3-2007-11-26/include/asm-i386/acpi.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/acpi.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/acpi.h 2007-10-22 13:53:08.000000000 +0200
+@@ -141,7 +141,9 @@ extern void acpi_reserve_bootmem(void);
+
+ #endif /*CONFIG_ACPI_SLEEP*/
+
++#ifndef CONFIG_XEN
+ #define ARCH_HAS_POWER_INIT 1
++#endif
+
+ #endif /*__KERNEL__*/
+
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/desc.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h 2007-10-22 13:53:08.000000000 +0200
+@@ -32,52 +32,108 @@ static inline struct desc_struct *get_cp
+ return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
+ }
+
++/*
++ * This is the ldt that every process will get unless we need
++ * something other than this.
++ */
++extern struct desc_struct default_ldt[];
++extern struct desc_struct idt_table[];
++extern void set_intr_gate(unsigned int irq, void * addr);
++
++static inline void pack_descriptor(__u32 *a, __u32 *b,
++ unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
++{
++ *a = ((base & 0xffff) << 16) | (limit & 0xffff);
++ *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
++ (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
++}
++
++static inline void pack_gate(__u32 *a, __u32 *b,
++ unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
++{
++ *a = (seg << 16) | (base & 0xffff);
++ *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
++}
++
++#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
++#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
++#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
++#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
++#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
++#define DESCTYPE_DPL3 0x60 /* DPL-3 */
++#define DESCTYPE_S 0x10 /* !system */
++
+ #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
+ #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
+
+ #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
+ #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
+-#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
+-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
++#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
++#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
+
+ #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
+ #define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
+-#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
+-#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
++#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
++#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
+
+-/*
+- * This is the ldt that every process will get unless we need
+- * something other than this.
+- */
+-extern struct desc_struct default_ldt[];
+-extern void set_intr_gate(unsigned int irq, void * addr);
++#if TLS_SIZE != 24
++# error update this code.
++#endif
++
++static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
++{
++#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), *(u64 *)&t->tls_array[i])
++ C(0); C(1); C(2);
++#undef C
++}
+
+-#define _set_tssldt_desc(n,addr,limit,type) \
+-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
+- "movw %w1,2(%2)\n\t" \
+- "rorl $16,%1\n\t" \
+- "movb %b1,4(%2)\n\t" \
+- "movb %4,5(%2)\n\t" \
+- "movb $0,6(%2)\n\t" \
+- "movb %h1,7(%2)\n\t" \
+- "rorl $16,%1" \
+- : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
++#ifndef CONFIG_XEN
++static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
++{
++ __u32 *lp = (__u32 *)((char *)dt + entry*8);
++ *lp = entry_a;
++ *(lp+1) = entry_b;
++}
+
+-#ifndef CONFIG_X86_NO_TSS
+-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
++#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
++#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
++#else
++extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
++extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
++#endif
++#ifndef CONFIG_X86_NO_IDT
++#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
++
++static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
+ {
+- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
+- offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
++ __u32 a, b;
++ pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
++ write_idt_entry(idt_table, gate, a, b);
+ }
++#endif
+
+-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
++#ifndef CONFIG_X86_NO_TSS
++static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
++{
++ __u32 a, b;
++ pack_descriptor(&a, &b, (unsigned long)addr,
++ offsetof(struct tss_struct, __cacheline_filler) - 1,
++ DESCTYPE_TSS, 0);
++ write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
++}
+ #endif
+
+-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
++static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
+ {
+- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
++ __u32 a, b;
++ pack_descriptor(&a, &b, (unsigned long)addr,
++ entries * sizeof(struct desc_struct) - 1,
++ DESCTYPE_LDT, 0);
++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
+ }
+
++#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
++
+ #define LDT_entry_a(info) \
+ ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
+
+@@ -103,19 +159,6 @@ static inline void set_ldt_desc(unsigned
+ (info)->seg_not_present == 1 && \
+ (info)->useable == 0 )
+
+-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
+-
+-#if TLS_SIZE != 24
+-# error update this code.
+-#endif
+-
+-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
+-{
+-#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), *(u64 *)&t->tls_array[i])
+- C(0); C(1); C(2);
+-#undef C
+-}
+-
+ static inline void clear_LDT(void)
+ {
+ int cpu = get_cpu();
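
pack_descriptor() above is easiest to sanity-check with a worked value. Take the LDT descriptor set_ldt_desc() would build for 4 entries (limit 4*8-1 = 0x1f) at a hypothetical base of 0x12345678:

    /* pack_descriptor(&a, &b, 0x12345678, 0x1f, DESCTYPE_LDT, 0) gives:
     *   a = 0x5678001f   // base[15:0] << 16 | limit[15:0]
     *   b = 0x12008234   // base[31:24]=0x12, flags/limit[19:16]=0,
     *                    // type=0x82 in bits 8-15, base[23:16]=0x34
     */
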
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/fixmap.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/fixmap.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/fixmap.h 2007-10-22 13:53:08.000000000 +0200
+@@ -55,7 +55,7 @@ enum fixed_addresses {
+ #ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
+ #endif
+-#ifdef CONFIG_X86_IO_APIC
++#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+ #endif
+@@ -95,10 +95,9 @@ enum fixed_addresses {
+ __end_of_fixed_addresses
+ };
+
+-extern void set_fixaddr_top(unsigned long top);
+-
+ extern void __set_fixmap(enum fixed_addresses idx,
+ maddr_t phys, pgprot_t flags);
++extern void reserve_top_address(unsigned long reserve);
+
+ #define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL)
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/floppy.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/floppy.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/floppy.h 2007-10-22 13:53:08.000000000 +0200
+@@ -43,14 +43,14 @@ static char *virtual_dma_addr;
+ static int virtual_dma_mode;
+ static int doing_pdma;
+
+-static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
++static irqreturn_t floppy_hardint(int irq, void *dev_id)
+ {
+ register unsigned char st;
+ register int lcount;
+ register char *lptr;
+
+ if (!doing_pdma)
+- return floppy_interrupt(irq, dev_id, regs);
++ return floppy_interrupt(irq, dev_id);
+
+ st = 1;
+ for(lcount=virtual_dma_count, lptr=virtual_dma_addr;
+@@ -73,7 +73,7 @@ static irqreturn_t floppy_hardint(int ir
+ virtual_dma_residue += virtual_dma_count;
+ virtual_dma_count=0;
+ doing_pdma = 0;
+- floppy_interrupt(irq, dev_id, regs);
++ floppy_interrupt(irq, dev_id);
+ return IRQ_HANDLED;
+ }
+ return IRQ_HANDLED;
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/hw_irq.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/hw_irq.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/hw_irq.h 2007-10-22 13:53:08.000000000 +0200
+@@ -17,8 +17,6 @@
+ #include <asm/irq.h>
+ #include <asm/sections.h>
+
+-struct hw_interrupt_type;
+-
+ #define NMI_VECTOR 0x02
+
+ /*
+@@ -28,10 +26,6 @@ struct hw_interrupt_type;
+ * Interrupt entry/exit code at both C and assembly level
+ */
+
+-extern u8 irq_vector[NR_IRQ_VECTORS];
+-#define IO_APIC_VECTOR(irq) (irq_vector[irq])
+-#define AUTO_ASSIGN -1
+-
+ extern void (*interrupt[NR_IRQS])(void);
+
+ #ifdef CONFIG_SMP
+@@ -44,7 +38,7 @@ fastcall void call_function_interrupt(vo
+ fastcall void apic_timer_interrupt(void);
+ fastcall void error_interrupt(void);
+ fastcall void spurious_interrupt(void);
+-fastcall void thermal_interrupt(struct pt_regs *);
++fastcall void thermal_interrupt(void);
+ #define platform_legacy_irq(irq) ((irq) < 16)
+ #endif
+
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/io.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h 2007-10-22 13:53:08.000000000 +0200
+@@ -238,33 +238,6 @@ static inline void memcpy_toio(volatile
+
+ #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d))
+
+-/**
+- * check_signature - find BIOS signatures
+- * @io_addr: mmio address to check
+- * @signature: signature block
+- * @length: length of signature
+- *
+- * Perform a signature comparison with the mmio address io_addr. This
+- * address should have been obtained by ioremap.
+- * Returns 1 on a match.
+- */
+-
+-static inline int check_signature(volatile void __iomem * io_addr,
+- const unsigned char *signature, int length)
+-{
+- int retval = 0;
+- do {
+- if (readb(io_addr) != *signature)
+- goto out;
+- io_addr++;
+- signature++;
+- length--;
+- } while (length);
+- retval = 1;
+-out:
+- return retval;
+-}
+-
+ /*
+ * Cache management
+ *
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/page.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/page.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/page.h 2007-10-22 13:53:08.000000000 +0200
+@@ -196,7 +196,7 @@ extern int page_is_ram(unsigned long pag
+
+ #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET)
+ #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE)
+-#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
++#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE)
+ #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
+ #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
+ #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-10-22 13:53:08.000000000 +0200
+@@ -21,14 +21,6 @@
+ set_pte((ptep), (pteval)); \
+ } while (0)
+
+-#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
+- if (((_mm) != current->mm && (_mm) != &init_mm) || \
+- HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
+- set_pte((ptep), (pteval)); \
+- xen_invlpg((addr)); \
+- } \
+-} while (0)
+-
+ #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
+
+ #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval))
+@@ -38,6 +30,7 @@
+
+ #define pte_none(x) (!(x).pte_low)
+
++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+@@ -49,6 +42,7 @@ static inline pte_t ptep_get_and_clear(s
+ return pte;
+ }
+
++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep) \
+ ({ \
+ pte_t *__ptep = (ptep); \
+@@ -64,8 +58,6 @@ static inline pte_t ptep_get_and_clear(s
+ __res; \
+ })
+
+-#define pte_same(a, b) ((a).pte_low == (b).pte_low)
+-
+ #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT)
+ #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
+ __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-10-22 13:53:08.000000000 +0200
+@@ -50,7 +50,6 @@ static inline int pte_exec_kernel(pte_t
+ * not possible, use pte_get_and_clear to obtain the old pte
+ * value and then use set_pte to update it. -ben
+ */
+-#define __HAVE_ARCH_SET_PTE_ATOMIC
+
+ static inline void set_pte(pte_t *ptep, pte_t pte)
+ {
+@@ -67,14 +66,6 @@ static inline void set_pte(pte_t *ptep,
+ set_pte((ptep), (pteval)); \
+ } while (0)
+
+-#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
+- if (((_mm) != current->mm && (_mm) != &init_mm) || \
+- HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
+- set_pte((ptep), (pteval)); \
+- xen_invlpg((addr)); \
+- } \
+-} while (0)
+-
+ #define set_pmd(pmdptr,pmdval) \
+ xen_l2_entry_update((pmdptr), (pmdval))
+ #define set_pud(pudptr,pudval) \
+@@ -91,7 +82,7 @@ static inline void pud_clear (pud_t * pu
+ #define pud_page(pud) \
+ ((struct page *) __va(pud_val(pud) & PAGE_MASK))
+
+-#define pud_page_kernel(pud) \
++#define pud_page_vaddr(pud) \
+ ((unsigned long) __va(pud_val(pud) & PAGE_MASK))
+
+
+@@ -121,6 +112,7 @@ static inline void pte_clear(struct mm_s
+
+ #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
+
++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+@@ -139,6 +131,7 @@ static inline pte_t ptep_get_and_clear(s
+ return pte;
+ }
+
++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+ #define ptep_clear_flush(vma, addr, ptep) \
+ ({ \
+ pte_t *__ptep = (ptep); \
+@@ -156,6 +149,7 @@ static inline pte_t ptep_get_and_clear(s
+ __res; \
+ })
+
++#define __HAVE_ARCH_PTE_SAME
+ static inline int pte_same(pte_t a, pte_t b)
+ {
+ return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h 2007-10-22 13:53:08.000000000 +0200
+@@ -256,31 +256,89 @@ static inline pte_t pte_mkhuge(pte_t pte
+ # include <asm/pgtable-2level.h>
+ #endif
+
+-#define ptep_test_and_clear_dirty(vma, addr, ptep) \
++/*
++ * Rules for using pte_update - it must be called after any PTE update which
++ * has not been done using the set_pte / clear_pte interfaces. It is used by
++ * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
++ * updates should either be sets, clears, or set_pte_atomic for P->P
++ * transitions, which means this hook should only be called for user PTEs.
++ * This hook implies a P->P protection or access change has taken place, which
++ * requires a subsequent TLB flush. The notification can optionally be delayed
++ * until the TLB flush event by using the pte_update_defer form of the
++ * interface, but care must be taken to assure that the flush happens while
++ * still holding the same page table lock so that the shadow and primary pages
++ * do not become out of sync on SMP.
++ */
++#define pte_update(mm, addr, ptep) do { } while (0)
++#define pte_update_defer(mm, addr, ptep) do { } while (0)
++
++
++/*
++ * We only update the dirty/accessed state if we set
++ * the dirty bit by hand in the kernel, since the hardware
++ * will do the accessed bit for us, and we don't want to
++ * race with other CPU's that might be updating the dirty
++ * bit at the same time.
++ */
++#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
++#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
++do { \
++ if (dirty) \
++ ptep_establish(vma, address, ptep, entry); \
++} while (0)
++
++/*
++ * We don't actually have these, but we want to advertise them so that
++ * we can encompass the flush here.
++ */
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++
++/*
++ * Rules for using ptep_establish: the pte MUST be a user pte, and
++ * must be a present->present transition.
++ */
++#define __HAVE_ARCH_PTEP_ESTABLISH
++#define ptep_establish(vma, address, ptep, pteval) \
++do { \
++ if ( likely((vma)->vm_mm == current->mm) ) { \
++ BUG_ON(HYPERVISOR_update_va_mapping(address, \
++ pteval, \
++ (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
++ UVMF_INVLPG|UVMF_MULTI)); \
++ } else { \
++ xen_l1_entry_update(ptep, pteval); \
++ flush_tlb_page(vma, address); \
++ } \
++} while (0)
++
++#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
++#define ptep_clear_flush_dirty(vma, address, ptep) \
+ ({ \
+ pte_t __pte = *(ptep); \
+- int __ret = pte_dirty(__pte); \
+- if (__ret) { \
+- __pte = pte_mkclean(__pte); \
+- if ((vma)->vm_mm != current->mm || \
+- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
+- (ptep)->pte_low = __pte.pte_low; \
+- } \
+- __ret; \
++ int __dirty = pte_dirty(__pte); \
++ __pte = pte_mkclean(__pte); \
++ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
++ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
++ else if (__dirty) \
++ (ptep)->pte_low = __pte.pte_low; \
++ __dirty; \
+ })
+
+-#define ptep_test_and_clear_young(vma, addr, ptep) \
++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
++#define ptep_clear_flush_young(vma, address, ptep) \
+ ({ \
+ pte_t __pte = *(ptep); \
+- int __ret = pte_young(__pte); \
+- if (__ret) \
+- __pte = pte_mkold(__pte); \
+- if ((vma)->vm_mm != current->mm || \
+- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
+- (ptep)->pte_low = __pte.pte_low; \
+- __ret; \
++ int __young = pte_young(__pte); \
++ __pte = pte_mkold(__pte); \
++ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
++ ptep_set_access_flags(vma, address, ptep, __pte, __young); \
++ else if (__young) \
++ (ptep)->pte_low = __pte.pte_low; \
++ __young; \
+ })
+
++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+ #define ptep_get_and_clear_full(mm, addr, ptep, full) \
+ ((full) ? ({ \
+ pte_t __res = *(ptep); \
+@@ -292,6 +350,7 @@ static inline pte_t pte_mkhuge(pte_t pte
+ }) : \
+ ptep_get_and_clear(mm, addr, ptep))
+
++#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+ pte_t pte = *ptep;
+@@ -387,11 +446,11 @@ static inline pte_t pte_modify(pte_t pte
+ #define pte_index(address) \
+ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) \
+- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
++ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
+
+ #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+-#define pmd_page_kernel(pmd) \
++#define pmd_page_vaddr(pmd) \
+ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
+ /*
+@@ -414,8 +473,6 @@ extern pte_t *lookup_address(unsigned lo
+ static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
+ #endif
+
+-extern void noexec_setup(const char *str);
+-
+ #if defined(CONFIG_HIGHPTE)
+ #define pte_offset_map(dir, address) \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
+@@ -433,37 +490,15 @@ extern void noexec_setup(const char *str
+ #define pte_unmap_nested(pte) do { } while (0)
+ #endif
+
+-#define __HAVE_ARCH_PTEP_ESTABLISH
+-#define ptep_establish(vma, address, ptep, pteval) \
+- do { \
+- if ( likely((vma)->vm_mm == current->mm) ) { \
+- BUG_ON(HYPERVISOR_update_va_mapping(address, \
+- pteval, \
+- (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
+- UVMF_INVLPG|UVMF_MULTI)); \
+- } else { \
+- xen_l1_entry_update(ptep, pteval); \
+- flush_tlb_page(vma, address); \
+- } \
+- } while (0)
++/* Clear a kernel PTE and flush it from the TLB */
++#define kpte_clear_flush(ptep, vaddr) \
++ HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)
+
+ /*
+ * The i386 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+- *
+- * Also, we only update the dirty/accessed state if we set
+- * the dirty bit by hand in the kernel, since the hardware
+- * will do the accessed bit for us, and we don't want to
+- * race with other CPU's that might be updating the dirty
+- * bit at the same time.
+ */
+ #define update_mmu_cache(vma,address,pte) do { } while (0)
+-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+-#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
+- do { \
+- if (dirty) \
+- ptep_establish(vma, address, ptep, entry); \
+- } while (0)
+
+ #include <xen/features.h>
+ void make_lowmem_page_readonly(void *va, unsigned int feature);
+@@ -518,13 +553,6 @@ direct_remap_pfn_range(vma,from,pfn,size
+ #define GET_IOSPACE(pfn) 0
+ #define GET_PFN(pfn) (pfn)
+
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+-#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
+-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+-#define __HAVE_ARCH_PTE_SAME
+ #include <asm-generic/pgtable.h>
+
+ #endif /* _I386_PGTABLE_H */
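
The moves in pgtable.h look noisy but follow one rule: each __HAVE_ARCH_* define now sits directly above the Xen implementation it claims, instead of being collected at the bottom of the file. The mechanism lives in asm-generic/pgtable.h, which supplies a generic helper only when the arch has not claimed it. Note also the PG_pinned test in the clear_flush_dirty/young macros: only pinned page tables (those validated by the hypervisor) need the hypercall-aware ptep_set_access_flags() path, while unpinned ones are ordinary memory and can be written directly. The override mechanism, schematically:

    /* asm-generic/pgtable.h (schematic): the fallback exists only when
     * the arch did not define the matching __HAVE_ARCH_ symbol. */
    #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
    static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
                                           unsigned long addr, pte_t *ptep)
    {
            pte_t pte = *ptep;
            pte_clear(mm, addr, ptep);
            return pte;
    }
    #endif
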
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/processor.h 2007-09-03 09:52:56.000000000 +0200
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h 2007-10-22 13:53:08.000000000 +0200
+@@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui
+ #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+ #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
++static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
++ unsigned int *ecx, unsigned int *edx)
++{
++ /* ecx is often an input as well as an output. */
++ __asm__(XEN_CPUID
++ : "=a" (*eax),
++ "=b" (*ebx),
++ "=c" (*ecx),
++ "=d" (*edx)
++ : "0" (*eax), "2" (*ecx));
++}
++
+ /*
+ * Generic CPUID function
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
+@@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui
+ */
+ static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
+ {
+- __asm__(XEN_CPUID
+- : "=a" (*eax),
+- "=b" (*ebx),
+- "=c" (*ecx),
+- "=d" (*edx)
+- : "0" (op), "c"(0));
++ *eax = op;
++ *ecx = 0;
++ __cpuid(eax, ebx, ecx, edx);
+ }
+
+ /* Some CPUID calls want 'count' to be placed in ecx */
+ static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
+- int *edx)
++ int *edx)
+ {
+- __asm__(XEN_CPUID
+- : "=a" (*eax),
+- "=b" (*ebx),
+- "=c" (*ecx),
+- "=d" (*edx)
+- : "0" (op), "c" (count));
++ *eax = op;
++ *ecx = count;
++ __cpuid(eax, ebx, ecx, edx);
+ }
+
+ /*
+@@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i
+ */
+ static inline unsigned int cpuid_eax(unsigned int op)
+ {
+- unsigned int eax;
++ unsigned int eax, ebx, ecx, edx;
+
+- __asm__(XEN_CPUID
+- : "=a" (eax)
+- : "0" (op)
+- : "bx", "cx", "dx");
++ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return eax;
+ }
+ static inline unsigned int cpuid_ebx(unsigned int op)
+ {
+- unsigned int eax, ebx;
++ unsigned int eax, ebx, ecx, edx;
+
+- __asm__(XEN_CPUID
+- : "=a" (eax), "=b" (ebx)
+- : "0" (op)
+- : "cx", "dx" );
++ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ebx;
+ }
+ static inline unsigned int cpuid_ecx(unsigned int op)
+ {
+- unsigned int eax, ecx;
++ unsigned int eax, ebx, ecx, edx;
+
+- __asm__(XEN_CPUID
+- : "=a" (eax), "=c" (ecx)
+- : "0" (op)
+- : "bx", "dx" );
++ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return ecx;
+ }
+ static inline unsigned int cpuid_edx(unsigned int op)
+ {
+- unsigned int eax, edx;
++ unsigned int eax, ebx, ecx, edx;
+
+- __asm__(XEN_CPUID
+- : "=a" (eax), "=d" (edx)
+- : "0" (op)
+- : "bx", "cx");
++ cpuid(op, &eax, &ebx, &ecx, &edx);
+ return edx;
+ }
+
+@@ -315,6 +309,8 @@ static inline void __mwait(unsigned long
+ : :"a" (eax), "c" (ecx));
+ }
+
++extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
++
+ /* from system description table in BIOS. Mostly for MCA use, but
+ others may find it useful. */
+ extern unsigned int machine_id;
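
The cpuid rework funnels all the helpers through one __cpuid() primitive instead of four subtly different asm blocks. cpuid_eax() and friends now clobber all four output registers, but the single asm statement is much harder to get out of sync with XEN_CPUID. Call sites are unchanged, for example:

    unsigned int eax, ebx, ecx, edx;

    cpuid(0, &eax, &ebx, &ecx, &edx);   /* leaf 0: max leaf, vendor string */
    /* The vendor string is the byte sequence ebx, edx, ecx, e.g.
     * "GenuineIntel"; sub-leaf queries go through cpuid_count(). */
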
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/ptrace.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/ptrace.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/ptrace.h 2007-10-22 13:53:08.000000000 +0200
+@@ -1,24 +1,7 @@
+ #ifndef _I386_PTRACE_H
+ #define _I386_PTRACE_H
+
+-#define EBX 0
+-#define ECX 1
+-#define EDX 2
+-#define ESI 3
+-#define EDI 4
+-#define EBP 5
+-#define EAX 6
+-#define DS 7
+-#define ES 8
+-#define FS 9
+-#define GS 10
+-#define ORIG_EAX 11
+-#define EIP 12
+-#define CS 13
+-#define EFL 14
+-#define UESP 15
+-#define SS 16
+-#define FRAME_SIZE 17
++#include <asm/ptrace-abi.h>
+
+ /* this struct defines the way the registers are stored on the
+ stack during a system call. */
+@@ -41,25 +24,10 @@ struct pt_regs {
+ int xss;
+ };
+
+-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+-#define PTRACE_GETREGS 12
+-#define PTRACE_SETREGS 13
+-#define PTRACE_GETFPREGS 14
+-#define PTRACE_SETFPREGS 15
+-#define PTRACE_GETFPXREGS 18
+-#define PTRACE_SETFPXREGS 19
+-
+-#define PTRACE_OLDSETOPTIONS 21
+-
+-#define PTRACE_GET_THREAD_AREA 25
+-#define PTRACE_SET_THREAD_AREA 26
+-
+-#define PTRACE_SYSEMU 31
+-#define PTRACE_SYSEMU_SINGLESTEP 32
+-
+ #ifdef __KERNEL__
+
+ #include <asm/vm86.h>
++#include <asm/segment.h>
+
+ struct task_struct;
+ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code);
+@@ -73,18 +41,17 @@ extern void send_sigtrap(struct task_str
+ */
+ static inline int user_mode(struct pt_regs *regs)
+ {
+- return (regs->xcs & 2) != 0;
++ return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL;
+ }
+ static inline int user_mode_vm(struct pt_regs *regs)
+ {
+- return ((regs->xcs & 2) | (regs->eflags & VM_MASK)) != 0;
++ return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
+ }
++
+ #define instruction_pointer(regs) ((regs)->eip)
+-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
++#define regs_return_value(regs) ((regs)->eax)
++
+ extern unsigned long profile_pc(struct pt_regs *regs);
+-#else
+-#define profile_pc(regs) instruction_pointer(regs)
+-#endif
+ #endif /* __KERNEL__ */
+
+ #endif
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/segment.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h 2007-10-22 13:53:08.000000000 +0200
+@@ -61,11 +61,9 @@
+
+ #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0)
+ #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8)
+-#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
+
+ #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1)
+ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8)
+-#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) )
+
+ #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4)
+ #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5)
+@@ -85,6 +83,11 @@
+
+ #define GDT_SIZE (GDT_ENTRIES * 8)
+
++/* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */
++#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8)
++/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
++#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
++
+ /* Simple and small GDT entries for booting only */
+
+ #define GDT_ENTRY_BOOT_CS 2
+@@ -114,4 +117,16 @@
+ */
+ #define IDT_ENTRIES 256
+
++/* Bottom two bits of selector give the ring privilege level */
++#define SEGMENT_RPL_MASK 0x3
++/* Bit 2 is table indicator (LDT/GDT) */
++#define SEGMENT_TI_MASK 0x4
++
++/* User mode is privilege level 3 */
++#define USER_RPL 0x3
++/* LDT segment has TI set, GDT has it cleared */
++#define SEGMENT_LDT 0x4
++#define SEGMENT_GDT 0x0
++
++#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
+ #endif
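+
+The ptrace.h and segment.h hunks above replace the bare "& 2" ring test
+with named selector constants: the low two bits of a segment selector are
+its privilege level, user space runs in ring 3, and a Xen/i386 kernel runs
+in ring 1 (hence get_kernel_rpl() returning 1 unless the guest has the
+supervisor-mode-kernel feature).  A standalone sketch of the selector
+arithmetic, using made-up selector values:
+
+    #include <stdio.h>
+
+    #define SEGMENT_RPL_MASK 0x3    /* low two bits: privilege level */
+    #define USER_RPL         0x3    /* user mode is ring 3 */
+
+    static int user_mode_cs(unsigned long cs)
+    {
+        return (cs & SEGMENT_RPL_MASK) == USER_RPL;
+    }
+
+    int main(void)
+    {
+        /* hypothetical selectors: a user CS and a ring-1 kernel CS */
+        printf("cs 0x73: rpl=%lu user=%d\n",
+               0x73UL & SEGMENT_RPL_MASK, user_mode_cs(0x73));
+        printf("cs 0x61: rpl=%lu user=%d\n",
+               0x61UL & SEGMENT_RPL_MASK, user_mode_cs(0x61));
+        return 0;
+    }
+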
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/smp.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h 2007-10-22 13:53:08.000000000 +0200
+@@ -79,25 +79,36 @@ static inline int hard_smp_processor_id(
+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+ }
+ #endif
+-
+-static __inline int logical_smp_processor_id(void)
+-{
+- /* we don't want to mark this access volatile - bad code generation */
+- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+-}
+-
+ #endif
+
++extern int safe_smp_processor_id(void);
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+ extern void prefill_possible_map(void);
++extern unsigned int num_processors;
++
+ #endif /* !__ASSEMBLY__ */
+
+ #else /* CONFIG_SMP */
+
++#define safe_smp_processor_id() 0
+ #define cpu_physical_id(cpu) boot_cpu_physical_apicid
+
+ #define NO_PROC_ID 0xFF /* No processor magic marker */
+
+ #endif
++
++#ifndef __ASSEMBLY__
++
++extern u8 apicid_2_node[];
++
++#ifdef CONFIG_X86_LOCAL_APIC
++static __inline int logical_smp_processor_id(void)
++{
++ /* we don't want to mark this access volatile - bad code generation */
++ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
++}
++#endif
++#endif
++
+ #endif
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/spinlock.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/spinlock.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/spinlock.h 2007-10-22 13:53:08.000000000 +0200
+@@ -4,8 +4,12 @@
+ #include <asm/atomic.h>
+ #include <asm/rwlock.h>
+ #include <asm/page.h>
++#include <asm/processor.h>
+ #include <linux/compiler.h>
+
++#define CLI_STRING "#cli"
++#define STI_STRING "#sti"
++
+ /*
+ * Your basic SMP spinlocks, allowing only a single CPU anywhere
+ *
+@@ -17,67 +21,64 @@
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+
+-#define __raw_spin_is_locked(x) \
+- (*(volatile signed char *)(&(x)->slock) <= 0)
+-
+-#define __raw_spin_lock_string \
+- "\n1:\t" \
+- LOCK_PREFIX " ; decb %0\n\t" \
+- "jns 3f\n" \
+- "2:\t" \
+- "rep;nop\n\t" \
+- "cmpb $0,%0\n\t" \
+- "jle 2b\n\t" \
+- "jmp 1b\n" \
+- "3:\n\t"
+-
+-/*
+- * NOTE: there's an irqs-on section here, which normally would have to be
+- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use
+- * __raw_spin_lock_string_flags().
+- */
+-#define __raw_spin_lock_string_flags \
+- "\n1:\t" \
+- LOCK_PREFIX " ; decb %0\n\t" \
+- "jns 5f\n" \
+- "2:\t" \
+- "testl $0x200, %1\n\t" \
+- "jz 4f\n\t" \
+- "#sti\n" \
+- "3:\t" \
+- "rep;nop\n\t" \
+- "cmpb $0, %0\n\t" \
+- "jle 3b\n\t" \
+- "#cli\n\t" \
+- "jmp 1b\n" \
+- "4:\t" \
+- "rep;nop\n\t" \
+- "cmpb $0, %0\n\t" \
+- "jg 1b\n\t" \
+- "jmp 4b\n" \
+- "5:\n\t"
++static inline int __raw_spin_is_locked(raw_spinlock_t *x)
++{
++ return *(volatile signed char *)(&(x)->slock) <= 0;
++}
+
+ static inline void __raw_spin_lock(raw_spinlock_t *lock)
+ {
+- asm(__raw_spin_lock_string : "+m" (lock->slock) : : "memory");
++ asm volatile("\n1:\n" \
++ LOCK_PREFIX "decb %0\n\t"
++ "jns 3f\n"
++ "2:\t"
++ "rep;nop\n\t"
++ "cmpb $0,%0\n\t"
++ "jle 2b\n\t"
++ "jmp 1b\n"
++ "3:\n\t"
++ : "+m" (lock->slock) : : "memory");
+ }
+
+ /*
+ * It is easier for the lock validator if interrupts are not re-enabled
+ * in the middle of a lock-acquire. This is a performance feature anyway
+ * so we turn it off:
++ *
++ * NOTE: there's an irqs-on section here, which normally would have to be
++ * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant.
+ */
+ #ifndef CONFIG_PROVE_LOCKING
+ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+ {
+- asm(__raw_spin_lock_string_flags : "+m" (lock->slock) : "r" (flags) : "memory");
++ asm volatile(
++ "\n1:\t"
++ LOCK_PREFIX "decb %0\n\t"
++ "jns 5f\n"
++ "2:\t"
++ "testl $0x200, %1\n\t"
++ "jz 4f\n\t"
++ STI_STRING "\n"
++ "3:\t"
++ "rep;nop\n\t"
++ "cmpb $0, %0\n\t"
++ "jle 3b\n\t"
++ CLI_STRING "\n\t"
++ "jmp 1b\n"
++ "4:\t"
++ "rep;nop\n\t"
++ "cmpb $0, %0\n\t"
++ "jg 1b\n\t"
++ "jmp 4b\n"
++ "5:\n\t"
++ : "+m" (lock->slock) : "r" (flags) : "memory");
+ }
+ #endif
+
+ static inline int __raw_spin_trylock(raw_spinlock_t *lock)
+ {
+ char oldval;
+- __asm__ __volatile__(
++ asm volatile(
+ "xchgb %b0,%1"
+ :"=q" (oldval), "+m" (lock->slock)
+ :"0" (0) : "memory");
+@@ -93,38 +94,29 @@ static inline int __raw_spin_trylock(raw
+
+ #if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
+
+-#define __raw_spin_unlock_string \
+- "movb $1,%0" \
+- :"+m" (lock->slock) : : "memory"
+-
+-
+ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+- __asm__ __volatile__(
+- __raw_spin_unlock_string
+- );
++ asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory");
+ }
+
+ #else
+
+-#define __raw_spin_unlock_string \
+- "xchgb %b0, %1" \
+- :"=q" (oldval), "+m" (lock->slock) \
+- :"0" (oldval) : "memory"
+-
+ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
+ {
+ char oldval = 1;
+
+- __asm__ __volatile__(
+- __raw_spin_unlock_string
+- );
++ asm volatile("xchgb %b0, %1"
++ : "=q" (oldval), "+m" (lock->slock)
++ : "0" (oldval) : "memory");
+ }
+
+ #endif
+
+-#define __raw_spin_unlock_wait(lock) \
+- do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
++static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
++{
++ while (__raw_spin_is_locked(lock))
++ cpu_relax();
++}
+
+ /*
+ * Read-write spinlocks, allowing multiple readers
+@@ -151,22 +143,36 @@ static inline void __raw_spin_unlock(raw
+ * read_can_lock - would read_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+-#define __raw_read_can_lock(x) ((int)(x)->lock > 0)
++static inline int __raw_read_can_lock(raw_rwlock_t *x)
++{
++ return (int)(x)->lock > 0;
++}
+
+ /**
+ * write_can_lock - would write_trylock() succeed?
+ * @lock: the rwlock in question.
+ */
+-#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS)
++static inline int __raw_write_can_lock(raw_rwlock_t *x)
++{
++ return (x)->lock == RW_LOCK_BIAS;
++}
+
+ static inline void __raw_read_lock(raw_rwlock_t *rw)
+ {
+- __build_read_lock(rw, "__read_lock_failed");
++ asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
++ "jns 1f\n"
++ "call __read_lock_failed\n\t"
++ "1:\n"
++ ::"a" (rw) : "memory");
+ }
+
+ static inline void __raw_write_lock(raw_rwlock_t *rw)
+ {
+- __build_write_lock(rw, "__write_lock_failed");
++ asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t"
++ "jz 1f\n"
++ "call __write_lock_failed\n\t"
++ "1:\n"
++ ::"a" (rw) : "memory");
+ }
+
+ static inline int __raw_read_trylock(raw_rwlock_t *lock)
+@@ -199,4 +205,8 @@ static inline void __raw_write_unlock(ra
+ : "+m" (rw->lock) : : "memory");
+ }
+
++#define _raw_spin_relax(lock) cpu_relax()
++#define _raw_read_relax(lock) cpu_relax()
++#define _raw_write_relax(lock) cpu_relax()
++
+ #endif /* __ASM_SPINLOCK_H */
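+
+The spinlock hunks convert the old string-pasting macros into ordinary
+inline functions wrapping the same asm, which type-checks the lock argument
+and is far easier to read.  The protocol itself is unchanged: slock holds 1
+when free, lock decrements it and spins while it is non-positive, trylock
+exchanges in a 0 and inspects the old value.  A userspace sketch of that
+byte-lock protocol, substituting a GCC atomic builtin for the kernel's
+xchgb/decb asm:
+
+    #include <stdio.h>
+
+    typedef struct { volatile signed char slock; } raw_spinlock_t;
+
+    static int raw_spin_trylock(raw_spinlock_t *l)
+    {
+        /* like "xchgb %b0,%1": leave 0 behind, look at what was there */
+        signed char old = __sync_lock_test_and_set(&l->slock, 0);
+        return old > 0;
+    }
+
+    static void raw_spin_unlock(raw_spinlock_t *l)
+    {
+        l->slock = 1;   /* the "movb $1,%0" fast path */
+    }
+
+    int main(void)
+    {
+        raw_spinlock_t l = { 1 };
+        printf("trylock #1: %d\n", raw_spin_trylock(&l));   /* 1 */
+        printf("trylock #2: %d\n", raw_spin_trylock(&l));   /* 0 */
+        raw_spin_unlock(&l);
+        printf("trylock #3: %d\n", raw_spin_trylock(&l));   /* 1 */
+        return 0;
+    }
+
+The plain-store unlock is only valid because x86 keeps stores ordered; the
+CONFIG_X86_OOSTORE/PPRO_FENCE branch above keeps the xchgb variant for the
+chips where that assumption fails.
+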
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/system.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/system.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/system.h 2007-10-22 13:53:08.000000000 +0200
+@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig
+ #define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
++#define sync_cmpxchg(ptr,o,n)\
++ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
++ (unsigned long)(n),sizeof(*(ptr))))
+ #endif
+
+ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo
+ return old;
+ }
+
++/*
++ * Always use locked operations when touching memory shared with a
++ * hypervisor, since the system may be SMP even if the guest kernel
++ * isn't.
++ */
++static inline unsigned long __sync_cmpxchg(volatile void *ptr,
++ unsigned long old,
++ unsigned long new, int size)
++{
++ unsigned long prev;
++ switch (size) {
++ case 1:
++ __asm__ __volatile__("lock; cmpxchgb %b1,%2"
++ : "=a"(prev)
++ : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ : "memory");
++ return prev;
++ case 2:
++ __asm__ __volatile__("lock; cmpxchgw %w1,%2"
++ : "=a"(prev)
++ : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ : "memory");
++ return prev;
++ case 4:
++ __asm__ __volatile__("lock; cmpxchgl %1,%2"
++ : "=a"(prev)
++ : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ : "memory");
++ return prev;
++ }
++ return old;
++}
++
+ #ifndef CONFIG_X86_CMPXCHG
+ /*
+ * Building a kernel capable running on 80386. It may be necessary to
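+
+The comment in the __sync_cmpxchg hunk is the key point: cmpxchg's LOCK
+prefix is normally patched out on UP kernels, but memory shared with the
+hypervisor (grant tables, ring buffers) is written by other CPUs even when
+the guest itself is uniprocessor, so these accessors keep the lock prefix
+unconditionally.  A userspace sketch of the 2-byte case using a GCC
+builtin, which compiles to the same "lock cmpxchgw":
+
+    #include <stdio.h>
+
+    /* stand-in for a hypervisor-shared word */
+    static volatile unsigned short shared_flags;
+
+    static unsigned short sync_cmpxchg16(volatile unsigned short *p,
+                                         unsigned short old, unsigned short new)
+    {
+        return __sync_val_compare_and_swap(p, old, new);
+    }
+
+    int main(void)
+    {
+        shared_flags = 1;
+        unsigned short prev = sync_cmpxchg16(&shared_flags, 1, 3);
+        printf("prev=%u now=%u\n", prev, shared_flags); /* prev=1 now=3 */
+        prev = sync_cmpxchg16(&shared_flags, 1, 7);     /* fails: not 1 */
+        printf("prev=%u now=%u\n", prev, shared_flags); /* prev=3 now=3 */
+        return 0;
+    }
+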
+Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/tlbflush.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/tlbflush.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/tlbflush.h 2007-10-22 13:53:08.000000000 +0200
+@@ -8,8 +8,6 @@
+ #define __flush_tlb_global() xen_tlb_flush()
+ #define __flush_tlb_all() xen_tlb_flush()
+
+-extern unsigned long pgkern_mask;
+-
+ #define cpu_has_invlpg (boot_cpu_data.x86 > 3)
+
+ #define __flush_tlb_single(addr) xen_invlpg(addr)
+Index: 10.3-2007-11-26/include/asm-x86_64/acpi.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/acpi.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/acpi.h 2007-10-22 13:53:08.000000000 +0200
+@@ -136,7 +136,9 @@ extern void acpi_reserve_bootmem(void);
+ extern int acpi_disabled;
+ extern int acpi_pci_disabled;
+
++#ifndef CONFIG_XEN
+ #define ARCH_HAS_POWER_INIT 1
++#endif
+
+ extern int acpi_skip_timer_override;
+ extern int acpi_use_timer_override;
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/arch_hooks.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/arch_hooks.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/arch_hooks.h 2007-10-22 13:53:08.000000000 +0200
+@@ -14,7 +14,7 @@
+ extern void init_ISA_irqs(void);
+ extern void apic_intr_init(void);
+ extern void smp_intr_init(void);
+-extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
++extern irqreturn_t timer_interrupt(int irq, void *dev_id);
+
+ /* these are the defined hooks */
+ extern void intr_init_hook(void);
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/dma-mapping.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h 2007-10-22 13:53:08.000000000 +0200
+@@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address;
+ extern struct dma_mapping_ops* dma_ops;
+ extern int iommu_merge;
+
+-static inline int valid_dma_direction(int dma_direction)
+-{
+- return ((dma_direction == DMA_BIDIRECTIONAL) ||
+- (dma_direction == DMA_TO_DEVICE) ||
+- (dma_direction == DMA_FROM_DEVICE));
+-}
+-
+ #if 0
+ static inline int dma_mapping_error(dma_addr_t dma_addr)
+ {
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/e820.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/e820.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/e820.h 2007-10-22 13:53:08.000000000 +0200
+@@ -19,13 +19,9 @@
+
+ #define E820_RAM 1
+ #define E820_RESERVED 2
+-#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */
++#define E820_ACPI 3
+ #define E820_NVS 4
+
+-#define HIGH_MEMORY (1024*1024)
+-
+-#define LOWMEMSIZE() (0x9f000)
+-
+ #ifndef __ASSEMBLY__
+ struct e820entry {
+ u64 addr; /* start of memory segment */
+@@ -46,17 +42,16 @@ extern void setup_memory_region(void);
+ extern void contig_e820_setup(void);
+ extern unsigned long e820_end_of_ram(void);
+ extern void e820_reserve_resources(struct e820entry *e820, int nr_map);
++extern void e820_mark_nosave_regions(void);
+ extern void e820_print_map(char *who);
+ extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
+ extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
+
+-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end);
+ extern void e820_setup_gap(struct e820entry *e820, int nr_map);
+-extern unsigned long e820_hole_size(unsigned long start_pfn,
+- unsigned long end_pfn);
++extern void e820_register_active_regions(int nid,
++ unsigned long start_pfn, unsigned long end_pfn);
+
+-extern void __init parse_memopt(char *p, char **end);
+-extern void __init parse_memmapopt(char *p, char **end);
++extern void finish_e820_parsing(void);
+
+ extern struct e820map e820;
+
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/fixmap.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/fixmap.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/fixmap.h 2007-10-22 13:53:08.000000000 +0200
+@@ -41,7 +41,7 @@ enum fixed_addresses {
+ #ifdef CONFIG_X86_LOCAL_APIC
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
+ #endif
+-#ifdef CONFIG_X86_IO_APIC
++#ifndef CONFIG_XEN
+ FIX_IO_APIC_BASE_0,
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
+ #endif
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/floppy.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/floppy.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/floppy.h 2007-10-22 13:53:08.000000000 +0200
+@@ -54,7 +54,7 @@ static char *virtual_dma_addr;
+ static int virtual_dma_mode;
+ static int doing_pdma;
+
+-static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs)
++static irqreturn_t floppy_hardint(int irq, void *dev_id)
+ {
+ register unsigned char st;
+
+@@ -66,7 +66,7 @@ static irqreturn_t floppy_hardint(int ir
+ static int dma_wait=0;
+ #endif
+ if (!doing_pdma)
+- return floppy_interrupt(irq, dev_id, regs);
++ return floppy_interrupt(irq, dev_id);
+
+ #ifdef TRACE_FLPY_INT
+ if(!calls)
+@@ -109,7 +109,7 @@ static irqreturn_t floppy_hardint(int ir
+ dma_wait=0;
+ #endif
+ doing_pdma = 0;
+- floppy_interrupt(irq, dev_id, regs);
++ floppy_interrupt(irq, dev_id);
+ return IRQ_HANDLED;
+ }
+ #ifdef TRACE_FLPY_INT
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/hw_irq.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/hw_irq.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/hw_irq.h 2007-10-22 13:53:08.000000000 +0200
+@@ -19,8 +19,7 @@
+ #include <asm/irq.h>
+ #include <linux/profile.h>
+ #include <linux/smp.h>
+-
+-struct hw_interrupt_type;
++#include <linux/percpu.h>
+ #endif
+
+ #define NMI_VECTOR 0x02
+@@ -77,9 +76,10 @@ struct hw_interrupt_type;
+
+
+ #ifndef __ASSEMBLY__
+-extern u8 irq_vector[NR_IRQ_VECTORS];
+-#define IO_APIC_VECTOR(irq) (irq_vector[irq])
+-#define AUTO_ASSIGN -1
++typedef int vector_irq_t[NR_VECTORS];
++DECLARE_PER_CPU(vector_irq_t, vector_irq);
++extern void __setup_vector_irq(int cpu);
++extern spinlock_t vector_lock;
+
+ /*
+ * Various low-level irq details needed by irq.c, process.c,
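+
+The hw_irq.h hunk swaps the single global irq_vector[] map for a per-CPU
+vector_irq table, so different CPUs can route the same vector number to
+different irqs.  A toy stand-in for the per-CPU machinery (plain arrays
+instead of DECLARE_PER_CPU, invented irq numbers):
+
+    #include <stdio.h>
+
+    #define NR_VECTORS 256
+    #define NR_CPUS    2
+
+    typedef int vector_irq_t[NR_VECTORS];
+    static vector_irq_t vector_irq[NR_CPUS];
+
+    int main(void)
+    {
+        vector_irq[0][0x31] = 16;   /* cpu0: vector 0x31 -> irq 16 */
+        vector_irq[1][0x31] = 23;   /* cpu1: same vector -> irq 23 */
+        printf("vector 0x31: cpu0->irq%d cpu1->irq%d\n",
+               vector_irq[0][0x31], vector_irq[1][0x31]);
+        return 0;
+    }
+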
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/io.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/io.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/io.h 2007-10-22 13:53:08.000000000 +0200
+@@ -273,33 +273,6 @@ void memset_io(volatile void __iomem *a,
+
+ #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d))
+
+-/**
+- * check_signature - find BIOS signatures
+- * @io_addr: mmio address to check
+- * @signature: signature block
+- * @length: length of signature
+- *
+- * Perform a signature comparison with the mmio address io_addr. This
+- * address should have been obtained by ioremap.
+- * Returns 1 on a match.
+- */
+-
+-static inline int check_signature(void __iomem *io_addr,
+- const unsigned char *signature, int length)
+-{
+- int retval = 0;
+- do {
+- if (readb(io_addr) != *signature)
+- goto out;
+- io_addr++;
+- signature++;
+- length--;
+- } while (length);
+- retval = 1;
+-out:
+- return retval;
+-}
+-
+ /* Nothing to do */
+
+ #define dma_cache_inv(_start,_size) do { } while (0)
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/msr.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h 2007-10-22 13:53:08.000000000 +0200
+@@ -66,14 +66,25 @@
+ #define rdtscl(low) \
+ __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx")
+
++#define rdtscp(low,high,aux) \
++ asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux))
++
+ #define rdtscll(val) do { \
+ unsigned int __a,__d; \
+ asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \
+ (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \
+ } while(0)
+
++#define rdtscpll(val, aux) do { \
++ unsigned long __a, __d; \
++ asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \
++ (val) = (__d << 32) | __a; \
++} while (0)
++
+ #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
+
++#define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0)
++
+ #define rdpmc(counter,low,high) \
+ __asm__ __volatile__("rdpmc" \
+ : "=a" (low), "=d" (high) \
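+
+The ".byte 0x0f,0x01,0xf9" in the new rdtscp/rdtscpll macros is the RDTSCP
+opcode spelled out for assemblers that predate the mnemonic; the
+instruction returns the TSC in edx:eax and IA32_TSC_AUX in ecx (which
+write_rdtscp_aux() programs via MSR 0xc0000103).  A userspace sketch,
+assuming a CPU that actually implements RDTSCP (it faults otherwise):
+
+    #include <stdio.h>
+
+    static unsigned long long rdtscp(unsigned int *aux)
+    {
+        unsigned int lo, hi;
+        __asm__ volatile(".byte 0x0f,0x01,0xf9"     /* rdtscp */
+                         : "=a" (lo), "=d" (hi), "=c" (*aux));
+        return ((unsigned long long)hi << 32) | lo;
+    }
+
+    int main(void)
+    {
+        unsigned int aux;
+        unsigned long long tsc = rdtscp(&aux);
+        printf("tsc=%llu aux=0x%x\n", tsc, aux);
+        return 0;
+    }
+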
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/nmi.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/nmi.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/nmi.h 2007-10-22 13:53:08.000000000 +0200
+@@ -9,24 +9,13 @@
+
+ #include <xen/interface/nmi.h>
+
+-struct pt_regs;
+-
+-typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
+-
+-/**
+- * set_nmi_callback
+- *
+- * Set a handler for an NMI. Only one handler may be
+- * set. Return 1 if the NMI was handled.
+- */
+-void set_nmi_callback(nmi_callback_t callback);
+-
+ /**
+- * unset_nmi_callback
++ * do_nmi_callback
+ *
+- * Remove the handler previously set.
++ * Check to see if a callback exists and execute it. Return 1
++ * if the handler exists and was handled successfully.
+ */
+-void unset_nmi_callback(void);
++int do_nmi_callback(struct pt_regs *regs, int cpu);
+
+ #ifdef CONFIG_PM
+
+@@ -50,7 +39,7 @@ static inline void unset_nmi_pm_callback
+ #endif /* CONFIG_PM */
+
+ extern void default_do_nmi(struct pt_regs *);
+-extern void die_nmi(char *str, struct pt_regs *regs);
++extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
+
+ static inline unsigned char get_nmi_reason(void)
+ {
+@@ -70,19 +59,26 @@ static inline unsigned char get_nmi_reas
+
+ extern int panic_on_timeout;
+ extern int unknown_nmi_panic;
++extern int nmi_watchdog_enabled;
+
+ extern int check_nmi_watchdog(void);
+-
+-extern void setup_apic_nmi_watchdog (void);
+-extern int reserve_lapic_nmi(void);
+-extern void release_lapic_nmi(void);
++extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
++extern int avail_to_resrv_perfctr_nmi(unsigned int);
++extern int reserve_perfctr_nmi(unsigned int);
++extern void release_perfctr_nmi(unsigned int);
++extern int reserve_evntsel_nmi(unsigned int);
++extern void release_evntsel_nmi(unsigned int);
++
++extern void setup_apic_nmi_watchdog (void *);
++extern void stop_apic_nmi_watchdog (void *);
+ extern void disable_timer_nmi_watchdog(void);
+ extern void enable_timer_nmi_watchdog(void);
+-extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
++extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
+
+ extern void nmi_watchdog_default(void);
+ extern int setup_nmi_watchdog(char *);
+
++extern atomic_t nmi_active;
+ extern unsigned int nmi_watchdog;
+ #define NMI_DEFAULT -1
+ #define NMI_NONE 0
+@@ -90,4 +86,11 @@ extern unsigned int nmi_watchdog;
+ #define NMI_LOCAL_APIC 2
+ #define NMI_INVALID 3
+
++struct ctl_table;
++struct file;
++extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
++ void __user *, size_t *, loff_t *);
++
++extern int unknown_nmi_panic;
++
+ #endif /* ASM_NMI_H */
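+
+The nmi.h hunk tracks the 2.6.19 NMI-watchdog rework: the single global
+set_nmi_callback() is gone, and users must now reserve individual
+performance counters and event-select MSRs before touching them, so the
+watchdog and profilers can coexist.  A toy model of that reservation
+scheme -- a plain bitmask standing in for the kernel's atomic per-MSR
+bookkeeping:
+
+    #include <stdio.h>
+
+    static unsigned long perfctr_busy;  /* bit n = counter n in use */
+
+    static int reserve_perfctr(unsigned int n)
+    {
+        if (perfctr_busy & (1UL << n))
+            return 0;                   /* already claimed */
+        perfctr_busy |= 1UL << n;
+        return 1;
+    }
+
+    static void release_perfctr(unsigned int n)
+    {
+        perfctr_busy &= ~(1UL << n);
+    }
+
+    int main(void)
+    {
+        printf("first claim:   %d\n", reserve_perfctr(0));  /* 1 */
+        printf("second claim:  %d\n", reserve_perfctr(0));  /* 0 */
+        release_perfctr(0);
+        printf("after release: %d\n", reserve_perfctr(0));  /* 1 */
+        return 0;
+    }
+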
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-10-22 13:53:08.000000000 +0200
+@@ -44,12 +44,9 @@ extern unsigned long __supported_pte_mas
+
+ #define swapper_pg_dir init_level4_pgt
+
+-extern int nonx_setup(char *str);
+ extern void paging_init(void);
+ extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
+
+-extern unsigned long pgkern_mask;
+-
+ /*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+@@ -115,9 +112,6 @@ static inline void pgd_clear (pgd_t * pg
+ set_pgd(__user_pgd(pgd), __pgd(0));
+ }
+
+-#define pud_page(pud) \
+- ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
+-
+ #define pte_same(a, b) ((a).pte == (b).pte)
+
+ #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
+@@ -326,7 +320,7 @@ static inline pte_t ptep_get_and_clear_f
+ #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
+ static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
+ static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
+-static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
++static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
+ static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
+ static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
+ static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
+@@ -339,29 +333,12 @@ static inline pte_t pte_mkclean(pte_t pt
+ static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+ static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
+ static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
+-static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
++static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
+ static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
+ static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+ static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
+ static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
+-
+-#define ptep_test_and_clear_dirty(vma, addr, ptep) \
+-({ \
+- pte_t __pte = *(ptep); \
+- int __ret = pte_dirty(__pte); \
+- if (__ret) \
+- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
+- __ret; \
+-})
+-
+-#define ptep_test_and_clear_young(vma, addr, ptep) \
+-({ \
+- pte_t __pte = *(ptep); \
+- int __ret = pte_young(__pte); \
+- if (__ret) \
+- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
+- __ret; \
+-})
++static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
+
+ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+ {
+@@ -389,7 +366,8 @@ static inline int pmd_large(pmd_t pte) {
+ * Level 4 access.
+ * Never use these in the common code.
+ */
+-#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
++#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
++#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
+ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+ #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
+ #define pgd_offset_k(address) (pgd_t *)(init_level4_pgt + pgd_index(address))
+@@ -398,16 +376,18 @@ static inline int pmd_large(pmd_t pte) {
+
+ /* PUD - Level3 access */
+ /* to find an entry in a page-table-directory. */
++#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
++#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
+ #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+-#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
++#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
+ #define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT)
+
+ /* PMD - Level 2 access */
+-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
++#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
+ #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+ #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+-#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
++#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
+ pmd_index(address))
+ #define pmd_none(x) (!pmd_val(x))
+ #if CONFIG_XEN_COMPAT <= 0x030002
+@@ -438,6 +418,7 @@ static inline pte_t mk_pte_phys(unsigned
+ {
+ unsigned long pteval;
+ pteval = physpage | pgprot_val(pgprot);
++ pteval &= __supported_pte_mask;
+ return __pte(pteval);
+ }
+
+@@ -459,7 +440,7 @@ static inline pte_t pte_modify(pte_t pte
+
+ #define pte_index(address) \
+ (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
++#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
+ pte_index(address))
+
+ /* x86-64 always has all page tables mapped. */
+@@ -500,6 +481,40 @@ static inline pte_t pte_modify(pte_t pte
+ ptep_establish(vma, address, ptep, entry); \
+ } while (0)
+
++
++/*
++ * i386 says: We don't actually have these, but we want to advertise
++ * them so that we can encompass the flush here.
++ */
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
++
++#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
++#define ptep_clear_flush_dirty(vma, address, ptep) \
++({ \
++ pte_t __pte = *(ptep); \
++ int __dirty = pte_dirty(__pte); \
++ __pte = pte_mkclean(__pte); \
++ if ((vma)->vm_mm->context.pinned) \
++ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
++ else if (__dirty) \
++ set_pte(ptep, __pte); \
++ __dirty; \
++})
++
++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
++#define ptep_clear_flush_young(vma, address, ptep) \
++({ \
++ pte_t __pte = *(ptep); \
++ int __young = pte_young(__pte); \
++ __pte = pte_mkold(__pte); \
++ if ((vma)->vm_mm->context.pinned) \
++ ptep_set_access_flags(vma, address, ptep, __pte, __young); \
++ else if (__young) \
++ set_pte(ptep, __pte); \
++ __young; \
++})
++
+ /* Encode and de-code a swap entry */
+ #define __swp_type(x) (((x).val >> 1) & 0x3f)
+ #define __swp_offset(x) ((x).val >> 8)
+@@ -560,8 +575,6 @@ int touch_pte_range(struct mm_struct *mm
+ #define kc_offset_to_vaddr(o) \
+ (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
+
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+ #define __HAVE_ARCH_PTEP_CLEAR_FLUSH
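+
+Buried in the pgtable.h hunk is a real fix: pte_exec()/pte_mkexec() now key
+off the NX bit (bit 63 of an x86-64 PTE) instead of _PAGE_USER, so
+"executable" means "NX clear" rather than "user-accessible".  A toy model
+with hand-rolled constants (not the kernel's headers):
+
+    #include <stdio.h>
+
+    #define _PAGE_NX   (1ULL << 63)
+    #define _PAGE_USER (1ULL << 2)
+
+    typedef struct { unsigned long long pte; } pte_t;
+
+    static int pte_exec(pte_t p)     { return !(p.pte & _PAGE_NX); }
+    static pte_t pte_mkexec(pte_t p) { p.pte &= ~_PAGE_NX; return p; }
+
+    int main(void)
+    {
+        pte_t p = { _PAGE_USER | _PAGE_NX };
+        printf("exec before: %d\n", pte_exec(p));               /* 0 */
+        printf("exec after:  %d\n", pte_exec(pte_mkexec(p)));   /* 1 */
+        return 0;
+    }
+
+The same hunk also folds __supported_pte_mask into mk_pte_phys(), so a
+pgprot carrying NX is silently stripped on CPUs without NX support.
+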
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/processor.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/processor.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/processor.h 2007-10-22 13:53:08.000000000 +0200
+@@ -488,6 +488,8 @@ static inline void __mwait(unsigned long
+ : :"a" (eax), "c" (ecx));
+ }
+
++extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
++
+ #define stack_current() \
+ ({ \
+ struct thread_info *ti; \
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/ptrace.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/ptrace.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/ptrace.h 2007-10-22 13:53:08.000000000 +0200
+@@ -1,40 +1,9 @@
+ #ifndef _X86_64_PTRACE_H
+ #define _X86_64_PTRACE_H
+
+-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+-#define R15 0
+-#define R14 8
+-#define R13 16
+-#define R12 24
+-#define RBP 32
+-#define RBX 40
+-/* arguments: interrupts/non tracing syscalls only save upto here*/
+-#define R11 48
+-#define R10 56
+-#define R9 64
+-#define R8 72
+-#define RAX 80
+-#define RCX 88
+-#define RDX 96
+-#define RSI 104
+-#define RDI 112
+-#define ORIG_RAX 120 /* = ERROR */
+-/* end of arguments */
+-/* cpu exception frame or undefined in case of fast syscall. */
+-#define RIP 128
+-#define CS 136
+-#define EFLAGS 144
+-#define RSP 152
+-#define SS 160
+-#define ARGOFFSET R11
+-#endif /* __ASSEMBLY__ */
++#include <asm/ptrace-abi.h>
+
+-/* top of stack page */
+-#define FRAME_SIZE 168
+-
+-#define PTRACE_OLDSETOPTIONS 21
+-
+-#ifndef __ASSEMBLY__
++#ifndef __ASSEMBLY__
+
+ struct pt_regs {
+ unsigned long r15;
+@@ -45,7 +14,7 @@ struct pt_regs {
+ unsigned long rbx;
+ /* arguments: non interrupts/non tracing syscalls only save upto here*/
+ unsigned long r11;
+- unsigned long r10;
++ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rax;
+@@ -54,41 +23,25 @@ struct pt_regs {
+ unsigned long rsi;
+ unsigned long rdi;
+ unsigned long orig_rax;
+-/* end of arguments */
++/* end of arguments */
+ /* cpu exception frame or undefined */
+ unsigned long rip;
+ unsigned long cs;
+- unsigned long eflags;
+- unsigned long rsp;
++ unsigned long eflags;
++ unsigned long rsp;
+ unsigned long ss;
+-/* top of stack page */
++/* top of stack page */
+ };
+
+ #endif
+
+-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */
+-#define PTRACE_GETREGS 12
+-#define PTRACE_SETREGS 13
+-#define PTRACE_GETFPREGS 14
+-#define PTRACE_SETFPREGS 15
+-#define PTRACE_GETFPXREGS 18
+-#define PTRACE_SETFPXREGS 19
+-
+-/* only useful for access 32bit programs */
+-#define PTRACE_GET_THREAD_AREA 25
+-#define PTRACE_SET_THREAD_AREA 26
+-
+-#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */
+-
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+ #define user_mode(regs) (!!((regs)->cs & 3))
+ #define user_mode_vm(regs) user_mode(regs)
+ #define instruction_pointer(regs) ((regs)->rip)
+-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
++#define regs_return_value(regs) ((regs)->rax)
++
+ extern unsigned long profile_pc(struct pt_regs *regs);
+-#else
+-#define profile_pc(regs) instruction_pointer(regs)
+-#endif
+
+ #include <linux/compiler.h>
+
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/smp.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h 2007-10-22 13:53:08.000000000 +0200
+@@ -4,15 +4,12 @@
+ /*
+ * We need the APIC definitions automatically as part of 'smp.h'
+ */
+-#ifndef __ASSEMBLY__
+ #include <linux/threads.h>
+ #include <linux/cpumask.h>
+ #include <linux/bitops.h>
+ extern int disable_apic;
+-#endif
+
+ #ifdef CONFIG_X86_LOCAL_APIC
+-#ifndef __ASSEMBLY__
+ #include <asm/fixmap.h>
+ #include <asm/mpspec.h>
+ #ifdef CONFIG_X86_IO_APIC
+@@ -21,10 +18,8 @@ extern int disable_apic;
+ #include <asm/apic.h>
+ #include <asm/thread_info.h>
+ #endif
+-#endif
+
+ #ifdef CONFIG_SMP
+-#ifndef ASSEMBLY
+
+ #include <asm/pda.h>
+
+@@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized;
+
+ extern void smp_alloc_memory(void);
+ extern volatile unsigned long smp_invalidate_needed;
+-extern int pic_mode;
+ extern void lock_ipi_call_lock(void);
+ extern void unlock_ipi_call_lock(void);
+ extern int smp_num_siblings;
+ extern void smp_send_reschedule(int cpu);
+ void smp_stop_cpu(void);
+-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
+- void *info, int retry, int wait);
+
+ extern cpumask_t cpu_sibling_map[NR_CPUS];
+ extern cpumask_t cpu_core_map[NR_CPUS];
+@@ -77,20 +69,16 @@ static inline int hard_smp_processor_id(
+ }
+ #endif
+
+-extern int safe_smp_processor_id(void);
+ extern int __cpu_disable(void);
+ extern void __cpu_die(unsigned int cpu);
+ extern void prefill_possible_map(void);
+ extern unsigned num_processors;
+ extern unsigned disabled_cpus;
+
+-#endif /* !ASSEMBLY */
+-
+ #define NO_PROC_ID 0xFF /* No processor magic marker */
+
+ #endif
+
+-#ifndef ASSEMBLY
+ /*
+ * Some lowlevel functions might want to know about
+ * the real APIC ID <-> CPU # mapping.
+@@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid(
+ }
+ #endif
+
+-#endif /* !ASSEMBLY */
+-
+ #ifndef CONFIG_SMP
+ #define stack_smp_processor_id() 0
+-#define safe_smp_processor_id() 0
+ #define cpu_logical_map(x) (x)
+ #else
+ #include <asm/thread_info.h>
+@@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid(
+ })
+ #endif
+
+-#ifndef __ASSEMBLY__
+ #ifdef CONFIG_X86_LOCAL_APIC
+ static __inline int logical_smp_processor_id(void)
+ {
+@@ -138,13 +122,18 @@ static __inline int logical_smp_processo
+ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
+ }
+ #endif
+-#endif
+
+ #ifdef CONFIG_SMP
+ #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
+ #else
+ #define cpu_physical_id(cpu) boot_cpu_id
+-#endif
+-
++static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
++ void *info, int retry, int wait)
++{
++ /* Disable interrupts here? */
++ func(info);
++ return 0;
++}
++#endif /* !CONFIG_SMP */
+ #endif
+
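+
+The smp.h hunk above also grows a UP stub: on !CONFIG_SMP there is only one
+CPU, so smp_call_function_single() degenerates to calling the function
+right here.  The "Disable interrupts here?" comment flags a semantic the
+stub does not yet match -- the SMP path runs the callback from IPI context
+with interrupts off.  Standalone illustration of the stub's behaviour:
+
+    #include <stdio.h>
+
+    static int smp_call_function_single(int cpuid, void (*func)(void *),
+                                        void *info, int retry, int wait)
+    {
+        (void)cpuid; (void)retry; (void)wait;   /* one CPU: moot */
+        func(info);
+        return 0;
+    }
+
+    static void bump(void *p) { ++*(int *)p; }
+
+    int main(void)
+    {
+        int n = 0;
+        smp_call_function_single(0, bump, &n, 0, 1);
+        printf("n=%d\n", n);    /* 1 */
+        return 0;
+    }
+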
+Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/system.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/system.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/system.h 2007-10-22 13:53:08.000000000 +0200
+@@ -24,12 +24,13 @@
+ #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
+
+ /* frame pointer must be last for get_wchan */
+-#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
+-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\n\t"
++#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t"
++#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t"
+
+ #define __EXTRA_CLOBBER \
+ ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
+
++/* Save restore flags to clear handle leaking NT */
+ #define switch_to(prev,next,last) \
+ asm volatile(SAVE_CONTEXT \
+ "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
+Index: 10.3-2007-11-26/include/asm-x86_64/thread_info.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/asm-x86_64/thread_info.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/asm-x86_64/thread_info.h 2007-10-22 13:53:08.000000000 +0200
+@@ -147,7 +147,11 @@ static inline struct thread_info *stack_
+ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
+
+ /* flags to check in __switch_to() */
++#ifndef CONFIG_XEN
+ #define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP)
++#else
++#define _TIF_WORK_CTXSW _TIF_DEBUG
++#endif
+
+ #define PREEMPT_ACTIVE 0x10000000
+
+Index: 10.3-2007-11-26/include/linux/skbuff.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/linux/skbuff.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/linux/skbuff.h 2007-10-22 13:53:08.000000000 +0200
+@@ -1729,5 +1729,11 @@ static inline void skb_forward_csum(stru
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
++#ifdef CONFIG_XEN
++int skb_checksum_setup(struct sk_buff *skb);
++#else
++static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
++#endif
++
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SKBUFF_H */
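+
+The skbuff.h hunk exports skb_checksum_setup() out of net/core/dev.c's
+private scope (see the matching dev.c hunk below) using the usual kernel
+idiom for optional features: a real declaration under CONFIG_XEN, an inline
+no-op returning success otherwise, so call sites stay #ifdef-free.  Generic
+illustration of the idiom with an invented option name:
+
+    #include <stdio.h>
+
+    /* #define CONFIG_FOO 1 */
+
+    #ifdef CONFIG_FOO
+    int foo_setup(int x);               /* real code lives elsewhere */
+    #else
+    static inline int foo_setup(int x) { (void)x; return 0; }
+    #endif
+
+    int main(void)
+    {
+        printf("foo_setup -> %d\n", foo_setup(42)); /* 0: stubbed out */
+        return 0;
+    }
+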
+Index: 10.3-2007-11-26/include/xen/evtchn.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/xen/evtchn.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/xen/evtchn.h 2007-10-22 13:53:08.000000000 +0200
+@@ -54,34 +54,34 @@
+ */
+ int bind_caller_port_to_irqhandler(
+ unsigned int caller_port,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
+ int bind_listening_port_to_irqhandler(
+ unsigned int remote_domain,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
+ int bind_interdomain_evtchn_to_irqhandler(
+ unsigned int remote_domain,
+ unsigned int remote_port,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
+ int bind_virq_to_irqhandler(
+ unsigned int virq,
+ unsigned int cpu,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
+ int bind_ipi_to_irqhandler(
+ unsigned int ipi,
+ unsigned int cpu,
+- irqreturn_t (*handler)(int, void *, struct pt_regs *),
++ irq_handler_t handler,
+ unsigned long irqflags,
+ const char *devname,
+ void *dev_id);
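+
+All of the bind_*_to_irqhandler() changes above (and the floppy.h and
+arch_hooks.h hunks earlier) are one and the same 2.6.19 interface change:
+interrupt handlers lost their struct pt_regs argument, and the
+function-pointer type gained the name irq_handler_t.  Sketch of the
+before/after shape:
+
+    #include <stdio.h>
+
+    typedef int irqreturn_t;
+    #define IRQ_HANDLED 1
+
+    /* 2.6.18: irqreturn_t (*)(int, void *, struct pt_regs *)
+     * 2.6.19: */
+    typedef irqreturn_t (*irq_handler_t)(int irq, void *dev_id);
+
+    static irqreturn_t my_handler(int irq, void *dev_id)
+    {
+        printf("irq %d for %s\n", irq, (const char *)dev_id);
+        return IRQ_HANDLED;
+    }
+
+    int main(void)
+    {
+        irq_handler_t h = my_handler;   /* what bind_* now accepts */
+        return h(7, "demo") == IRQ_HANDLED ? 0 : 1;
+    }
+
+Handlers that still need the register frame fetch it with get_irq_regs()
+in 2.6.19 instead of taking it as a parameter.
+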
+Index: 10.3-2007-11-26/include/xen/xencons.h
+===================================================================
+--- 10.3-2007-11-26.orig/include/xen/xencons.h 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/include/xen/xencons.h 2007-10-22 13:53:08.000000000 +0200
+@@ -8,7 +8,7 @@ void xencons_force_flush(void);
+ void xencons_resume(void);
+
+ /* Interrupt work hooks. Receive data, or kick data out. */
+-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
++void xencons_rx(char *buf, unsigned len);
+ void xencons_tx(void);
+
+ int xencons_ring_init(void);
+Index: 10.3-2007-11-26/net/core/dev.c
+===================================================================
+--- 10.3-2007-11-26.orig/net/core/dev.c 2007-12-06 17:27:35.000000000 +0100
++++ 10.3-2007-11-26/net/core/dev.c 2007-10-22 13:53:08.000000000 +0200
+@@ -1487,15 +1487,13 @@ inline int skb_checksum_setup(struct sk_
+ }
+ if ((skb->h.raw + skb->csum + 2) > skb->tail)
+ goto out;
+- skb->ip_summed = CHECKSUM_HW;
++ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->proto_csum_blank = 0;
+ }
+ return 0;
+ out:
+ return -EPROTO;
+ }
+-#else
+-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+ #endif
+
+
+@@ -1928,7 +1926,7 @@ int netif_receive_skb(struct sk_buff *sk
+ case CHECKSUM_UNNECESSARY:
+ skb->proto_data_valid = 1;
+ break;
+- case CHECKSUM_HW:
++ case CHECKSUM_PARTIAL:
+ /* XXX Implement me. */
+ default:
+ skb->proto_data_valid = 0;