Diffstat (limited to 'trunk/2.6.22/20044_xen3-patch-2.6.19.patch1')
-rw-r--r-- | trunk/2.6.22/20044_xen3-patch-2.6.19.patch1 | 12637
1 file changed, 12637 insertions, 0 deletions
diff --git a/trunk/2.6.22/20044_xen3-patch-2.6.19.patch1 b/trunk/2.6.22/20044_xen3-patch-2.6.19.patch1 new file mode 100644 index 0000000..908b07d --- /dev/null +++ b/trunk/2.6.22/20044_xen3-patch-2.6.19.patch1 @@ -0,0 +1,12637 @@ +From: www.kernel.org +Subject: Linux 2.6.19 +Patch-mainline: 2.6.19 + +Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py + +Acked-by: jbeulich@novell.com + +Index: 10.3-2007-11-26/arch/i386/Kconfig +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/Kconfig 2007-09-03 09:52:56.000000000 +0200 ++++ 10.3-2007-11-26/arch/i386/Kconfig 2007-10-22 13:53:08.000000000 +0200 +@@ -222,7 +222,7 @@ endchoice + config PARAVIRT + bool "Paravirtualization support (EXPERIMENTAL)" + depends on EXPERIMENTAL +- depends on !(X86_VISWS || X86_VOYAGER) ++ depends on !(X86_VISWS || X86_VOYAGER || X86_XEN) + help + Paravirtualization is a way of running multiple instances of + Linux on the same machine, under a hypervisor. This option +Index: 10.3-2007-11-26/arch/i386/kernel/acpi/Makefile +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/acpi/Makefile 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/acpi/Makefile 2007-10-22 13:53:08.000000000 +0200 +@@ -7,5 +7,7 @@ endif + + ifdef CONFIG_XEN + include $(srctree)/scripts/Makefile.xen ++n-obj-xen := cstate.o ++obj-y := $(call filterxen, $(obj-y), $(n-obj-xen)) + obj-y := $(call cherrypickxen, $(obj-y), $(src)) + endif +Index: 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/acpi/boot-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/acpi/boot-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -26,9 +26,12 @@ + #include <linux/init.h> + #include <linux/acpi.h> + #include <linux/efi.h> ++#include <linux/cpumask.h> + #include <linux/module.h> + #include <linux/dmi.h> + #include <linux/irq.h> ++#include <linux/bootmem.h> ++#include <linux/ioport.h> + + #include <asm/pgtable.h> + #include <asm/io_apic.h> +@@ -36,11 +39,17 @@ + #include <asm/io.h> + #include <asm/mpspec.h> + +-#ifdef CONFIG_X86_64 ++static int __initdata acpi_force = 0; ++ ++#ifdef CONFIG_ACPI ++int acpi_disabled = 0; ++#else ++int acpi_disabled = 1; ++#endif ++EXPORT_SYMBOL(acpi_disabled); + +-extern void __init clustered_apic_check(void); ++#ifdef CONFIG_X86_64 + +-extern int gsi_irq_sharing(int gsi); + #include <asm/proto.h> + + static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } +@@ -53,8 +62,6 @@ static inline int acpi_madt_oem_check(ch + #include <mach_mpparse.h> + #endif /* CONFIG_X86_LOCAL_APIC */ + +-static inline int gsi_irq_sharing(int gsi) { return gsi; } +- + #endif /* X86 */ + + #define BAD_MADT_ENTRY(entry, end) ( \ +@@ -63,7 +70,7 @@ static inline int gsi_irq_sharing(int gs + + #define PREFIX "ACPI: " + +-int acpi_noirq __initdata; /* skip ACPI IRQ initialization */ ++int acpi_noirq; /* skip ACPI IRQ initialization */ + int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ + int acpi_ht __initdata = 1; /* enable HT */ + +@@ -75,6 +82,7 @@ EXPORT_SYMBOL(acpi_strict); + acpi_interrupt_flags acpi_sci_flags __initdata; + int acpi_sci_override_gsi __initdata; + int acpi_skip_timer_override __initdata; ++int acpi_use_timer_override __initdata; + + #ifdef CONFIG_X86_LOCAL_APIC + static u64 
acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +@@ -327,7 +335,7 @@ acpi_parse_ioapic(acpi_table_entry_heade + /* + * Parse Interrupt Source Override for the ACPI SCI + */ +-static void acpi_sci_ioapic_setup(u32 gsi, u16 polarity, u16 trigger) ++static void acpi_sci_ioapic_setup(u32 bus_irq, u32 gsi, u16 polarity, u16 trigger) + { + if (trigger == 0) /* compatible SCI trigger is level */ + trigger = 3; +@@ -347,13 +355,13 @@ static void acpi_sci_ioapic_setup(u32 gs + * If GSI is < 16, this will update its flags, + * else it will create a new mp_irqs[] entry. + */ +- mp_override_legacy_irq(gsi, polarity, trigger, gsi); ++ mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + + /* + * stash over-ride to indicate we've been here + * and for later update of acpi_fadt + */ +- acpi_sci_override_gsi = gsi; ++ acpi_sci_override_gsi = bus_irq; + return; + } + +@@ -371,7 +379,7 @@ acpi_parse_int_src_ovr(acpi_table_entry_ + acpi_table_print_madt_entry(header); + + if (intsrc->bus_irq == acpi_fadt.sci_int) { +- acpi_sci_ioapic_setup(intsrc->global_irq, ++ acpi_sci_ioapic_setup(intsrc->bus_irq, intsrc->global_irq, + intsrc->flags.polarity, + intsrc->flags.trigger); + return 0; +@@ -461,12 +469,7 @@ void __init acpi_pic_sci_set_trigger(uns + + int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) + { +-#ifdef CONFIG_X86_IO_APIC +- if (use_pci_vector() && !platform_legacy_irq(gsi)) +- *irq = IO_APIC_VECTOR(gsi); +- else +-#endif +- *irq = gsi_irq_sharing(gsi); ++ *irq = gsi; + return 0; + } + +@@ -508,16 +511,76 @@ EXPORT_SYMBOL(acpi_register_gsi); + #ifdef CONFIG_ACPI_HOTPLUG_CPU + int acpi_map_lsapic(acpi_handle handle, int *pcpu) + { +- /* TBD */ +- return -EINVAL; ++ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; ++ union acpi_object *obj; ++ struct acpi_table_lapic *lapic; ++ cpumask_t tmp_map, new_map; ++ u8 physid; ++ int cpu; ++ ++ if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) ++ return -EINVAL; ++ ++ if (!buffer.length || !buffer.pointer) ++ return -EINVAL; ++ ++ obj = buffer.pointer; ++ if (obj->type != ACPI_TYPE_BUFFER || ++ obj->buffer.length < sizeof(*lapic)) { ++ kfree(buffer.pointer); ++ return -EINVAL; ++ } ++ ++ lapic = (struct acpi_table_lapic *)obj->buffer.pointer; ++ ++ if ((lapic->header.type != ACPI_MADT_LAPIC) || ++ (!lapic->flags.enabled)) { ++ kfree(buffer.pointer); ++ return -EINVAL; ++ } ++ ++ physid = lapic->id; ++ ++ kfree(buffer.pointer); ++ buffer.length = ACPI_ALLOCATE_BUFFER; ++ buffer.pointer = NULL; ++ ++ tmp_map = cpu_present_map; ++ mp_register_lapic(physid, lapic->flags.enabled); ++ ++ /* ++ * If mp_register_lapic successfully generates a new logical cpu ++ * number, then the following will get us exactly what was mapped ++ */ ++ cpus_andnot(new_map, cpu_present_map, tmp_map); ++ if (cpus_empty(new_map)) { ++ printk ("Unable to map lapic to logical cpu number\n"); ++ return -EINVAL; ++ } ++ ++ cpu = first_cpu(new_map); ++ ++ *pcpu = cpu; ++ return 0; + } + + EXPORT_SYMBOL(acpi_map_lsapic); + + int acpi_unmap_lsapic(int cpu) + { +- /* TBD */ +- return -EINVAL; ++ int i; ++ ++ for_each_possible_cpu(i) { ++ if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) { ++ x86_acpiid_to_apicid[i] = -1; ++ break; ++ } ++ } ++ x86_cpu_to_apicid[cpu] = -1; ++ cpu_clear(cpu, cpu_present_map); ++ num_processors--; ++ ++ return (0); + } + + EXPORT_SYMBOL(acpi_unmap_lsapic); +@@ -582,6 +645,8 @@ static int __init acpi_parse_sbf(unsigne + static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) + { + struct acpi_table_hpet 
*hpet_tbl; ++ struct resource *hpet_res; ++ resource_size_t res_start; + + if (!phys || !size) + return -EINVAL; +@@ -597,12 +662,26 @@ static int __init acpi_parse_hpet(unsign + "memory.\n"); + return -1; + } ++ ++#define HPET_RESOURCE_NAME_SIZE 9 ++ hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); ++ if (hpet_res) { ++ memset(hpet_res, 0, sizeof(*hpet_res)); ++ hpet_res->name = (void *)&hpet_res[1]; ++ hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, ++ "HPET %u", hpet_tbl->number); ++ hpet_res->end = (1 * 1024) - 1; ++ } ++ + #ifdef CONFIG_X86_64 + vxtime.hpet_address = hpet_tbl->addr.addrl | + ((long)hpet_tbl->addr.addrh << 32); + + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, vxtime.hpet_address); ++ ++ res_start = vxtime.hpet_address; + #else /* X86 */ + { + extern unsigned long hpet_address; +@@ -610,9 +689,17 @@ static int __init acpi_parse_hpet(unsign + hpet_address = hpet_tbl->addr.addrl; + printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", + hpet_tbl->id, hpet_address); ++ ++ res_start = hpet_address; + } + #endif /* X86 */ + ++ if (hpet_res) { ++ hpet_res->start = res_start; ++ hpet_res->end += res_start; ++ insert_resource(&iomem_resource, hpet_res); ++ } ++ + return 0; + } + #else +@@ -796,7 +883,7 @@ static int __init acpi_parse_madt_ioapic + * pretend we got one so we can set the SCI flags. + */ + if (!acpi_sci_override_gsi) +- acpi_sci_ioapic_setup(acpi_fadt.sci_int, 0, 0); ++ acpi_sci_ioapic_setup(acpi_fadt.sci_int, acpi_fadt.sci_int, 0, 0); + + /* Fill in identity legacy mapings where no override */ + mp_config_acpi_legacy_irqs(); +@@ -863,8 +950,6 @@ static void __init acpi_process_madt(voi + return; + } + +-extern int acpi_force; +- + #ifdef __i386__ + + static int __init disable_acpi_irq(struct dmi_system_id *d) +@@ -1166,3 +1251,82 @@ int __init acpi_boot_init(void) + + return 0; + } ++ ++static int __init parse_acpi(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; ++ ++ /* "acpi=off" disables both ACPI table parsing and interpreter */ ++ if (strcmp(arg, "off") == 0) { ++ disable_acpi(); ++ } ++ /* acpi=force to over-ride black-list */ ++ else if (strcmp(arg, "force") == 0) { ++ acpi_force = 1; ++ acpi_ht = 1; ++ acpi_disabled = 0; ++ } ++ /* acpi=strict disables out-of-spec workarounds */ ++ else if (strcmp(arg, "strict") == 0) { ++ acpi_strict = 1; ++ } ++ /* Limit ACPI just to boot-time to enable HT */ ++ else if (strcmp(arg, "ht") == 0) { ++ if (!acpi_force) ++ disable_acpi(); ++ acpi_ht = 1; ++ } ++ /* "acpi=noirq" disables ACPI interrupt routing */ ++ else if (strcmp(arg, "noirq") == 0) { ++ acpi_noirq_set(); ++ } else { ++ /* Core will printk when we return error. */ ++ return -EINVAL; ++ } ++ return 0; ++} ++early_param("acpi", parse_acpi); ++ ++/* FIXME: Using pci= for an ACPI parameter is a travesty. 
*/ ++static int __init parse_pci(char *arg) ++{ ++ if (arg && strcmp(arg, "noacpi") == 0) ++ acpi_disable_pci(); ++ return 0; ++} ++early_param("pci", parse_pci); ++ ++#ifdef CONFIG_X86_IO_APIC ++static int __init parse_acpi_skip_timer_override(char *arg) ++{ ++ acpi_skip_timer_override = 1; ++ return 0; ++} ++early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); ++ ++static int __init parse_acpi_use_timer_override(char *arg) ++{ ++ acpi_use_timer_override = 1; ++ return 0; ++} ++early_param("acpi_use_timer_override", parse_acpi_use_timer_override); ++#endif /* CONFIG_X86_IO_APIC */ ++ ++static int __init setup_acpi_sci(char *s) ++{ ++ if (!s) ++ return -EINVAL; ++ if (!strcmp(s, "edge")) ++ acpi_sci_flags.trigger = 1; ++ else if (!strcmp(s, "level")) ++ acpi_sci_flags.trigger = 3; ++ else if (!strcmp(s, "high")) ++ acpi_sci_flags.polarity = 1; ++ else if (!strcmp(s, "low")) ++ acpi_sci_flags.polarity = 3; ++ else ++ return -EINVAL; ++ return 0; ++} ++early_param("acpi_sci", setup_acpi_sci); +Index: 10.3-2007-11-26/arch/i386/kernel/apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/apic-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/apic-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi; + /* + * Knob to control our willingness to enable the local APIC. + */ +-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + + /* + * Debug level +@@ -102,7 +101,7 @@ int get_physical_broadcast(void) + + #ifndef CONFIG_XEN + #ifndef CONFIG_SMP +-static void up_apic_timer_interrupt_call(struct pt_regs *regs) ++static void up_apic_timer_interrupt_call(void) + { + int cpu = smp_processor_id(); + +@@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call + */ + per_cpu(irq_stat, cpu).apic_timer_irqs++; + +- smp_local_timer_interrupt(regs); ++ smp_local_timer_interrupt(); + } + #endif + +-void smp_send_timer_broadcast_ipi(struct pt_regs *regs) ++void smp_send_timer_broadcast_ipi(void) + { + cpumask_t mask; + +@@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct + * We can directly call the apic timer interrupt handler + * in UP case. Minus all irq related functions + */ +- up_apic_timer_interrupt_call(regs); ++ up_apic_timer_interrupt_call(); + #endif + } + } +Index: 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/cpu/common-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/cpu/common-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM + + extern int disable_pse; + +-static void default_init(struct cpuinfo_x86 * c) ++static void __cpuinit default_init(struct cpuinfo_x86 * c) + { + /* Not much we can do here... */ + /* Check if at least it has cpuid */ +@@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_ + } + } + +-static struct cpu_dev default_cpu = { ++static struct cpu_dev __cpuinitdata default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + }; +@@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str + + static int __init x86_fxsr_setup(char * s) + { ++ /* Tell all the other CPU's to not use it... */ + disable_x86_fxsr = 1; ++ ++ /* ++ * ... and clear the bits early in the boot_cpu_data ++ * so that the bootup process doesn't try to do this ++ * either. 
++ */ ++ clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability); ++ clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability); + return 1; + } + __setup("nofxsr", x86_fxsr_setup); +@@ -272,7 +281,7 @@ static void __init early_cpu_detect(void + } + } + +-void __cpuinit generic_identify(struct cpuinfo_x86 * c) ++static void __cpuinit generic_identify(struct cpuinfo_x86 * c) + { + u32 tfms, xlvl; + int ebx; +@@ -698,8 +707,7 @@ old_gdt: + */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; +- if (current->mm) +- BUG(); ++ BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); + + load_esp0(t, thread); +@@ -712,7 +720,7 @@ old_gdt: + #endif + + /* Clear %fs and %gs. */ +- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); ++ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); + + /* Clear all 6 debug registers: */ + set_debugreg(0, 0); +Index: 10.3-2007-11-26/arch/i386/kernel/crash.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/crash.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/crash.c 2007-10-22 13:53:08.000000000 +0200 +@@ -135,6 +135,8 @@ void machine_crash_shutdown(struct pt_re + #if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); + #endif +-#endif /* CONFIG_XEN */ + crash_save_cpu(regs, safe_smp_processor_id()); ++#else ++ crash_save_cpu(regs, smp_processor_id()); ++#endif /* CONFIG_XEN */ + } +Index: 10.3-2007-11-26/arch/i386/kernel/entry-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/entry-xen.S 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/entry-xen.S 2007-10-22 13:53:08.000000000 +0200 +@@ -80,8 +80,12 @@ VM_MASK = 0x00020000 + NMI_MASK = 0x80000000 + + #ifndef CONFIG_XEN +-#define DISABLE_INTERRUPTS cli +-#define ENABLE_INTERRUPTS sti ++/* These are replaces for paravirtualization */ ++#define DISABLE_INTERRUPTS cli ++#define ENABLE_INTERRUPTS sti ++#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit ++#define INTERRUPT_RETURN iret ++#define GET_CR0_INTO_EAX movl %cr0, %eax + #else + /* Offsets into shared_info_t. */ + #define evtchn_upcall_pending /* 0 */ +@@ -99,15 +103,29 @@ NMI_MASK = 0x80000000 + + #define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi) + #define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi) ++#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) + #define DISABLE_INTERRUPTS GET_VCPU_INFO ; \ + __DISABLE_INTERRUPTS + #define ENABLE_INTERRUPTS GET_VCPU_INFO ; \ + __ENABLE_INTERRUPTS +-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi) ++#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \ ++sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \ ++ __TEST_PENDING ; \ ++ jnz 14f # process more events if necessary... 
; \ ++ movl ESI(%esp), %esi ; \ ++ sysexit ; \ ++14: __DISABLE_INTERRUPTS ; \ ++ TRACE_IRQS_OFF ; \ ++sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \ ++ push %esp ; \ ++ call evtchn_do_upcall ; \ ++ add $4,%esp ; \ ++ jmp ret_from_intr ++#define INTERRUPT_RETURN iret + #endif + + #ifdef CONFIG_PREEMPT +-#define preempt_stop cli; TRACE_IRQS_OFF ++#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF + #else + #define preempt_stop + #define resume_kernel restore_nocheck +@@ -206,18 +224,21 @@ NMI_MASK = 0x80000000 + + #define RING0_INT_FRAME \ + CFI_STARTPROC simple;\ ++ CFI_SIGNAL_FRAME;\ + CFI_DEF_CFA esp, 3*4;\ + /*CFI_OFFSET cs, -2*4;*/\ + CFI_OFFSET eip, -3*4 + + #define RING0_EC_FRAME \ + CFI_STARTPROC simple;\ ++ CFI_SIGNAL_FRAME;\ + CFI_DEF_CFA esp, 4*4;\ + /*CFI_OFFSET cs, -2*4;*/\ + CFI_OFFSET eip, -3*4 + + #define RING0_PTREGS_FRAME \ + CFI_STARTPROC simple;\ ++ CFI_SIGNAL_FRAME;\ + CFI_DEF_CFA esp, OLDESP-EBX;\ + /*CFI_OFFSET cs, CS-OLDESP;*/\ + CFI_OFFSET eip, EIP-OLDESP;\ +@@ -263,8 +284,9 @@ ret_from_intr: + check_userspace: + movl EFLAGS(%esp), %eax # mix EFLAGS and CS + movb CS(%esp), %al +- testl $(VM_MASK | 2), %eax +- jz resume_kernel ++ andl $(VM_MASK | SEGMENT_RPL_MASK), %eax ++ cmpl $USER_RPL, %eax ++ jb resume_kernel # not returning to v8086 or userspace + ENTRY(resume_userspace) + DISABLE_INTERRUPTS # make sure we don't miss an interrupt + # setting need_resched or sigpending +@@ -277,7 +299,7 @@ ENTRY(resume_userspace) + + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) +- cli ++ DISABLE_INTERRUPTS + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_nocheck + need_resched: +@@ -297,6 +319,7 @@ need_resched: + # sysenter call handler stub + ENTRY(sysenter_entry) + CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA esp, 0 + CFI_REGISTER esp, ebp + movl SYSENTER_stack_esp0(%esp),%esp +@@ -305,7 +328,7 @@ sysenter_past_esp: + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ +- sti ++ ENABLE_INTERRUPTS + pushl $(__USER_DS) + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ss, 0*/ +@@ -359,26 +382,8 @@ sysenter_past_esp: + movl EIP(%esp), %edx + movl OLDESP(%esp), %ecx + xorl %ebp,%ebp +-#ifdef CONFIG_XEN + TRACE_IRQS_ON +- __ENABLE_INTERRUPTS +-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ +- __TEST_PENDING +- jnz 14f # process more events if necessary... +- movl ESI(%esp), %esi +- sysexit +-14: __DISABLE_INTERRUPTS +- TRACE_IRQS_OFF +-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ +- push %esp +- call evtchn_do_upcall +- add $4,%esp +- jmp ret_from_intr +-#else +- TRACE_IRQS_ON +- sti +- sysexit +-#endif /* !CONFIG_XEN */ ++ ENABLE_INTERRUPTS_SYSEXIT + CFI_ENDPROC + + +@@ -419,8 +424,8 @@ restore_all: + # See comments in process.c:copy_thread() for details. + movb OLDSS(%esp), %ah + movb CS(%esp), %al +- andl $(VM_MASK | (4 << 8) | 3), %eax +- cmpl $((4 << 8) | 3), %eax ++ andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax ++ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax + CFI_REMEMBER_STATE + je ldt_ss # returning to user-space with LDT SS + restore_nocheck: +@@ -442,12 +447,11 @@ restore_nocheck_notrace: + RESTORE_REGS + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 +-1: iret ++1: INTERRUPT_RETURN + .section .fixup,"ax" + iret_exc: + #ifndef CONFIG_XEN +- TRACE_IRQS_ON +- sti ++ ENABLE_INTERRUPTS + #endif + pushl $0 # no error code + pushl $do_iret_error +@@ -473,7 +477,7 @@ ldt_ss: + * dosemu and wine happy. 
*/ + subl $8, %esp # reserve space for switch16 pointer + CFI_ADJUST_CFA_OFFSET 8 +- cli ++ DISABLE_INTERRUPTS + TRACE_IRQS_OFF + movl %esp, %eax + /* Set up the 16bit stack frame with switch32 pointer on top, +@@ -483,7 +487,7 @@ ldt_ss: + TRACE_IRQS_IRET + RESTORE_REGS + lss 20+4(%esp), %esp # switch to 16bit stack +-1: iret ++1: INTERRUPT_RETURN + .section __ex_table,"a" + .align 4 + .long 1b,iret_exc +@@ -499,7 +503,7 @@ scrit: /**** START OF CRITICAL REGION ** + RESTORE_REGS + addl $4, %esp + CFI_ADJUST_CFA_OFFSET -4 +-1: iret ++1: INTERRUPT_RETURN + .section __ex_table,"a" + .align 4 + .long 1b,iret_exc +@@ -688,11 +692,9 @@ ENTRY(name) \ + #define UNWIND_ESPFIX_STACK + #endif + +-ENTRY(divide_error) +- RING0_INT_FRAME +- pushl $0 # no error code +- CFI_ADJUST_CFA_OFFSET 4 +- pushl $do_divide_error ++KPROBE_ENTRY(page_fault) ++ RING0_EC_FRAME ++ pushl $do_page_fault + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: +@@ -742,6 +744,7 @@ error_code: + call *%edi + jmp ret_from_exception + CFI_ENDPROC ++KPROBE_END(page_fault) + + #ifdef CONFIG_XEN + # A note on the "critical region" in our callback handler. +@@ -901,7 +904,7 @@ ENTRY(device_not_available) + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + #ifndef CONFIG_XEN +- movl %cr0, %eax ++ GET_CR0_INTO_EAX + testl $0x4, %eax # EM (math emulation bit) + je device_available_emulate + pushl $0 # temporary storage for ORIG_EIP +@@ -936,9 +939,15 @@ device_available_emulate: + jne ok; \ + label: \ + movl SYSENTER_stack_esp0+offset(%esp),%esp; \ ++ CFI_DEF_CFA esp, 0; \ ++ CFI_UNDEFINED eip; \ + pushfl; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ + pushl $__KERNEL_CS; \ +- pushl $sysenter_past_esp ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ pushl $sysenter_past_esp; \ ++ CFI_ADJUST_CFA_OFFSET 4; \ ++ CFI_REL_OFFSET eip, 0 + #endif /* CONFIG_XEN */ + + KPROBE_ENTRY(debug) +@@ -957,7 +966,8 @@ debug_stack_correct: + call do_debug + jmp ret_from_exception + CFI_ENDPROC +- .previous .text ++KPROBE_END(debug) ++ + #ifndef CONFIG_XEN + /* + * NMI is doubly nasty. It can happen _while_ we're handling +@@ -967,7 +977,7 @@ debug_stack_correct: + * check whether we got an NMI on the debug path where the debug + * fault happened on the sysenter path. + */ +-ENTRY(nmi) ++KPROBE_ENTRY(nmi) + RING0_INT_FRAME + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 +@@ -992,6 +1002,7 @@ ENTRY(nmi) + cmpl $sysenter_entry,12(%esp) + je nmi_debug_stack_check + nmi_stack_correct: ++ /* We have a RING0_INT_FRAME here */ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL +@@ -1002,9 +1013,12 @@ nmi_stack_correct: + CFI_ENDPROC + + nmi_stack_fixup: ++ RING0_INT_FRAME + FIX_STACK(12,nmi_stack_correct, 1) + jmp nmi_stack_correct ++ + nmi_debug_stack_check: ++ /* We have a RING0_INT_FRAME here */ + cmpw $__KERNEL_CS,16(%esp) + jne nmi_stack_correct + cmpl $debug,(%esp) +@@ -1015,8 +1029,10 @@ nmi_debug_stack_check: + jmp nmi_stack_correct + + nmi_16bit_stack: +- RING0_INT_FRAME +- /* create the pointer to lss back */ ++ /* We have a RING0_INT_FRAME here. 
++ * ++ * create the pointer to lss back ++ */ + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp +@@ -1037,14 +1053,14 @@ nmi_16bit_stack: + call do_nmi + RESTORE_REGS + lss 12+4(%esp), %esp # back to 16bit stack +-1: iret ++1: INTERRUPT_RETURN + CFI_ENDPROC + .section __ex_table,"a" + .align 4 + .long 1b,iret_exc + .previous + #else +-ENTRY(nmi) ++KPROBE_ENTRY(nmi) + RING0_INT_FRAME + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 +@@ -1056,6 +1072,7 @@ ENTRY(nmi) + jmp restore_all + CFI_ENDPROC + #endif ++KPROBE_END(nmi) + + KPROBE_ENTRY(int3) + RING0_INT_FRAME +@@ -1067,7 +1084,7 @@ KPROBE_ENTRY(int3) + call do_int3 + jmp ret_from_exception + CFI_ENDPROC +- .previous .text ++KPROBE_END(int3) + + ENTRY(overflow) + RING0_INT_FRAME +@@ -1132,7 +1149,7 @@ KPROBE_ENTRY(general_protection) + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +- .previous .text ++KPROBE_END(general_protection) + + ENTRY(alignment_check) + RING0_EC_FRAME +@@ -1141,13 +1158,14 @@ ENTRY(alignment_check) + jmp error_code + CFI_ENDPROC + +-KPROBE_ENTRY(page_fault) +- RING0_EC_FRAME +- pushl $do_page_fault ++ENTRY(divide_error) ++ RING0_INT_FRAME ++ pushl $0 # no error code ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $do_divide_error + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +- .previous .text + + #ifdef CONFIG_X86_MCE + ENTRY(machine_check) +@@ -1209,6 +1227,19 @@ ENTRY(fixup_4gb_segment) + jmp error_code + CFI_ENDPROC + ++ENTRY(kernel_thread_helper) ++ pushl $0 # fake return address for unwinder ++ CFI_STARTPROC ++ movl %edx,%eax ++ push %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ call *%ebx ++ push %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ call do_exit ++ CFI_ENDPROC ++ENDPROC(kernel_thread_helper) ++ + .section .rodata,"a" + .align 4 + #include "syscall_table.S" +Index: 10.3-2007-11-26/arch/i386/kernel/head-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/head-xen.S 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/head-xen.S 2007-10-22 13:53:08.000000000 +0200 +@@ -62,7 +62,7 @@ ENTRY(startup_32) + movl %eax,%gs + cld # gcc2 wants the direction flag cleared at all times + +- pushl %eax # fake return address ++ pushl $0 # fake return address for unwinder + jmp start_kernel + + #define HYPERCALL_PAGE_OFFSET 0x1000 +Index: 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/io_apic-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/io_apic-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -31,6 +31,9 @@ + #include <linux/acpi.h> + #include <linux/module.h> + #include <linux/sysdev.h> ++#include <linux/pci.h> ++#include <linux/msi.h> ++#include <linux/htirq.h> + + #include <asm/io.h> + #include <asm/smp.h> +@@ -38,13 +41,15 @@ + #include <asm/timer.h> + #include <asm/i8259.h> + #include <asm/nmi.h> ++#include <asm/msidef.h> ++#include <asm/hypertransport.h> + + #include <mach_apic.h> ++#include <mach_apicdef.h> + + #include "io_ports.h" + + #ifdef CONFIG_XEN +- + #include <xen/interface/xen.h> + #include <xen/interface/physdev.h> + +@@ -55,32 +60,7 @@ + + unsigned long io_apic_irqs; + +-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) +-{ +- struct physdev_apic apic_op; +- int ret; +- +- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; +- apic_op.reg = reg; +- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); +- if (ret) +- return ret; +- return apic_op.value; 
+-} +- +-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +-{ +- struct physdev_apic apic_op; +- +- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; +- apic_op.reg = reg; +- apic_op.value = value; +- HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); +-} +- +-#define io_apic_read(a,r) xen_io_apic_read(a,r) +-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) +- ++#define clear_IO_APIC() ((void)0) + #endif /* CONFIG_XEN */ + + int (*ioapic_renumber_irq)(int ioapic, int irq); +@@ -105,7 +85,7 @@ int sis_apic_bug = -1; + */ + int nr_ioapic_registers[MAX_IO_APICS]; + +-int disable_timer_pin_1 __initdata; ++static int disable_timer_pin_1 __initdata; + + /* + * Rough estimation of how many shared IRQs there are, can +@@ -125,12 +105,122 @@ static struct irq_pin_list { + int apic, pin, next; + } irq_2_pin[PIN_MAP_SIZE]; + +-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; +-#ifdef CONFIG_PCI_MSI +-#define vector_to_irq(vector) \ +- (platform_legacy_irq(vector) ? vector : vector_irq[vector]) ++#ifndef CONFIG_XEN ++struct io_apic { ++ unsigned int index; ++ unsigned int unused[3]; ++ unsigned int data; ++}; ++ ++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) ++{ ++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) ++ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); ++} ++#endif ++ ++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) ++{ ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ return readl(&io_apic->data); + #else +-#define vector_to_irq(vector) (vector) ++ struct physdev_apic apic_op; ++ int ret; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); ++ if (ret) ++ return ret; ++ return apic_op.value; ++#endif ++} ++ ++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++#else ++ struct physdev_apic apic_op; ++ ++ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; ++ apic_op.reg = reg; ++ apic_op.value = value; ++ HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); ++#endif ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * Re-write a value: to be used for read-modify-write ++ * cycles where the read already set up the index register. ++ * ++ * Older SiS APIC requires we rewrite the index register ++ */ ++static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ volatile struct io_apic *io_apic = io_apic_base(apic); ++ if (sis_apic_bug) ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++} ++#else ++#define io_apic_modify io_apic_write ++#endif ++ ++union entry_union { ++ struct { u32 w1, w2; }; ++ struct IO_APIC_route_entry entry; ++}; ++ ++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); ++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ return eu.entry; ++} ++ ++/* ++ * When we write a new IO APIC routing entry, we need to write the high ++ * word first! 
If the mask bit in the low word is clear, we will enable ++ * the interrupt, and we need to make sure the entry is fully populated ++ * before that happens. ++ */ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ union entry_union eu; ++ eu.entry = e; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * When we mask an IO APIC routing entry, we need to write the low ++ * word first, in order to set the mask bit before we change the ++ * high bits! ++ */ ++static void ioapic_mask_entry(int apic, int pin) ++{ ++ unsigned long flags; ++ union entry_union eu = { .entry.mask = 1 }; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} + #endif + + /* +@@ -156,9 +246,7 @@ static void add_pin_to_irq(unsigned int + entry->pin = pin; + } + +-#ifdef CONFIG_XEN +-#define clear_IO_APIC() ((void)0) +-#else ++#ifndef CONFIG_XEN + /* + * Reroute an IRQ to a different pin. + */ +@@ -243,25 +331,16 @@ static void unmask_IO_APIC_irq (unsigned + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) + { + struct IO_APIC_route_entry entry; +- unsigned long flags; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); +- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ entry = ioapic_read_entry(apic, pin); + if (entry.delivery_mode == dest_SMI) + return; + + /* + * Disable it in the IO-APIC irq-routing table: + */ +- memset(&entry, 0, sizeof(entry)); +- entry.mask = 1; +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); +- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_mask_entry(apic, pin); + } + + static void clear_IO_APIC (void) +@@ -301,7 +380,7 @@ static void set_ioapic_affinity_irq(unsi + break; + entry = irq_2_pin + entry->next; + } +- set_irq_info(irq, cpumask); ++ set_native_irq_info(irq, cpumask); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +@@ -1207,40 +1286,40 @@ static inline int IO_APIC_irq_trigger(in + /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ + u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */ + +-int assign_irq_vector(int irq) ++static int __assign_irq_vector(int irq) + { +- unsigned long flags; + int vector; + struct physdev_irq irq_op; + +- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); ++ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + +- spin_lock_irqsave(&vector_lock, flags); +- +- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { +- spin_unlock_irqrestore(&vector_lock, flags); +- return IO_APIC_VECTOR(irq); +- } ++ if (irq_vector[irq] > 0) ++ return irq_vector[irq]; + + irq_op.irq = irq; +- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { +- spin_unlock_irqrestore(&vector_lock, flags); ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; +- } + + vector = irq_op.vector; +- vector_irq[vector] = irq; +- if (irq != AUTO_ASSIGN) +- IO_APIC_VECTOR(irq) = vector; ++ irq_vector[irq] = vector; ++ ++ return vector; ++} ++ ++static int assign_irq_vector(int irq) ++{ ++ unsigned long flags; ++ int vector; + ++ spin_lock_irqsave(&vector_lock, flags); ++ vector = __assign_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); + + return vector; + } + + #ifndef CONFIG_XEN +-static struct hw_interrupt_type ioapic_level_type; +-static struct hw_interrupt_type ioapic_edge_type; ++static struct irq_chip ioapic_chip; + + #define IOAPIC_AUTO -1 + #define IOAPIC_EDGE 0 +@@ -1248,16 +1327,16 @@ static struct hw_interrupt_type ioapic_e + + static void ioapic_register_intr(int irq, int vector, unsigned long trigger) + { +- unsigned idx; +- +- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq; +- + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) +- irq_desc[idx].chip = &ioapic_level_type; +- else +- irq_desc[idx].chip = &ioapic_edge_type; +- set_intr_gate(vector, interrupt[idx]); ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_fasteoi_irq, "fasteoi"); ++ else { ++ irq_desc[irq].status |= IRQ_DELAYED_DISABLE; ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_edge_irq, "edge"); ++ } ++ set_intr_gate(vector, interrupt[irq]); + } + #else + #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) +@@ -1328,9 +1407,8 @@ static void __init setup_IO_APIC_irqs(vo + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } ++ ioapic_write_entry(apic, pin, entry); + spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); +- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + } +@@ -1347,7 +1425,6 @@ static void __init setup_IO_APIC_irqs(vo + static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) + { + struct IO_APIC_route_entry entry; +- unsigned long flags; + + memset(&entry,0,sizeof(entry)); + +@@ -1372,15 +1449,13 @@ static void __init setup_ExtINT_IRQ0_pin + * The timer IRQ doesn't have to know that behind the + * scene we have a 8259A-master in AEOI mode ... 
+ */ +- irq_desc[0].chip = &ioapic_edge_type; ++ irq_desc[0].chip = &ioapic_chip; ++ set_irq_handler(0, handle_edge_irq); + + /* + * Add it to the IO-APIC irq-routing table: + */ +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); +- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_write_entry(apic, pin, entry); + + enable_8259A_irq(0); + } +@@ -1490,10 +1565,7 @@ void __init print_IO_APIC(void) + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); +- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ entry = ioapic_read_entry(apic, i); + + printk(KERN_DEBUG " %02x %03X %02X ", + i, +@@ -1513,17 +1585,12 @@ void __init print_IO_APIC(void) + ); + } + } +- if (use_pci_vector()) +- printk(KERN_INFO "Using vector-based indexing\n"); + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for (i = 0; i < NR_IRQS; i++) { + struct irq_pin_list *entry = irq_2_pin + i; + if (entry->pin < 0) + continue; +- if (use_pci_vector() && !platform_legacy_irq(i)) +- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); +- else +- printk(KERN_DEBUG "IRQ%d ", i); ++ printk(KERN_DEBUG "IRQ%d ", i); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) +@@ -1716,10 +1783,7 @@ static void __init enable_IO_APIC(void) + /* See if any of the pins is in ExtINT mode */ + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); +- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ entry = ioapic_read_entry(apic, pin); + + + /* If the interrupt line is enabled and in ExtInt mode +@@ -1777,7 +1841,6 @@ void disable_IO_APIC(void) + */ + if (ioapic_i8259.pin != -1) { + struct IO_APIC_route_entry entry; +- unsigned long flags; + + memset(&entry, 0, sizeof(entry)); + entry.mask = 0; /* Enabled */ +@@ -1794,12 +1857,7 @@ void disable_IO_APIC(void) + /* + * Add it to the IO-APIC irq-routing table: + */ +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, +- *(((int *)&entry)+1)); +- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, +- *(((int *)&entry)+0)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); + } + disconnect_bsp_APIC(ioapic_i8259.pin != -1); + #endif +@@ -1966,6 +2024,8 @@ static int __init timer_irq_works(void) + */ + + /* ++ * Startup quirk: ++ * + * Starting up a edge-triggered IO-APIC interrupt is + * nasty - we need to make sure that we get the edge. + * If it is already asserted for some reason, we need +@@ -1973,8 +2033,10 @@ static int __init timer_irq_works(void) + * + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... ++ * ++ * (We do this for level-triggered IRQs too - it cannot hurt.) 
+ */ +-static unsigned int startup_edge_ioapic_irq(unsigned int irq) ++static unsigned int startup_ioapic_irq(unsigned int irq) + { + int was_pending = 0; + unsigned long flags; +@@ -1991,47 +2053,18 @@ static unsigned int startup_edge_ioapic_ + return was_pending; + } + +-/* +- * Once we have recorded IRQ_PENDING already, we can mask the +- * interrupt for real. This prevents IRQ storms from unhandled +- * devices. +- */ +-static void ack_edge_ioapic_irq(unsigned int irq) +-{ +- move_irq(irq); +- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) +- == (IRQ_PENDING | IRQ_DISABLED)) +- mask_IO_APIC_irq(irq); +- ack_APIC_irq(); +-} +- +-/* +- * Level triggered interrupts can just be masked, +- * and shutting down and starting up the interrupt +- * is the same as enabling and disabling them -- except +- * with a startup need to return a "was pending" value. +- * +- * Level triggered interrupts are special because we +- * do not touch any IO-APIC register while handling +- * them. We ack the APIC in the end-IRQ handler, not +- * in the start-IRQ-handler. Protection against reentrance +- * from the same interrupt is still provided, both by the +- * generic IRQ layer and by the fact that an unacked local +- * APIC does not accept IRQs. +- */ +-static unsigned int startup_level_ioapic_irq (unsigned int irq) ++static void ack_ioapic_irq(unsigned int irq) + { +- unmask_IO_APIC_irq(irq); +- +- return 0; /* don't check for pending */ ++ move_native_irq(irq); ++ ack_APIC_irq(); + } + +-static void end_level_ioapic_irq (unsigned int irq) ++static void ack_ioapic_quirk_irq(unsigned int irq) + { + unsigned long v; + int i; + +- move_irq(irq); ++ move_native_irq(irq); + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various +@@ -2051,7 +2084,7 @@ static void end_level_ioapic_irq (unsign + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. 
--macro + */ +- i = IO_APIC_VECTOR(irq); ++ i = irq_vector[irq]; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + +@@ -2066,104 +2099,24 @@ static void end_level_ioapic_irq (unsign + } + } + +-#ifdef CONFIG_PCI_MSI +-static unsigned int startup_edge_ioapic_vector(unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- return startup_edge_ioapic_irq(irq); +-} +- +-static void ack_edge_ioapic_vector(unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- move_native_irq(vector); +- ack_edge_ioapic_irq(irq); +-} +- +-static unsigned int startup_level_ioapic_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- return startup_level_ioapic_irq (irq); +-} +- +-static void end_level_ioapic_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- move_native_irq(vector); +- end_level_ioapic_irq(irq); +-} +- +-static void mask_IO_APIC_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- mask_IO_APIC_irq(irq); +-} +- +-static void unmask_IO_APIC_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- unmask_IO_APIC_irq(irq); +-} +- +-#ifdef CONFIG_SMP +-static void set_ioapic_affinity_vector (unsigned int vector, +- cpumask_t cpu_mask) ++static int ioapic_retrigger_irq(unsigned int irq) + { +- int irq = vector_to_irq(vector); +- +- set_native_irq_info(vector, cpu_mask); +- set_ioapic_affinity_irq(irq, cpu_mask); +-} +-#endif +-#endif +- +-static int ioapic_retrigger(unsigned int irq) +-{ +- send_IPI_self(IO_APIC_VECTOR(irq)); ++ send_IPI_self(irq_vector[irq]); + + return 1; + } + +-/* +- * Level and edge triggered IO-APIC interrupts need different handling, +- * so we use two separate IRQ descriptors. Edge triggered IRQs can be +- * handled with the level-triggered descriptor, but that one has slightly +- * more overhead. Level-triggered interrupts cannot be handled with the +- * edge-triggered handler, without risking IRQ storms and other ugly +- * races. +- */ +-static struct hw_interrupt_type ioapic_edge_type __read_mostly = { +- .typename = "IO-APIC-edge", +- .startup = startup_edge_ioapic, +- .shutdown = shutdown_edge_ioapic, +- .enable = enable_edge_ioapic, +- .disable = disable_edge_ioapic, +- .ack = ack_edge_ioapic, +- .end = end_edge_ioapic, ++static struct irq_chip ioapic_chip __read_mostly = { ++ .name = "IO-APIC", ++ .startup = startup_ioapic_irq, ++ .mask = mask_IO_APIC_irq, ++ .unmask = unmask_IO_APIC_irq, ++ .ack = ack_ioapic_irq, ++ .eoi = ack_ioapic_quirk_irq, + #ifdef CONFIG_SMP +- .set_affinity = set_ioapic_affinity, ++ .set_affinity = set_ioapic_affinity_irq, + #endif +- .retrigger = ioapic_retrigger, +-}; +- +-static struct hw_interrupt_type ioapic_level_type __read_mostly = { +- .typename = "IO-APIC-level", +- .startup = startup_level_ioapic, +- .shutdown = shutdown_level_ioapic, +- .enable = enable_level_ioapic, +- .disable = disable_level_ioapic, +- .ack = mask_and_ack_level_ioapic, +- .end = end_level_ioapic, +-#ifdef CONFIG_SMP +- .set_affinity = set_ioapic_affinity, +-#endif +- .retrigger = ioapic_retrigger, ++ .retrigger = ioapic_retrigger_irq, + }; + #endif /* !CONFIG_XEN */ + +@@ -2184,12 +2137,7 @@ static inline void init_IO_APIC_traps(vo + */ + for (irq = 0; irq < NR_IRQS ; irq++) { + int tmp = irq; +- if (use_pci_vector()) { +- if (!platform_legacy_irq(tmp)) +- if ((tmp = vector_to_irq(tmp)) == -1) +- continue; +- } +- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) { ++ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) { + /* + * Hmm.. 
We don't have an entry for this, + * so default to an old-fashioned 8259 +@@ -2200,22 +2148,23 @@ static inline void init_IO_APIC_traps(vo + #ifndef CONFIG_XEN + else + /* Strange. Oh, well.. */ +- irq_desc[irq].chip = &no_irq_type; ++ irq_desc[irq].chip = &no_irq_chip; + #endif + } + } + } + + #ifndef CONFIG_XEN +-static void enable_lapic_irq (unsigned int irq) +-{ +- unsigned long v; ++/* ++ * The local APIC irq-chip implementation: ++ */ + +- v = apic_read(APIC_LVT0); +- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); ++static void ack_apic(unsigned int irq) ++{ ++ ack_APIC_irq(); + } + +-static void disable_lapic_irq (unsigned int irq) ++static void mask_lapic_irq (unsigned int irq) + { + unsigned long v; + +@@ -2223,21 +2172,19 @@ static void disable_lapic_irq (unsigned + apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); + } + +-static void ack_lapic_irq (unsigned int irq) ++static void unmask_lapic_irq (unsigned int irq) + { +- ack_APIC_irq(); +-} ++ unsigned long v; + +-static void end_lapic_irq (unsigned int i) { /* nothing */ } ++ v = apic_read(APIC_LVT0); ++ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} + +-static struct hw_interrupt_type lapic_irq_type __read_mostly = { +- .typename = "local-APIC-edge", +- .startup = NULL, /* startup_irq() not used for IRQ0 */ +- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ +- .enable = enable_lapic_irq, +- .disable = disable_lapic_irq, +- .ack = ack_lapic_irq, +- .end = end_lapic_irq ++static struct irq_chip lapic_chip __read_mostly = { ++ .name = "local-APIC-edge", ++ .mask = mask_lapic_irq, ++ .unmask = unmask_lapic_irq, ++ .eoi = ack_apic, + }; + + static void setup_nmi (void) +@@ -2270,17 +2217,13 @@ static inline void unlock_ExtINT_logic(v + int apic, pin, i; + struct IO_APIC_route_entry entry0, entry1; + unsigned char save_control, save_freq_select; +- unsigned long flags; + + pin = find_isa_irq_pin(8, mp_INT); + apic = find_isa_irq_apic(8, mp_INT); + if (pin == -1) + return; + +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); +- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ entry0 = ioapic_read_entry(apic, pin); + clear_IO_APIC_pin(apic, pin); + + memset(&entry1, 0, sizeof(entry1)); +@@ -2293,10 +2236,7 @@ static inline void unlock_ExtINT_logic(v + entry1.trigger = 0; + entry1.vector = 0; + +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); +- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_write_entry(apic, pin, entry1); + + save_control = CMOS_READ(RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); +@@ -2315,10 +2255,7 @@ static inline void unlock_ExtINT_logic(v + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + clear_IO_APIC_pin(apic, pin); + +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); +- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_write_entry(apic, pin, entry0); + } + + int timer_uses_ioapic_pin_0; +@@ -2418,7 +2355,8 @@ static inline void check_timer(void) + printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); + + disable_8259A_irq(0); +- irq_desc[0].chip = &lapic_irq_type; ++ set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, ++ "fasteio"); + apic_write_around(APIC_LVT0, 
APIC_DM_FIXED | vector); /* Fixed mode */ + enable_8259A_irq(0); + +@@ -2530,17 +2468,12 @@ static int ioapic_suspend(struct sys_dev + { + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; +- unsigned long flags; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; +- spin_lock_irqsave(&ioapic_lock, flags); +- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { +- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); +- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) ++ entry[i] = ioapic_read_entry(dev->id, i); + + return 0; + } +@@ -2562,11 +2495,9 @@ static int ioapic_resume(struct sys_devi + reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + io_apic_write(dev->id, 0, reg_00.raw); + } +- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { +- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); +- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); +- } + spin_unlock_irqrestore(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) ++ ioapic_write_entry(dev->id, i, entry[i]); + + return 0; + } +@@ -2612,6 +2543,242 @@ static int __init ioapic_init_sysfs(void + + device_initcall(ioapic_init_sysfs); + ++#ifndef CONFIG_XEN ++/* ++ * Dynamic irq allocate and deallocation ++ */ ++int create_irq(void) ++{ ++ /* Allocate an unused irq */ ++ int irq, new, vector; ++ unsigned long flags; ++ ++ irq = -ENOSPC; ++ spin_lock_irqsave(&vector_lock, flags); ++ for (new = (NR_IRQS - 1); new >= 0; new--) { ++ if (platform_legacy_irq(new)) ++ continue; ++ if (irq_vector[new] != 0) ++ continue; ++ vector = __assign_irq_vector(new); ++ if (likely(vector > 0)) ++ irq = new; ++ break; ++ } ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ if (irq >= 0) { ++#ifndef CONFIG_XEN ++ set_intr_gate(vector, interrupt[irq]); ++#endif ++ dynamic_irq_init(irq); ++ } ++ return irq; ++} ++ ++void destroy_irq(unsigned int irq) ++{ ++ unsigned long flags; ++ ++ dynamic_irq_cleanup(irq); ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ irq_vector[irq] = 0; ++ spin_unlock_irqrestore(&vector_lock, flags); ++} ++#endif ++ ++/* ++ * MSI mesage composition ++ */ ++#ifdef CONFIG_PCI_MSI ++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) ++{ ++ int vector; ++ unsigned dest; ++ ++ vector = assign_irq_vector(irq); ++ if (vector >= 0) { ++ dest = cpu_mask_to_apicid(TARGET_CPUS); ++ ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ msg->address_lo = ++ MSI_ADDR_BASE_LO | ++ ((INT_DEST_MODE == 0) ? ++ MSI_ADDR_DEST_MODE_PHYSICAL: ++ MSI_ADDR_DEST_MODE_LOGICAL) | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? ++ MSI_ADDR_REDIRECTION_CPU: ++ MSI_ADDR_REDIRECTION_LOWPRI) | ++ MSI_ADDR_DEST_ID(dest); ++ ++ msg->data = ++ MSI_DATA_TRIGGER_EDGE | ++ MSI_DATA_LEVEL_ASSERT | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
++ MSI_DATA_DELIVERY_FIXED: ++ MSI_DATA_DELIVERY_LOWPRI) | ++ MSI_DATA_VECTOR(vector); ++ } ++ return vector; ++} ++ ++#ifdef CONFIG_SMP ++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ struct msi_msg msg; ++ unsigned int dest; ++ cpumask_t tmp; ++ int vector; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ tmp = TARGET_CPUS; ++ ++ vector = assign_irq_vector(irq); ++ if (vector < 0) ++ return; ++ ++ dest = cpu_mask_to_apicid(mask); ++ ++ read_msi_msg(irq, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ write_msi_msg(irq, &msg); ++ set_native_irq_info(irq, mask); ++} ++#endif /* CONFIG_SMP */ ++ ++/* ++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, ++ * which implement the MSI or MSI-X Capability Structure. ++ */ ++static struct irq_chip msi_chip = { ++ .name = "PCI-MSI", ++ .unmask = unmask_msi_irq, ++ .mask = mask_msi_irq, ++ .ack = ack_ioapic_irq, ++#ifdef CONFIG_SMP ++ .set_affinity = set_msi_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) ++{ ++ struct msi_msg msg; ++ int ret; ++ ret = msi_compose_msg(dev, irq, &msg); ++ if (ret < 0) ++ return ret; ++ ++ write_msi_msg(irq, &msg); ++ ++ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, ++ "edge"); ++ ++ return 0; ++} ++ ++void arch_teardown_msi_irq(unsigned int irq) ++{ ++ return; ++} ++ ++#endif /* CONFIG_PCI_MSI */ ++ ++/* ++ * Hypertransport interrupt support ++ */ ++#ifdef CONFIG_HT_IRQ ++ ++#ifdef CONFIG_SMP ++ ++static void target_ht_irq(unsigned int irq, unsigned int dest) ++{ ++ struct ht_irq_msg msg; ++ fetch_ht_irq_msg(irq, &msg); ++ ++ msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); ++ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); ++ ++ msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); ++ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); ++ ++ write_ht_irq_msg(irq, &msg); ++} ++ ++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ unsigned int dest; ++ cpumask_t tmp; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ tmp = TARGET_CPUS; ++ ++ cpus_and(mask, tmp, CPU_MASK_ALL); ++ ++ dest = cpu_mask_to_apicid(mask); ++ ++ target_ht_irq(irq, dest); ++ set_native_irq_info(irq, mask); ++} ++#endif ++ ++static struct irq_chip ht_irq_chip = { ++ .name = "PCI-HT", ++ .mask = mask_ht_irq, ++ .unmask = unmask_ht_irq, ++ .ack = ack_ioapic_irq, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ht_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) ++{ ++ int vector; ++ ++ vector = assign_irq_vector(irq); ++ if (vector >= 0) { ++ struct ht_irq_msg msg; ++ unsigned dest; ++ cpumask_t tmp; ++ ++ cpus_clear(tmp); ++ cpu_set(vector >> 8, tmp); ++ dest = cpu_mask_to_apicid(tmp); ++ ++ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); ++ ++ msg.address_lo = ++ HT_IRQ_LOW_BASE | ++ HT_IRQ_LOW_DEST_ID(dest) | ++ HT_IRQ_LOW_VECTOR(vector) | ++ ((INT_DEST_MODE == 0) ? ++ HT_IRQ_LOW_DM_PHYSICAL : ++ HT_IRQ_LOW_DM_LOGICAL) | ++ HT_IRQ_LOW_RQEOI_EDGE | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
++ HT_IRQ_LOW_MT_FIXED : ++ HT_IRQ_LOW_MT_ARBITRATED) | ++ HT_IRQ_LOW_IRQ_MASKED; ++ ++ write_ht_irq_msg(irq, &msg); ++ ++ set_irq_chip_and_handler_name(irq, &ht_irq_chip, ++ handle_edge_irq, "edge"); ++ } ++ return vector; ++} ++#endif /* CONFIG_HT_IRQ */ ++ + /* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration + -------------------------------------------------------------------------- */ +@@ -2765,13 +2932,34 @@ int io_apic_set_pci_routing (int ioapic, + if (!ioapic && (irq < 16)) + disable_8259A_irq(irq); + ++ ioapic_write_entry(ioapic, pin, entry); + spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); +- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); +- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); ++ set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; + } + + #endif /* CONFIG_ACPI */ ++ ++static int __init parse_disable_timer_pin_1(char *arg) ++{ ++ disable_timer_pin_1 = 1; ++ return 0; ++} ++early_param("disable_timer_pin_1", parse_disable_timer_pin_1); ++ ++static int __init parse_enable_timer_pin_1(char *arg) ++{ ++ disable_timer_pin_1 = -1; ++ return 0; ++} ++early_param("enable_timer_pin_1", parse_enable_timer_pin_1); ++ ++static int __init parse_noapic(char *arg) ++{ ++ /* disable IO-APIC */ ++ disable_ioapic_setup(); ++ return 0; ++} ++early_param("noapic", parse_noapic); +Index: 10.3-2007-11-26/arch/i386/kernel/irq-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/irq-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/irq-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -53,8 +53,10 @@ static union irq_ctx *softirq_ctx[NR_CPU + */ + fastcall unsigned int do_IRQ(struct pt_regs *regs) + { ++ struct pt_regs *old_regs; + /* high bit used in ret_from_ code */ + int irq = ~regs->orig_eax; ++ struct irq_desc *desc = irq_desc + irq; + #ifdef CONFIG_4KSTACKS + union irq_ctx *curctx, *irqctx; + u32 *isp; +@@ -66,6 +68,7 @@ fastcall unsigned int do_IRQ(struct pt_r + BUG(); + } + ++ old_regs = set_irq_regs(regs); + irq_enter(); + #ifdef CONFIG_DEBUG_STACKOVERFLOW + /* Debugging check for stack overflow: is there less than 1KB free? 
*/ +@@ -110,19 +113,20 @@ fastcall unsigned int do_IRQ(struct pt_r + (curctx->tinfo.preempt_count & SOFTIRQ_MASK); + + asm volatile( +- " xchgl %%ebx,%%esp \n" +- " call __do_IRQ \n" ++ " xchgl %%ebx,%%esp \n" ++ " call *%%edi \n" + " movl %%ebx,%%esp \n" + : "=a" (arg1), "=d" (arg2), "=b" (ebx) +- : "0" (irq), "1" (regs), "2" (isp) +- : "memory", "cc", "ecx" ++ : "0" (irq), "1" (desc), "2" (isp), ++ "D" (desc->handle_irq) ++ : "memory", "cc" + ); + } else + #endif +- __do_IRQ(irq, regs); ++ desc->handle_irq(irq, desc); + + irq_exit(); +- ++ set_irq_regs(old_regs); + return 1; + } + +@@ -253,7 +257,8 @@ int show_interrupts(struct seq_file *p, + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + #endif +- seq_printf(p, " %14s", irq_desc[i].chip->typename); ++ seq_printf(p, " %8s", irq_desc[i].chip->name); ++ seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) +Index: 10.3-2007-11-26/arch/i386/kernel/ldt-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/ldt-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/ldt-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -1,5 +1,5 @@ + /* +- * linux/kernel/ldt.c ++ * linux/arch/i386/kernel/ldt.c + * + * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> +Index: 10.3-2007-11-26/arch/i386/kernel/machine_kexec.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/machine_kexec.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/machine_kexec.c 2007-10-22 13:53:08.000000000 +0200 +@@ -137,6 +137,7 @@ NORET_TYPE void machine_kexec(struct kim + */ + static int __init parse_crashkernel(char *arg) + { ++#ifndef CONFIG_XEN + unsigned long size, base; + size = memparse(arg, &arg); + if (*arg == '@') { +@@ -147,6 +148,10 @@ static int __init parse_crashkernel(char + crashk_res.start = base; + crashk_res.end = base + size - 1; + } ++#else ++ printk("Ignoring crashkernel command line, " ++ "parameter will be supplied by xen\n"); ++#endif + return 0; + } + early_param("crashkernel", parse_crashkernel); +Index: 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/microcode-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/microcode-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -2,6 +2,7 @@ + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2000-2004 Tigran Aivazian ++ * 2006 Shaohua Li <shaohua.li@intel.com> + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, +@@ -33,7 +34,9 @@ + #include <linux/spinlock.h> + #include <linux/mm.h> + #include <linux/mutex.h> +-#include <linux/syscalls.h> ++#include <linux/cpu.h> ++#include <linux/firmware.h> ++#include <linux/platform_device.h> + + #include <asm/msr.h> + #include <asm/uaccess.h> +@@ -55,12 +58,7 @@ module_param(verbose, int, 0644); + /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ + static DEFINE_MUTEX(microcode_mutex); + +-static int microcode_open (struct inode *unused1, struct file *unused2) +-{ +- return capable(CAP_SYS_RAWIO) ? 
0 : -EPERM; +-} +- +- ++#ifdef CONFIG_MICROCODE_OLD_INTERFACE + static int do_microcode_update (const void __user *ubuf, size_t len) + { + int err; +@@ -85,6 +83,11 @@ static int do_microcode_update (const vo + return err; + } + ++static int microcode_open (struct inode *unused1, struct file *unused2) ++{ ++ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; ++} ++ + static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos) + { + ssize_t ret; +@@ -117,7 +120,7 @@ static struct miscdevice microcode_dev = + .fops = &microcode_fops, + }; + +-static int __init microcode_init (void) ++static int __init microcode_dev_init (void) + { + int error; + +@@ -129,6 +132,68 @@ static int __init microcode_init (void) + return error; + } + ++ return 0; ++} ++ ++static void __exit microcode_dev_exit (void) ++{ ++ misc_deregister(&microcode_dev); ++} ++ ++MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); ++#else ++#define microcode_dev_init() 0 ++#define microcode_dev_exit() do { } while(0) ++#endif ++ ++/* fake device for request_firmware */ ++static struct platform_device *microcode_pdev; ++ ++static int request_microcode(void) ++{ ++ char name[30]; ++ const struct cpuinfo_x86 *c = &boot_cpu_data; ++ const struct firmware *firmware; ++ int error; ++ struct xen_platform_op op; ++ ++ sprintf(name,"intel-ucode/%02x-%02x-%02x", ++ c->x86, c->x86_model, c->x86_mask); ++ error = request_firmware(&firmware, name, &microcode_pdev->dev); ++ if (error) { ++ pr_debug("ucode data file %s load failed\n", name); ++ return error; ++ } ++ ++ op.cmd = XENPF_microcode_update; ++ set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data); ++ op.u.microcode.length = firmware->size; ++ error = HYPERVISOR_platform_op(&op); ++ ++ release_firmware(firmware); ++ ++ if (error) ++ pr_debug("ucode load failed\n"); ++ ++ return error; ++} ++ ++static int __init microcode_init (void) ++{ ++ int error; ++ ++ error = microcode_dev_init(); ++ if (error) ++ return error; ++ microcode_pdev = platform_device_register_simple("microcode", -1, ++ NULL, 0); ++ if (IS_ERR(microcode_pdev)) { ++ microcode_dev_exit(); ++ return PTR_ERR(microcode_pdev); ++ } ++ ++ request_microcode(); ++ + printk(KERN_INFO + "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n"); + return 0; +@@ -136,9 +201,9 @@ static int __init microcode_init (void) + + static void __exit microcode_exit (void) + { +- misc_deregister(&microcode_dev); ++ microcode_dev_exit(); ++ platform_device_unregister(microcode_pdev); + } + + module_init(microcode_init) + module_exit(microcode_exit) +-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +Index: 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/mpparse-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/mpparse-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -30,6 +30,7 @@ + #include <asm/io_apic.h> + + #include <mach_apic.h> ++#include <mach_apicdef.h> + #include <mach_mpparse.h> + #include <bios_ebda.h> + +@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0; + /* Processor that is doing the boot up */ + unsigned int boot_cpu_physical_apicid = -1U; + /* Internal processor count */ +-static unsigned int __devinitdata num_processors; ++unsigned int __cpuinitdata num_processors; + + /* Bitmask of physically existing CPUs */ + physid_mask_t phys_cpu_present_map; +@@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m + + mpc_oem_bus_info(m, str,
translation_table[mpc_record]); + ++#if MAX_MP_BUSSES < 256 + if (m->mpc_busid >= MAX_MP_BUSSES) { + printk(KERN_WARNING "MP table busid value (%d) for bustype %s " + " is too large, max. supported is %d\n", + m->mpc_busid, str, MAX_MP_BUSSES - 1); + return; + } ++#endif + + if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; +@@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru + m->mpc_irqtype, m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); +- /* +- * Well it seems all SMP boards in existence +- * use ExtINT/LVT1 == LINT0 and +- * NMI/LVT2 == LINT1 - the following check +- * will show us if this assumptions is false. +- * Until then we do not have to add baggage. +- */ +- if ((m->mpc_irqtype == mp_ExtINT) && +- (m->mpc_destapiclint != 0)) +- BUG(); +- if ((m->mpc_irqtype == mp_NMI) && +- (m->mpc_destapiclint != 1)) +- BUG(); + } + + #ifdef CONFIG_X86_NUMAQ +@@ -838,8 +828,7 @@ int es7000_plat; + + #ifdef CONFIG_ACPI + +-void __init mp_register_lapic_address ( +- u64 address) ++void __init mp_register_lapic_address(u64 address) + { + #ifndef CONFIG_XEN + mp_lapic_addr = (unsigned long) address; +@@ -853,13 +842,10 @@ void __init mp_register_lapic_address ( + #endif + } + +- +-void __devinit mp_register_lapic ( +- u8 id, +- u8 enabled) ++void __devinit mp_register_lapic (u8 id, u8 enabled) + { + struct mpc_config_processor processor; +- int boot_cpu = 0; ++ int boot_cpu = 0; + + if (MAX_APICS - id <= 0) { + printk(KERN_WARNING "Processor #%d invalid (max %d)\n", +@@ -898,11 +884,9 @@ static struct mp_ioapic_routing { + u32 pin_programmed[4]; + } mp_ioapic_routing[MAX_IO_APICS]; + +- +-static int mp_find_ioapic ( +- int gsi) ++static int mp_find_ioapic (int gsi) + { +- int i = 0; ++ int i = 0; + + /* Find the IOAPIC that manages this GSI. */ + for (i = 0; i < nr_ioapics; i++) { +@@ -915,15 +899,11 @@ static int mp_find_ioapic ( + + return -1; + } +- + +-void __init mp_register_ioapic ( +- u8 id, +- u32 address, +- u32 gsi_base) ++void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) + { +- int idx = 0; +- int tmpid; ++ int idx = 0; ++ int tmpid; + + if (nr_ioapics >= MAX_IO_APICS) { + printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " +@@ -971,16 +951,10 @@ void __init mp_register_ioapic ( + mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_base, + mp_ioapic_routing[idx].gsi_end); +- +- return; + } + +- +-void __init mp_override_legacy_irq ( +- u8 bus_irq, +- u8 polarity, +- u8 trigger, +- u32 gsi) ++void __init ++mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) + { + struct mpc_config_intsrc intsrc; + int ioapic = -1; +@@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq ( + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); +- +- return; + } + + void __init mp_config_acpi_legacy_irqs (void) + { + struct mpc_config_intsrc intsrc; +- int i = 0; +- int ioapic = -1; ++ int i = 0; ++ int ioapic = -1; + + /* + * Fabricate the legacy ISA bus (bus #31). 
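Aside (not part of the patch): the new "#if MAX_MP_BUSSES < 256" guard above exists because mpc_busid is an 8-bit field, so once the configured limit is 256 or more the range check can never be true and would only provoke an always-false-comparison warning. A minimal standalone sketch of that reasoning, with an assumed limit of 260:

    #include <stdio.h>

    #define MAX_MP_BUSSES 260                  /* assumed config value */

    struct mpc_bus { unsigned char busid; };   /* 8-bit id, 0..255 */

    int main(void)
    {
        struct mpc_bus b = { .busid = 255 };   /* largest possible value */

    #if MAX_MP_BUSSES < 256
        /* Only meaningful when an 8-bit id can exceed the limit. */
        if (b.busid >= MAX_MP_BUSSES)
            printf("busid %u out of range\n", (unsigned)b.busid);
    #else
        /* busid <= 255 < MAX_MP_BUSSES always holds; check compiled out. */
        printf("busid %u always in range\n", (unsigned)b.busid);
    #endif
        return 0;
    }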
+@@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs ( + + #define MAX_GSI_NUM 4096 + +-int mp_register_gsi (u32 gsi, int triggering, int polarity) ++int mp_register_gsi(u32 gsi, int triggering, int polarity) + { +- int ioapic = -1; +- int ioapic_pin = 0; +- int idx, bit = 0; +- static int pci_irq = 16; ++ int ioapic = -1; ++ int ioapic_pin = 0; ++ int idx, bit = 0; ++ static int pci_irq = 16; + /* + * Mapping between Global System Interrups, which + * represent all possible interrupts, and IRQs +Index: 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/pci-dma-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/pci-dma-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -83,8 +83,7 @@ dma_map_sg(struct device *hwdev, struct + { + int i, rc; + +- if (direction == DMA_NONE) +- BUG(); ++ BUG_ON(!valid_dma_direction(direction)); + WARN_ON(nents == 0 || sg[0].length == 0); + + if (swiotlb) { +@@ -115,7 +114,7 @@ dma_unmap_sg(struct device *hwdev, struc + { + int i; + +- BUG_ON(direction == DMA_NONE); ++ BUG_ON(!valid_dma_direction(direction)); + if (swiotlb) + swiotlb_unmap_sg(hwdev, sg, nents, direction); + else { +@@ -132,8 +131,7 @@ dma_map_page(struct device *dev, struct + { + dma_addr_t dma_addr; + +- BUG_ON(direction == DMA_NONE); +- ++ BUG_ON(!valid_dma_direction(direction)); + if (swiotlb) { + dma_addr = swiotlb_map_page( + dev, page, offset, size, direction); +@@ -150,7 +148,7 @@ void + dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) + { +- BUG_ON(direction == DMA_NONE); ++ BUG_ON(!valid_dma_direction(direction)); + if (swiotlb) + swiotlb_unmap_page(dev, dma_address, size, direction); + else +@@ -332,8 +330,7 @@ dma_map_single(struct device *dev, void + { + dma_addr_t dma; + +- if (direction == DMA_NONE) +- BUG(); ++ BUG_ON(!valid_dma_direction(direction)); + WARN_ON(size == 0); + + if (swiotlb) { +@@ -354,8 +351,7 @@ void + dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) + { +- if (direction == DMA_NONE) +- BUG(); ++ BUG_ON(!valid_dma_direction(direction)); + if (swiotlb) + swiotlb_unmap_single(dev, dma_addr, size, direction); + else +Index: 10.3-2007-11-26/arch/i386/kernel/process-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/process-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/process-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -37,6 +37,7 @@ + #include <linux/kallsyms.h> + #include <linux/ptrace.h> + #include <linux/random.h> ++#include <linux/personality.h> + + #include <asm/uaccess.h> + #include <asm/pgtable.h> +@@ -186,7 +187,7 @@ void cpu_idle(void) + void cpu_idle_wait(void) + { + unsigned int cpu, this_cpu = get_cpu(); +- cpumask_t map; ++ cpumask_t map, tmp = current->cpus_allowed; + + set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); + put_cpu(); +@@ -208,6 +209,8 @@ void cpu_idle_wait(void) + } + cpus_and(map, map, cpu_online_map); + } while (!cpus_empty(map)); ++ ++ set_cpus_allowed(current, tmp); + } + EXPORT_SYMBOL_GPL(cpu_idle_wait); + +@@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs) + if (user_mode_vm(regs)) + printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); + printk(" EFLAGS: %08lx %s (%s %.*s)\n", +- regs->eflags, print_tainted(), system_utsname.release, +- 
(int)strcspn(system_utsname.version, " "), +- system_utsname.version); ++ regs->eflags, print_tainted(), init_utsname()->release, ++ (int)strcspn(init_utsname()->version, " "), ++ init_utsname()->version); + printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", + regs->eax,regs->ebx,regs->ecx,regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx", +@@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs) + * the "args". + */ + extern void kernel_thread_helper(void); +-__asm__(".section .text\n" +- ".align 4\n" +- "kernel_thread_helper:\n\t" +- "movl %edx,%eax\n\t" +- "pushl %edx\n\t" +- "call *%ebx\n\t" +- "pushl %eax\n\t" +- "call do_exit\n" +- ".previous"); + + /* + * Create a kernel thread +@@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi + regs.xes = __USER_DS; + regs.orig_eax = -1; + regs.eip = (unsigned long) kernel_thread_helper; +- regs.xcs = GET_KERNEL_CS(); ++ regs.xcs = __KERNEL_CS | get_kernel_rpl(); + regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; + + /* Ok, create the new process.. */ +@@ -368,13 +362,12 @@ int copy_thread(int nr, unsigned long cl + + tsk = current; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { +- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); ++ p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, ++ IO_BITMAP_BYTES, GFP_KERNEL); + if (!p->thread.io_bitmap_ptr) { + p->thread.io_bitmap_max = 0; + return -ENOMEM; + } +- memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, +- IO_BITMAP_BYTES); + set_tsk_thread_flag(p, TIF_IO_BITMAP); + } + +@@ -847,7 +840,7 @@ asmlinkage int sys_get_thread_area(struc + + unsigned long arch_align_stack(unsigned long sp) + { +- if (randomize_va_space) ++ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() % 8192; + return sp & ~0xf; + } +Index: 10.3-2007-11-26/arch/i386/kernel/setup-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/setup-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/setup-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -56,6 +56,7 @@ + #include <asm/apic.h> + #include <asm/e820.h> + #include <asm/mpspec.h> ++#include <asm/mmzone.h> + #include <asm/setup.h> + #include <asm/arch_hooks.h> + #include <asm/sections.h> +@@ -105,18 +106,6 @@ EXPORT_SYMBOL(boot_cpu_data); + + unsigned long mmu_cr4_features; + +-#ifdef CONFIG_ACPI +- int acpi_disabled = 0; +-#else +- int acpi_disabled = 1; +-#endif +-EXPORT_SYMBOL(acpi_disabled); +- +-#ifdef CONFIG_ACPI +-int __initdata acpi_force = 0; +-extern acpi_interrupt_flags acpi_sci_flags; +-#endif +- + /* for MCA, but anyone else can use it if they want */ + unsigned int machine_id; + #ifdef CONFIG_MCA +@@ -170,7 +159,6 @@ struct e820map machine_e820; + #endif + + extern void early_cpu_init(void); +-extern void generic_apic_probe(char *); + extern int root_mountflags; + + unsigned long saved_videomode; +@@ -243,9 +231,6 @@ static struct resource adapter_rom_resou + .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM + } }; + +-#define ADAPTER_ROM_RESOURCES \ +- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) +- + static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, +@@ -307,9 +292,6 @@ static struct resource standard_io_resou + .flags = IORESOURCE_BUSY | IORESOURCE_IO + } }; + +-#define STANDARD_IO_RESOURCES \ +- (sizeof standard_io_resources / sizeof standard_io_resources[0]) +- + #define 
romsignature(x) (*(unsigned short *)(x) == 0xaa55) + + static int __init romchecksum(unsigned char *rom, unsigned long length) +@@ -372,7 +354,7 @@ static void __init probe_roms(void) + } + + /* check for adapter roms on 2k boundaries */ +- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { ++ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; +@@ -764,246 +746,152 @@ static inline void copy_edd(void) + } + #endif + +-static void __init parse_cmdline_early (char ** cmdline_p) ++static int __initdata user_defined_memmap = 0; ++ ++/* ++ * "mem=nopentium" disables the 4MB page tables. ++ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM ++ * to <mem>, overriding the bios size. ++ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from ++ * <start> to <start>+<mem>, overriding the bios size. ++ * ++ * HPA tells me bootloaders need to parse mem=, so no new ++ * option should be mem= [also see Documentation/i386/boot.txt] ++ */ ++static int __init parse_mem(char *arg) + { +- char c = ' ', *to = command_line, *from = saved_command_line; +- int len = 0, max_cmdline; +- int userdef = 0; +- +- if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) +- max_cmdline = COMMAND_LINE_SIZE; +- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); +- /* Save unparsed command line copy for /proc/cmdline */ +- saved_command_line[max_cmdline-1] = '\0'; +- +- for (;;) { +- if (c != ' ') +- goto next_char; +- /* +- * "mem=nopentium" disables the 4MB page tables. +- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM +- * to <mem>, overriding the bios size. +- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from +- * <start> to <start>+<mem>, overriding the bios size. +- * +- * HPA tells me bootloaders need to parse mem=, so no new +- * option should be mem= [also see Documentation/i386/boot.txt] +- */ +- if (!memcmp(from, "mem=", 4)) { +- if (to != command_line) +- to--; +- if (!memcmp(from+4, "nopentium", 9)) { +- from += 9+4; +- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); +- disable_pse = 1; +- } else { +- /* If the user specifies memory size, we +- * limit the BIOS-provided memory map to +- * that size. exactmap can be used to specify +- * the exact map. mem=number can be used to +- * trim the existing memory map. +- */ +- unsigned long long mem_size; +- +- mem_size = memparse(from+4, &from); +- limit_regions(mem_size); +- userdef=1; +- } +- } ++ if (!arg) ++ return -EINVAL; + +- else if (!memcmp(from, "memmap=", 7)) { +- if (to != command_line) +- to--; +- if (!memcmp(from+7, "exactmap", 8)) { +-#ifdef CONFIG_CRASH_DUMP +- /* If we are doing a crash dump, we +- * still need to know the real mem +- * size before original memory map is +- * reset. +- */ +- find_max_pfn(); +- saved_max_pfn = max_pfn; +-#endif +- from += 8+7; +- e820.nr_map = 0; +- userdef = 1; +- } else { +- /* If the user specifies memory size, we +- * limit the BIOS-provided memory map to +- * that size. exactmap can be used to specify +- * the exact map. mem=number can be used to +- * trim the existing memory map. +- */ +- unsigned long long start_at, mem_size; ++ if (strcmp(arg, "nopentium") == 0) { ++ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); ++ disable_pse = 1; ++ } else { ++ /* If the user specifies memory size, we ++ * limit the BIOS-provided memory map to ++ * that size. exactmap can be used to specify ++ * the exact map. 
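Aside (not part of the patch): parse_mem() above leans on the kernel helper memparse() to turn a string such as "64M" into a byte count. A simplified user-space re-implementation of the idea (not the kernel's code):

    #include <stdio.h>
    #include <stdlib.h>

    /* Parse a number with an optional K/M/G suffix into bytes. */
    static unsigned long long memparse_sketch(const char *s, char **retptr)
    {
        unsigned long long v = strtoull(s, retptr, 0);

        switch (**retptr) {
        case 'G': case 'g': v <<= 10; /* fall through */
        case 'M': case 'm': v <<= 10; /* fall through */
        case 'K': case 'k': v <<= 10; (*retptr)++; break;
        }
        return v;
    }

    int main(void)
    {
        char *rest;
        printf("64M -> %llu bytes\n", memparse_sketch("64M", &rest)); /* 67108864 */
        return 0;
    }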
mem=number can be used to ++ * trim the existing memory map. ++ */ ++ unsigned long long mem_size; + +- mem_size = memparse(from+7, &from); +- if (*from == '@') { +- start_at = memparse(from+1, &from); +- add_memory_region(start_at, mem_size, E820_RAM); +- } else if (*from == '#') { +- start_at = memparse(from+1, &from); +- add_memory_region(start_at, mem_size, E820_ACPI); +- } else if (*from == '$') { +- start_at = memparse(from+1, &from); +- add_memory_region(start_at, mem_size, E820_RESERVED); +- } else { +- limit_regions(mem_size); +- userdef=1; +- } +- } +- } +- +- else if (!memcmp(from, "noexec=", 7)) +- noexec_setup(from + 7); ++ mem_size = memparse(arg, &arg); ++ limit_regions(mem_size); ++ user_defined_memmap = 1; ++ } ++ return 0; ++} ++early_param("mem", parse_mem); + ++static int __init parse_memmap(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; + +-#ifdef CONFIG_X86_MPPARSE +- /* +- * If the BIOS enumerates physical processors before logical, +- * maxcpus=N at enumeration-time can be used to disable HT. ++ if (strcmp(arg, "exactmap") == 0) { ++#ifdef CONFIG_CRASH_DUMP ++ /* If we are doing a crash dump, we ++ * still need to know the real mem ++ * size before original memory map is ++ * reset. + */ +- else if (!memcmp(from, "maxcpus=", 8)) { +- extern unsigned int maxcpus; +- +- maxcpus = simple_strtoul(from + 8, NULL, 0); +- } ++ find_max_pfn(); ++ saved_max_pfn = max_pfn; + #endif ++ e820.nr_map = 0; ++ user_defined_memmap = 1; ++ } else { ++ /* If the user specifies memory size, we ++ * limit the BIOS-provided memory map to ++ * that size. exactmap can be used to specify ++ * the exact map. mem=number can be used to ++ * trim the existing memory map. ++ */ ++ unsigned long long start_at, mem_size; + +-#ifdef CONFIG_ACPI +- /* "acpi=off" disables both ACPI table parsing and interpreter */ +- else if (!memcmp(from, "acpi=off", 8)) { +- disable_acpi(); +- } +- +- /* acpi=force to over-ride black-list */ +- else if (!memcmp(from, "acpi=force", 10)) { +- acpi_force = 1; +- acpi_ht = 1; +- acpi_disabled = 0; +- } +- +- /* acpi=strict disables out-of-spec workarounds */ +- else if (!memcmp(from, "acpi=strict", 11)) { +- acpi_strict = 1; +- } +- +- /* Limit ACPI just to boot-time to enable HT */ +- else if (!memcmp(from, "acpi=ht", 7)) { +- if (!acpi_force) +- disable_acpi(); +- acpi_ht = 1; +- } +- +- /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ +- else if (!memcmp(from, "pci=noacpi", 10)) { +- acpi_disable_pci(); +- } +- /* "acpi=noirq" disables ACPI interrupt routing */ +- else if (!memcmp(from, "acpi=noirq", 10)) { +- acpi_noirq_set(); ++ mem_size = memparse(arg, &arg); ++ if (*arg == '@') { ++ start_at = memparse(arg+1, &arg); ++ add_memory_region(start_at, mem_size, E820_RAM); ++ } else if (*arg == '#') { ++ start_at = memparse(arg+1, &arg); ++ add_memory_region(start_at, mem_size, E820_ACPI); ++ } else if (*arg == '$') { ++ start_at = memparse(arg+1, &arg); ++ add_memory_region(start_at, mem_size, E820_RESERVED); ++ } else { ++ limit_regions(mem_size); ++ user_defined_memmap = 1; + } ++ } ++ return 0; ++} ++early_param("memmap", parse_memmap); + +- else if (!memcmp(from, "acpi_sci=edge", 13)) +- acpi_sci_flags.trigger = 1; +- +- else if (!memcmp(from, "acpi_sci=level", 14)) +- acpi_sci_flags.trigger = 3; ++#ifdef CONFIG_PROC_VMCORE ++/* elfcorehdr= specifies the location of elf core header ++ * stored by the crashed kernel. 
++ */ ++static int __init parse_elfcorehdr(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; + +- else if (!memcmp(from, "acpi_sci=high", 13)) +- acpi_sci_flags.polarity = 1; ++ elfcorehdr_addr = memparse(arg, &arg); ++ return 0; ++} ++early_param("elfcorehdr", parse_elfcorehdr); ++#endif /* CONFIG_PROC_VMCORE */ + +- else if (!memcmp(from, "acpi_sci=low", 12)) +- acpi_sci_flags.polarity = 3; ++/* ++ * highmem=size forces highmem to be exactly 'size' bytes. ++ * This works even on boxes that have no highmem otherwise. ++ * This also works to reduce highmem size on bigger boxes. ++ */ ++static int __init parse_highmem(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; + +-#ifdef CONFIG_X86_IO_APIC +- else if (!memcmp(from, "acpi_skip_timer_override", 24)) +- acpi_skip_timer_override = 1; ++ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; ++ return 0; ++} ++early_param("highmem", parse_highmem); + +- if (!memcmp(from, "disable_timer_pin_1", 19)) +- disable_timer_pin_1 = 1; +- if (!memcmp(from, "enable_timer_pin_1", 18)) +- disable_timer_pin_1 = -1; +- +- /* disable IO-APIC */ +- else if (!memcmp(from, "noapic", 6)) +- disable_ioapic_setup(); +-#endif /* CONFIG_X86_IO_APIC */ +-#endif /* CONFIG_ACPI */ ++/* ++ * vmalloc=size forces the vmalloc area to be exactly 'size' ++ * bytes. This can be used to increase (or decrease) the ++ * vmalloc area - the default is 128m. ++ */ ++static int __init parse_vmalloc(char *arg) ++{ ++ if (!arg) ++ return -EINVAL; + +-#ifdef CONFIG_X86_LOCAL_APIC +- /* enable local APIC */ +- else if (!memcmp(from, "lapic", 5)) +- lapic_enable(); +- +- /* disable local APIC */ +- else if (!memcmp(from, "nolapic", 6)) +- lapic_disable(); +-#endif /* CONFIG_X86_LOCAL_APIC */ ++ __VMALLOC_RESERVE = memparse(arg, &arg); ++ return 0; ++} ++early_param("vmalloc", parse_vmalloc); + +-#ifdef CONFIG_KEXEC +- /* crashkernel=size@addr specifies the location to reserve for +- * a crash kernel. By reserving this memory we guarantee +- * that linux never set's it up as a DMA target. +- * Useful for holding code to do something appropriate +- * after a kernel panic. +- */ +- else if (!memcmp(from, "crashkernel=", 12)) { + #ifndef CONFIG_XEN +- unsigned long size, base; +- size = memparse(from+12, &from); +- if (*from == '@') { +- base = memparse(from+1, &from); +- /* FIXME: Do I want a sanity check +- * to validate the memory range? +- */ +- crashk_res.start = base; +- crashk_res.end = base + size - 1; +- } +-#else +- printk("Ignoring crashkernel command line, " +- "parameter will be supplied by xen\n"); +-#endif +- } +-#endif +-#ifdef CONFIG_PROC_VMCORE +- /* elfcorehdr= specifies the location of elf core header +- * stored by the crashed kernel. +- */ +- else if (!memcmp(from, "elfcorehdr=", 11)) +- elfcorehdr_addr = memparse(from+11, &from); +-#endif ++/* ++ * reservetop=size reserves a hole at the top of the kernel address space which ++ * a hypervisor can load into later. Needed for dynamically loaded hypervisors, ++ * so relocating the fixmap can be done before paging initialization. ++ */ ++static int __init parse_reservetop(char *arg) ++{ ++ unsigned long address; + +- /* +- * highmem=size forces highmem to be exactly 'size' bytes. +- * This works even on boxes that have no highmem otherwise. +- * This also works to reduce highmem size on bigger boxes. +- */ +- else if (!memcmp(from, "highmem=", 8)) +- highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; +- +- /* +- * vmalloc=size forces the vmalloc area to be exactly 'size' +- * bytes. 
This can be used to increase (or decrease) the +- * vmalloc area - the default is 128m. +- */ +- else if (!memcmp(from, "vmalloc=", 8)) +- __VMALLOC_RESERVE = memparse(from+8, &from); ++ if (!arg) ++ return -EINVAL; + +- next_char: +- c = *(from++); +- if (!c) +- break; +- if (COMMAND_LINE_SIZE <= ++len) +- break; +- *(to++) = c; +- } +- *to = '\0'; +- *cmdline_p = command_line; +- if (userdef) { +- printk(KERN_INFO "user-defined physical RAM map:\n"); +- print_memory_map("user"); +- } ++ address = memparse(arg, &arg); ++ reserve_top_address(address); ++ return 0; + } ++early_param("reservetop", parse_reservetop); ++#endif + + /* + * Callback for efi_memory_walk. +@@ -1024,7 +912,7 @@ efi_find_max_pfn(unsigned long start, un + static int __init + efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) + { +- memory_present(0, start, end); ++ memory_present(0, PFN_UP(start), PFN_DOWN(end)); + return 0; + } + +@@ -1262,6 +1150,14 @@ static unsigned long __init setup_memory + } + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); ++ num_physpages = highend_pfn; ++ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; ++#else ++ num_physpages = max_low_pfn; ++ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; ++#endif ++#ifdef CONFIG_FLATMEM ++ max_mapnr = num_physpages; + #endif + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); +@@ -1273,9 +1169,9 @@ static unsigned long __init setup_memory + + void __init zone_sizes_init(void) + { +- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; +- unsigned int max_dma, low; ++ unsigned long max_zone_pfns[MAX_NR_ZONES]; + ++ memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + /* + * XEN: Our notion of "DMA memory" is fake when running over Xen. + * We simply put all RAM in the DMA zone so that those drivers which +@@ -1283,19 +1179,16 @@ void __init zone_sizes_init(void) + * Those drivers that *do* require lowmem are screwed anyway when + * running over Xen! 
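Aside (not part of the patch): the conversion above replaces one monolithic parse_cmdline_early() scanner with small per-option handlers registered via early_param(). Outside the kernel the same pattern is a name-to-handler table; a hypothetical sketch:

    #include <stdio.h>
    #include <string.h>

    static int parse_highmem_opt(char *arg) { printf("highmem=%s\n", arg); return 0; }
    static int parse_vmalloc_opt(char *arg) { printf("vmalloc=%s\n", arg); return 0; }

    /* The early_param() idea: each option registers its own handler. */
    static const struct {
        const char *name;
        int (*setup)(char *);
    } early_params[] = {
        { "highmem", parse_highmem_opt },
        { "vmalloc", parse_vmalloc_opt },
    };

    static void parse_early_options(char *cmdline)
    {
        for (char *opt = strtok(cmdline, " "); opt; opt = strtok(NULL, " ")) {
            char *val = strchr(opt, '=');
            if (!val)
                continue;
            *val++ = '\0';
            for (size_t i = 0; i < sizeof early_params / sizeof early_params[0]; i++)
                if (strcmp(opt, early_params[i].name) == 0)
                    early_params[i].setup(val);
        }
    }

    int main(void)
    {
        char cmdline[] = "highmem=512M quiet vmalloc=192M";
        parse_early_options(cmdline);
        return 0;
    }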
+ */ +- max_dma = max_low_pfn; +- low = max_low_pfn; +- +- if (low < max_dma) +- zones_size[ZONE_DMA] = low; +- else { +- zones_size[ZONE_DMA] = max_dma; +- zones_size[ZONE_NORMAL] = low - max_dma; ++ max_zone_pfns[ZONE_DMA] = max_low_pfn; ++ max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + #ifdef CONFIG_HIGHMEM +- zones_size[ZONE_HIGHMEM] = highend_pfn - low; ++ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn; ++ add_active_range(0, 0, highend_pfn); ++#else ++ add_active_range(0, 0, max_low_pfn); + #endif +- } +- free_area_init(zones_size); ++ ++ free_area_init_nodes(max_zone_pfns); + } + #else + extern unsigned long __init setup_memory(void); +@@ -1352,6 +1245,7 @@ void __init setup_bootmem_allocator(void + */ + acpi_reserve_bootmem(); + #endif ++ numa_kva_reserve(); + #endif /* !CONFIG_XEN */ + + #ifdef CONFIG_BLK_DEV_INITRD +@@ -1541,7 +1435,7 @@ static int __init request_standard_resou + request_resource(&iomem_resource, &video_ram_resource); + + /* request I/O space for devices used on all i[345]86 PCs */ +- for (i = 0; i < STANDARD_IO_RESOURCES; i++) ++ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; + } +@@ -1692,17 +1586,19 @@ void __init setup_arch(char **cmdline_p) + data_resource.start = virt_to_phys(_etext); + data_resource.end = virt_to_phys(_edata)-1; + +- parse_cmdline_early(cmdline_p); ++ if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) ++ i = COMMAND_LINE_SIZE; ++ memcpy(saved_command_line, xen_start_info->cmd_line, i); ++ saved_command_line[i - 1] = '\0'; ++ parse_early_param(); + +-#ifdef CONFIG_EARLY_PRINTK +- { +- char *s = strstr(*cmdline_p, "earlyprintk="); +- if (s) { +- setup_early_printk(strchr(s, '=') + 1); +- printk("early console enabled\n"); +- } ++ if (user_defined_memmap) { ++ printk(KERN_INFO "user-defined physical RAM map:\n"); ++ print_memory_map("user"); + } +-#endif ++ ++ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); ++ *cmdline_p = command_line; + + max_low_pfn = setup_memory(); + +@@ -1778,7 +1674,7 @@ void __init setup_arch(char **cmdline_p) + dmi_scan_machine(); + + #ifdef CONFIG_X86_GENERICARCH +- generic_apic_probe(*cmdline_p); ++ generic_apic_probe(); + #endif + if (efi_enabled) + efi_map_memmap(); +@@ -1799,9 +1695,11 @@ void __init setup_arch(char **cmdline_p) + acpi_boot_table_init(); + #endif + ++#ifdef CONFIG_PCI + #ifdef CONFIG_X86_IO_APIC + check_acpi_pci(); /* Checks more than just ACPI actually */ + #endif ++#endif + + #ifdef CONFIG_ACPI + acpi_boot_init(); +Index: 10.3-2007-11-26/arch/i386/kernel/smp-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/smp-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/smp-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo + * 2) Leave the mm if we are in the lazy tlb mode. 
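Aside (not part of the patch): the efi_memory_present_wrapper() fix above hands memory_present() page-frame numbers instead of raw byte addresses, rounding the start up and the end down so only whole pages inside the range are counted. The macros reduce to this arithmetic (4 KiB pages assumed):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)
    #define PFN_UP(x)   (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

    int main(void)
    {
        unsigned long start = 0x1234, end = 0x5678;

        /* Whole pages fully contained in [start, end). */
        printf("pfns %lu..%lu\n", PFN_UP(start), PFN_DOWN(end)); /* 2..5 */
        return 0;
    }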
+ */ + +-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, +- struct pt_regs *regs) ++irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id) + { + unsigned long cpu; + +@@ -443,8 +442,7 @@ void flush_tlb_all(void) + + #else + +-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id, +- struct pt_regs *regs) ++irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id) + { return 0; } + void flush_tlb_current_task(void) + { xen_tlb_flush_mask(&current->mm->cpu_vm_mask); } +@@ -586,16 +584,14 @@ void smp_send_stop(void) + * all the work is done automatically when + * we return from the interrupt. + */ +-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id, +- struct pt_regs *regs) ++irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) + { + + return IRQ_HANDLED; + } + + #include <linux/kallsyms.h> +-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id, +- struct pt_regs *regs) ++irqreturn_t smp_call_function_interrupt(int irq, void *dev_id) + { + void (*func) (void *info) = call_data->func; + void *info = call_data->info; +@@ -622,3 +618,69 @@ irqreturn_t smp_call_function_interrupt( + return IRQ_HANDLED; + } + ++/* ++ * this function sends a 'generic call function' IPI to one other CPU ++ * in the system. ++ * ++ * cpu is a standard Linux logical CPU number. ++ */ ++static void ++__smp_call_function_single(int cpu, void (*func) (void *info), void *info, ++ int nonatomic, int wait) ++{ ++ struct call_data_struct data; ++ int cpus = 1; ++ ++ data.func = func; ++ data.info = info; ++ atomic_set(&data.started, 0); ++ data.wait = wait; ++ if (wait) ++ atomic_set(&data.finished, 0); ++ ++ call_data = &data; ++ wmb(); ++ /* Send a message to all other CPUs and wait for them to respond */ ++ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); ++ ++ /* Wait for response */ ++ while (atomic_read(&data.started) != cpus) ++ cpu_relax(); ++ ++ if (!wait) ++ return; ++ ++ while (atomic_read(&data.finished) != cpus) ++ cpu_relax(); ++} ++ ++/* ++ * smp_call_function_single - Run a function on another CPU ++ * @func: The function to run. This must be fast and non-blocking. ++ * @info: An arbitrary pointer to pass to the function. ++ * @nonatomic: Currently unused. ++ * @wait: If true, wait until function has completed on other CPUs. ++ * ++ * Returns 0 on success, else a negative status code. ++ * ++ * Does not return until the remote CPU is nearly ready to execute <func> ++ * or is executing or has executed.
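Aside (not part of the patch): the started/finished counters in __smp_call_function_single() above form a two-phase rendezvous: the caller spins until the target CPU has picked up the request and, when wait is set, until the function has finished. A user-space analogue using C11 threads and atomics (assumes a libc providing <threads.h>, e.g. glibc 2.28+):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <threads.h>

    struct call_data {
        void (*func)(void *);
        void *info;
        atomic_int started;
        atomic_int finished;
    };

    static int target(void *arg)
    {
        struct call_data *d = arg;
        atomic_fetch_add(&d->started, 1);    /* "request picked up" */
        d->func(d->info);
        atomic_fetch_add(&d->finished, 1);   /* "function completed" */
        return 0;
    }

    static void hello(void *info) { printf("hello from %s\n", (char *)info); }

    int main(void)
    {
        struct call_data d = { hello, "the other thread", 0, 0 };
        thrd_t t;

        thrd_create(&t, target, &d);
        while (atomic_load(&d.started) != 1)    /* wait for pickup */
            ;
        while (atomic_load(&d.finished) != 1)   /* the wait == 1 case */
            ;
        thrd_join(t, NULL);
        return 0;
    }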
++ */ ++ ++int smp_call_function_single(int cpu, void (*func) (void *info), void *info, ++ int nonatomic, int wait) ++{ ++ /* prevent preemption and reschedule on another processor */ ++ int me = get_cpu(); ++ if (cpu == me) { ++ WARN_ON(1); ++ put_cpu(); ++ return -EBUSY; ++ } ++ spin_lock_bh(&call_lock); ++ __smp_call_function_single(cpu, func, info, nonatomic, wait); ++ spin_unlock_bh(&call_lock); ++ put_cpu(); ++ return 0; ++} ++EXPORT_SYMBOL(smp_call_function_single); +Index: 10.3-2007-11-26/arch/i386/kernel/time-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/time-xen.c 2007-12-06 17:31:37.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/time-xen.c 2007-12-06 17:31:58.000000000 +0100 +@@ -88,7 +88,6 @@ int pit_latch_buggy; /* ext + unsigned long vxtime_hz = PIT_TICK_RATE; + struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */ + volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; +-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; + struct timespec __xtime __section_xtime; + struct timezone __sys_tz __section_sys_tz; + #endif +@@ -96,8 +95,6 @@ struct timezone __sys_tz __section_sys_t + unsigned int cpu_khz; /* Detected as we calibrate the TSC */ + EXPORT_SYMBOL(cpu_khz); + +-extern unsigned long wall_jiffies; +- + DEFINE_SPINLOCK(rtc_lock); + EXPORT_SYMBOL(rtc_lock); + +@@ -261,11 +258,10 @@ static void __update_wallclock(time_t se + time_t wtm_sec, xtime_sec; + u64 tmp, wc_nsec; + +- /* Adjust wall-clock time base based on wall_jiffies ticks. */ ++ /* Adjust wall-clock time base. */ + wc_nsec = processed_system_time; + wc_nsec += sec * (u64)NSEC_PER_SEC; + wc_nsec += nsec; +- wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK; + + /* Split wallclock base into seconds and nanoseconds. 
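Aside (not part of the patch): __update_wallclock() above accumulates the wall-clock base in nanoseconds and then splits it into whole seconds plus a remainder; in the kernel do_div() performs that division in place on a u64, which reduces to:

    #include <inttypes.h>
    #include <stdio.h>

    #define NSEC_PER_SEC 1000000000ULL

    int main(void)
    {
        uint64_t wc_nsec = 1234567890123ULL;   /* example wallclock base */
        uint64_t sec  = wc_nsec / NSEC_PER_SEC;
        uint64_t nsec = wc_nsec % NSEC_PER_SEC;

        printf("%" PRIu64 " ns = %" PRIu64 " s + %" PRIu64 " ns\n",
               wc_nsec, sec, nsec);            /* 1234 s + 567890123 ns */
        return 0;
    }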
*/ + tmp = wc_nsec; +@@ -383,13 +379,10 @@ void do_gettimeofday(struct timeval *tv) + shadow = &per_cpu(shadow_time, cpu); + + do { +- unsigned long lost; +- + local_time_version = shadow->version; + seq = read_seqbegin(&xtime_lock); + + usec = get_usec_offset(shadow); +- lost = jiffies - wall_jiffies; + + /* + * If time_adjust is negative then NTP is slowing the clock +@@ -399,12 +392,7 @@ void do_gettimeofday(struct timeval *tv) + if (unlikely(time_adjust < 0)) { + max_ntp_tick = (USEC_PER_SEC / HZ) - tickadj; + usec = min(usec, max_ntp_tick); +- +- if (lost) +- usec += lost * max_ntp_tick; + } +- else if (unlikely(lost)) +- usec += lost * (USEC_PER_SEC / HZ); + + sec = xtime.tv_sec; + usec += (xtime.tv_nsec / NSEC_PER_USEC); +@@ -509,7 +497,7 @@ static void sync_xen_wallclock(unsigned + write_seqlock_irq(&xtime_lock); + + sec = xtime.tv_sec; +- nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK); ++ nsec = xtime.tv_nsec; + __normalize_time(&sec, &nsec); + + op.cmd = XENPF_settime; +@@ -583,7 +571,6 @@ unsigned long long sched_clock(void) + } + #endif + +-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) + unsigned long profile_pc(struct pt_regs *regs) + { + unsigned long pc = instruction_pointer(regs); +@@ -604,21 +591,38 @@ unsigned long profile_pc(struct pt_regs + return ((unsigned long *)regs->rsp)[1]; + } + #else +- if (!user_mode_vm(regs) && in_lock_functions(pc)) ++#ifdef CONFIG_SMP ++ if (!user_mode_vm(regs) && in_lock_functions(pc)) { ++#ifdef CONFIG_FRAME_POINTER + return *(unsigned long *)(regs->ebp + 4); ++#else ++ unsigned long *sp; ++ if ((regs->xcs & 2) == 0) ++ sp = (unsigned long *)&regs->esp; ++ else ++ sp = (unsigned long *)regs->esp; ++ /* Return address is either directly at stack pointer ++ or above a saved eflags. Eflags has bits 22-31 zero, ++ kernel addresses don't. */ ++ if (sp[0] >> 22) ++ return sp[0]; ++ if (sp[1] >> 22) ++ return sp[1]; ++#endif ++ } ++#endif + #endif + + return pc; + } + EXPORT_SYMBOL(profile_pc); +-#endif + + /* + * This is the same as the above, except we _also_ save the current + * Time Stamp Counter value at the time of the timer interrupt, so that + * we later on can estimate the time of day more exactly. + */ +-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t timer_interrupt(int irq, void *dev_id) + { + s64 delta, delta_cpu, stolen, blocked; + u64 sched_time; +@@ -676,10 +680,14 @@ irqreturn_t timer_interrupt(int irq, voi + } + + /* System-wide jiffy work. */ +- while (delta >= NS_PER_TICK) { +- delta -= NS_PER_TICK; +- processed_system_time += NS_PER_TICK; +- do_timer(regs); ++ if (delta >= NS_PER_TICK) { ++ do_div(delta, NS_PER_TICK); ++ processed_system_time += delta * NS_PER_TICK; ++ while (delta > HZ) { ++ do_timer(HZ); ++ delta -= HZ; ++ } ++ do_timer(delta); + } + + if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { +@@ -724,7 +732,7 @@ irqreturn_t timer_interrupt(int irq, voi + if (delta_cpu > 0) { + do_div(delta_cpu, NS_PER_TICK); + per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK; +- if (user_mode_vm(regs)) ++ if (user_mode_vm(get_irq_regs())) + account_user_time(current, (cputime_t)delta_cpu); + else + account_system_time(current, HARDIRQ_OFFSET, +@@ -738,10 +746,10 @@ irqreturn_t timer_interrupt(int irq, voi + /* Local timer processing (see update_process_times()).
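Aside (not part of the patch): the timer_interrupt() hunk above stops looping once per lost tick and instead converts the whole nanosecond backlog to ticks with do_div(), feeding them to do_timer() in chunks of at most HZ. The arithmetic in isolation, with do_timer() stubbed out:

    #include <inttypes.h>
    #include <stdio.h>

    #define HZ 100
    #define NS_PER_TICK (1000000000ULL / HZ)

    static uint64_t jiffies;
    static void do_timer(unsigned long ticks) { jiffies += ticks; }

    int main(void)
    {
        uint64_t delta = 2500000000ULL;            /* 2.5 s backlog, in ns */

        if (delta >= NS_PER_TICK) {
            uint64_t ticks = delta / NS_PER_TICK;  /* do_div() in the kernel */
            while (ticks > HZ) {
                do_timer(HZ);
                ticks -= HZ;
            }
            do_timer(ticks);
        }
        printf("jiffies advanced by %" PRIu64 "\n", jiffies); /* 250 */
        return 0;
    }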
*/ + run_local_timers(); + if (rcu_pending(cpu)) +- rcu_check_callbacks(cpu, user_mode_vm(regs)); ++ rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs())); + scheduler_tick(); + run_posix_cpu_timers(current); +- profile_tick(CPU_PROFILING, regs); ++ profile_tick(CPU_PROFILING); + + return IRQ_HANDLED; + } +@@ -913,16 +921,19 @@ void notify_arch_cmos_timer(void) + mod_timer(&sync_xen_wallclock_timer, jiffies + 1); + } + +-static long clock_cmos_diff, sleep_start; ++static long clock_cmos_diff; ++static unsigned long sleep_start; + + static int timer_suspend(struct sys_device *dev, pm_message_t state) + { + /* + * Estimate time zone so that set_time can update the clock + */ +- clock_cmos_diff = -get_cmos_time(); ++ unsigned long ctime = get_cmos_time(); ++ ++ clock_cmos_diff = -ctime; + clock_cmos_diff += get_seconds(); +- sleep_start = get_cmos_time(); ++ sleep_start = ctime; + return 0; + } + +@@ -930,19 +941,29 @@ static int timer_resume(struct sys_devic + { + unsigned long flags; + unsigned long sec; +- unsigned long sleep_length; +- ++ unsigned long ctime = get_cmos_time(); ++ long sleep_length = (ctime - sleep_start) * HZ; ++ struct timespec ts; ++ ++ if (sleep_length < 0) { ++ printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n"); ++ /* The time after the resume must not be earlier than the time ++ * before the suspend or some nasty things will happen ++ */ ++ sleep_length = 0; ++ ctime = sleep_start; ++ } + #ifdef CONFIG_HPET_TIMER + if (is_hpet_enabled()) + hpet_reenable(); + #endif +- sec = get_cmos_time() + clock_cmos_diff; +- sleep_length = (get_cmos_time() - sleep_start) * HZ; ++ ++ sec = ctime + clock_cmos_diff; ++ ts.tv_sec = sec; ++ ts.tv_nsec = 0; ++ do_settimeofday(&ts); + write_seqlock_irqsave(&xtime_lock, flags); +- xtime.tv_sec = sec; +- xtime.tv_nsec = 0; + jiffies_64 += sleep_length; +- wall_jiffies += sleep_length; + write_sequnlock_irqrestore(&xtime_lock, flags); + touch_softlockup_watchdog(); + return 0; +@@ -976,10 +997,11 @@ extern void (*late_time_init)(void); + /* Duplicate of time_init() below, with hpet_enable part added */ + static void __init hpet_time_init(void) + { +- xtime.tv_sec = get_cmos_time(); +- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); +- set_normalized_timespec(&wall_to_monotonic, +- -xtime.tv_sec, -xtime.tv_nsec); ++ struct timespec ts; ++ ts.tv_sec = get_cmos_time(); ++ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); ++ ++ do_settimeofday(&ts); + + if ((hpet_enable() >= 0) && hpet_use_timer) { + printk("Using HPET for base-timer\n"); +Index: 10.3-2007-11-26/arch/i386/kernel/traps-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/kernel/traps-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/kernel/traps-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -28,6 +28,7 @@ + #include <linux/kprobes.h> + #include <linux/kexec.h> + #include <linux/unwind.h> ++#include <linux/uaccess.h> + + #ifdef CONFIG_EISA + #include <linux/ioport.h> +@@ -40,7 +41,6 @@ + + #include <asm/processor.h> + #include <asm/system.h> +-#include <asm/uaccess.h> + #include <asm/io.h> + #include <asm/atomic.h> + #include <asm/debugreg.h> +@@ -51,11 +51,14 @@ + #include <asm/smp.h> + #include <asm/arch_hooks.h> + #include <asm/kdebug.h> ++#include <asm/stacktrace.h> + + #include <linux/module.h> + + #include "mach_traps.h" + ++int panic_on_unrecovered_nmi; ++ + asmlinkage int system_call(void); + + struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 
0, 0 }, +@@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct + p < (void *)tinfo + THREAD_SIZE - 3; + } + +-/* +- * Print one address/symbol entries per line. +- */ +-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) +-{ +- printk(" [<%08lx>] ", addr); +- +- print_symbol("%s\n", addr); +-} +- + static inline unsigned long print_context_stack(struct thread_info *tinfo, + unsigned long *stack, unsigned long ebp, +- char *log_lvl) ++ struct stacktrace_ops *ops, void *data) + { + unsigned long addr; + + #ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { ++ unsigned long new_ebp; + addr = *(unsigned long *)(ebp + 4); +- print_addr_and_symbol(addr, log_lvl); ++ ops->address(data, addr); + /* + * break out of recursive entries (such as +- * end_of_stack_stop_unwind_function): ++ * end_of_stack_stop_unwind_function). Also, ++ * we can never allow a frame pointer to ++ * move downwards! + */ +- if (ebp == *(unsigned long *)ebp) ++ new_ebp = *(unsigned long *)ebp; ++ if (new_ebp <= ebp) + break; +- ebp = *(unsigned long *)ebp; ++ ebp = new_ebp; + } + #else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack++; + if (__kernel_text_address(addr)) +- print_addr_and_symbol(addr, log_lvl); ++ ops->address(data, addr); + } + #endif + return ebp; + } + ++struct ops_and_data { ++ struct stacktrace_ops *ops; ++ void *data; ++}; ++ + static asmlinkage int +-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) ++dump_trace_unwind(struct unwind_frame_info *info, void *data) + { ++ struct ops_and_data *oad = (struct ops_and_data *)data; + int n = 0; + + while (unwind(info) == 0 && UNW_PC(info)) { + n++; +- print_addr_and_symbol(UNW_PC(info), log_lvl); ++ oad->ops->address(oad->data, UNW_PC(info)); + if (arch_unw_user_mode(info)) + break; + } + return n; + } + +-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, +- unsigned long *stack, char *log_lvl) ++void dump_trace(struct task_struct *task, struct pt_regs *regs, ++ unsigned long *stack, ++ struct stacktrace_ops *ops, void *data) + { +- unsigned long ebp; ++ unsigned long ebp = 0; + + if (!task) + task = current; +@@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; ++ struct ops_and_data oad = { .ops = ops, .data = data }; + + if (regs) { + if (unwind_init_frame_info(&info, task, regs) == 0) +- unw_ret = show_trace_unwind(&info, log_lvl); ++ unw_ret = dump_trace_unwind(&info, &oad); + } else if (task == current) +- unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); ++ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + else { + if (unwind_init_blocked(&info, task) == 0) +- unw_ret = show_trace_unwind(&info, log_lvl); ++ unw_ret = dump_trace_unwind(&info, &oad); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { +- print_symbol("DWARF2 unwinder stuck at %s\n", ++ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + UNW_PC(&info)); + if (UNW_SP(&info) >= PAGE_OFFSET) { +- printk("Leftover inexact backtrace:\n"); ++ ops->warning(data, "Leftover inexact backtrace:\n"); + stack = (void *)UNW_SP(&info); ++ if (!stack) ++ return; ++ ebp = UNW_FP(&info); + } else +- printk("Full inexact backtrace again:\n"); ++ ops->warning(data, "Full inexact backtrace again:\n"); + } else if (call_trace >= 1) + return; + else +- printk("Full inexact backtrace again:\n"); ++ ops->warning(data, "Full inexact backtrace 
again:\n"); + } else +- printk("Inexact backtrace:\n"); ++ ops->warning(data, "Inexact backtrace:\n"); + } +- +- if (task == current) { +- /* Grab ebp right from our regs */ +- asm ("movl %%ebp, %0" : "=r" (ebp) : ); +- } else { +- /* ebp is the last reg pushed by switch_to */ +- ebp = *(unsigned long *) task->thread.esp; ++ if (!stack) { ++ unsigned long dummy; ++ stack = &dummy; ++ if (task && task != current) ++ stack = (unsigned long *)task->thread.esp; ++ } ++ ++#ifdef CONFIG_FRAME_POINTER ++ if (!ebp) { ++ if (task == current) { ++ /* Grab ebp right from our regs */ ++ asm ("movl %%ebp, %0" : "=r" (ebp) : ); ++ } else { ++ /* ebp is the last reg pushed by switch_to */ ++ ebp = *(unsigned long *) task->thread.esp; ++ } + } ++#endif + + while (1) { + struct thread_info *context; + context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); +- ebp = print_context_stack(context, stack, ebp, log_lvl); ++ ebp = print_context_stack(context, stack, ebp, ops, data); ++ /* Should be after the line below, but somewhere ++ in early boot context comes out corrupted and we ++ can't reference it -AK */ ++ if (ops->stack(data, "IRQ") < 0) ++ break; + stack = (unsigned long*)context->previous_esp; + if (!stack) + break; +- printk("%s =======================\n", log_lvl); + } + } ++EXPORT_SYMBOL(dump_trace); ++ ++static void ++print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) ++{ ++ printk(data); ++ print_symbol(msg, symbol); ++ printk("\n"); ++} ++ ++static void print_trace_warning(void *data, char *msg) ++{ ++ printk("%s%s\n", (char *)data, msg); ++} + +-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) ++static int print_trace_stack(void *data, char *name) ++{ ++ return 0; ++} ++ ++/* ++ * Print one address/symbol entries per line. 
++ */ ++static void print_trace_address(void *data, unsigned long addr) ++{ ++ printk("%s [<%08lx>] ", (char *)data, addr); ++ print_symbol("%s\n", addr); ++} ++ ++static struct stacktrace_ops print_trace_ops = { ++ .warning = print_trace_warning, ++ .warning_symbol = print_trace_warning_symbol, ++ .stack = print_trace_stack, ++ .address = print_trace_address, ++}; ++ ++static void ++show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, ++ unsigned long * stack, char *log_lvl) ++{ ++ dump_trace(task, regs, stack, &print_trace_ops, log_lvl); ++ printk("%s =======================\n", log_lvl); ++} ++ ++void show_trace(struct task_struct *task, struct pt_regs *regs, ++ unsigned long * stack) + { + show_trace_log_lvl(task, regs, stack, ""); + } +@@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs + ss = regs->xss & 0xffff; + } + print_modules(); +- printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" +- "EFLAGS: %08lx (%s %.*s) \n", ++ printk(KERN_EMERG "CPU: %d\n" ++ KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" ++ KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", + smp_processor_id(), 0xffff & regs->xcs, regs->eip, +- print_tainted(), regs->eflags, system_utsname.release, +- (int)strcspn(system_utsname.version, " "), +- system_utsname.version); ++ print_tainted(), regs->eflags, init_utsname()->release, ++ (int)strcspn(init_utsname()->version, " "), ++ init_utsname()->version); + print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); + printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", + regs->eax, regs->ebx, regs->ecx, regs->edx); +@@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs + */ + if (in_kernel) { + u8 __user *eip; ++ int code_bytes = 64; ++ unsigned char c; + + printk("\n" KERN_EMERG "Stack: "); + show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); +@@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs + printk(KERN_EMERG "Code: "); + + eip = (u8 __user *)regs->eip - 43; +- for (i = 0; i < 64; i++, eip++) { +- unsigned char c; +- ++ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { ++ /* try starting at EIP */ ++ eip = (u8 __user *)regs->eip; ++ code_bytes = 32; ++ } ++ for (i = 0; i < code_bytes; i++, eip++) { + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + printk(" Bad EIP value."); + break; +@@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r + + if (eip < PAGE_OFFSET) + return; +- if (__get_user(ud2, (unsigned short __user *)eip)) ++ if (probe_kernel_address((unsigned short __user *)eip, ud2)) + return; + if (ud2 != 0x0b0f) + return; +@@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r + char *file; + char c; + +- if (__get_user(line, (unsigned short __user *)(eip + 2))) ++ if (probe_kernel_address((unsigned short __user *)(eip + 2), ++ line)) + break; + if (__get_user(file, (char * __user *)(eip + 4)) || + (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) +@@ -604,18 +677,24 @@ gp_in_kernel: + } + } + +-static void mem_parity_error(unsigned char reason, struct pt_regs * regs) ++static __kprobes void ++mem_parity_error(unsigned char reason, struct pt_regs * regs) + { +- printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " +- "to continue\n"); ++ printk(KERN_EMERG "Uhhuh. 
NMI received for unknown reason %02x on " ++ "CPU %d.\n", reason, smp_processor_id()); + printk(KERN_EMERG "You probably have a hardware problem with your RAM " + "chips\n"); ++ if (panic_on_unrecovered_nmi) ++ panic("NMI: Not continuing"); ++ ++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + + /* Clear and disable the memory parity error line. */ + clear_mem_error(reason); + } + +-static void io_check_error(unsigned char reason, struct pt_regs * regs) ++static __kprobes void ++io_check_error(unsigned char reason, struct pt_regs * regs) + { + printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + show_registers(regs); +@@ -624,7 +703,8 @@ static void io_check_error(unsigned char + clear_io_check_error(reason); + } + +-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) ++static __kprobes void ++unknown_nmi_error(unsigned char reason, struct pt_regs * regs) + { + #ifdef CONFIG_MCA + /* Might actually be able to figure out what the guilty party +@@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c + return; + } + #endif +- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", +- reason, smp_processor_id()); +- printk("Dazed and confused, but trying to continue\n"); +- printk("Do you have a strange power saving mode enabled?\n"); ++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " ++ "CPU %d.\n", reason, smp_processor_id()); ++ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); ++ if (panic_on_unrecovered_nmi) ++ panic("NMI: Not continuing"); ++ ++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + } + + static DEFINE_SPINLOCK(nmi_print_lock); + +-void die_nmi (struct pt_regs *regs, const char *msg) ++void __kprobes die_nmi(struct pt_regs *regs, const char *msg) + { + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == + NOTIFY_STOP) +@@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons + do_exit(SIGSEGV); + } + +-static void default_do_nmi(struct pt_regs * regs) ++static __kprobes void default_do_nmi(struct pt_regs * regs) + { + unsigned char reason = 0; + +@@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg + * Ok, so this is none of the documented NMI sources, + * so it must be the NMI watchdog. 
+ */ +- if (nmi_watchdog) { +- nmi_watchdog_tick(regs); ++ if (nmi_watchdog_tick(regs, reason)) + return; +- } ++ if (!do_nmi_callback(regs, smp_processor_id())) + #endif +- unknown_nmi_error(reason, regs); ++ unknown_nmi_error(reason, regs); ++ + return; + } + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) +@@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg + reassert_nmi(); + } + +-static int dummy_nmi_callback(struct pt_regs * regs, int cpu) +-{ +- return 0; +-} +- +-static nmi_callback_t nmi_callback = dummy_nmi_callback; +- +-fastcall void do_nmi(struct pt_regs * regs, long error_code) ++fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) + { + int cpu; + +@@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_re + + ++nmi_count(cpu); + +- if (!rcu_dereference(nmi_callback)(regs, cpu)) +- default_do_nmi(regs); ++ default_do_nmi(regs); + + nmi_exit(); + } + +-void set_nmi_callback(nmi_callback_t callback) +-{ +- vmalloc_sync_all(); +- rcu_assign_pointer(nmi_callback, callback); +-} +-EXPORT_SYMBOL_GPL(set_nmi_callback); +- +-void unset_nmi_callback(void) +-{ +- nmi_callback = dummy_nmi_callback; +-} +-EXPORT_SYMBOL_GPL(unset_nmi_callback); +- + #ifdef CONFIG_KPROBES + fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) + { +Index: 10.3-2007-11-26/arch/i386/mach-xen/setup.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mach-xen/setup.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/mach-xen/setup.c 2007-10-22 13:53:08.000000000 +0200 +@@ -133,8 +133,10 @@ void __init machine_specific_arch_setup( + } + #endif + +- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) +- set_fixaddr_top(pp.virt_start); ++ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) { ++ hypervisor_virt_start = pp.virt_start; ++ reserve_top_address(0UL - pp.virt_start); ++ } + + machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START; + machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; +Index: 10.3-2007-11-26/arch/i386/mm/fault-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/fault-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/mm/fault-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -27,21 +27,24 @@ + #include <asm/uaccess.h> + #include <asm/desc.h> + #include <asm/kdebug.h> ++#include <asm/segment.h> + + extern void die(const char *,struct pt_regs *,long); + +-#ifdef CONFIG_KPROBES +-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); ++static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); ++ + int register_page_fault_notifier(struct notifier_block *nb) + { + vmalloc_sync_all(); + return atomic_notifier_chain_register(&notify_page_fault_chain, nb); + } ++EXPORT_SYMBOL_GPL(register_page_fault_notifier); + + int unregister_page_fault_notifier(struct notifier_block *nb) + { + return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); + } ++EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + + static inline int notify_page_fault(enum die_val val, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum + }; + return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); + } +-#else +-static inline int notify_page_fault(enum die_val val, const char *str, +- struct pt_regs *regs, long err, int trap, int sig) +-{ +- return NOTIFY_DONE; +-} +-#endif +- + + /* + *
Unlock any spinlocks which will prevent us from getting the +@@ -119,10 +114,10 @@ static inline unsigned long get_segment_ + } + + /* The standard kernel/user address space limit. */ +- *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg; ++ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg; + + /* By far the most common cases. */ +- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS())) ++ if (likely(SEGMENT_IS_FLAT_CODE(seg))) + return eip; + + /* Check the segment exists, is within the current LDT/GDT size, +@@ -559,11 +554,7 @@ good_area: + write = 0; + switch (error_code & 3) { + default: /* 3: write, present */ +-#ifdef TEST_VERIFY_AREA +- if (regs->cs == GET_KERNEL_CS()) +- printk("WP fault at %08lx\n", regs->eip); +-#endif +- /* fall through */ ++ /* fall through */ + case 2: /* write, not present */ + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; +@@ -572,7 +563,7 @@ good_area: + case 1: /* read, present */ + goto bad_area; + case 0: /* read, not present */ +- if (!(vma->vm_flags & (VM_READ | VM_EXEC))) ++ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; + } + +@@ -704,7 +695,7 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (tsk->pid == 1) { ++ if (is_init(tsk)) { + yield(); + down_read(&mm->mmap_sem); + goto survive; +Index: 10.3-2007-11-26/arch/i386/mm/highmem-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/highmem-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/mm/highmem-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page * + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +-#ifdef CONFIG_DEBUG_HIGHMEM + if (!pte_none(*(kmap_pte-idx))) + BUG(); +-#endif +- set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); ++ set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot)); + + return (void*) vaddr; + } +@@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page, + + void kunmap_atomic(void *kvaddr, enum km_type type) + { +-#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN) + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + +- if (vaddr < FIXADDR_START) { // FIXME ++#ifdef CONFIG_DEBUG_HIGHMEM ++ if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) { + dec_preempt_count(); + preempt_check_resched(); + return; + } +-#endif + +-#if defined(CONFIG_DEBUG_HIGHMEM) + if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx)) + BUG(); +- +- /* +- * force other mappings to Oops if they'll try to access +- * this pte without first remap it +- */ +- pte_clear(&init_mm, vaddr, kmap_pte-idx); +- __flush_tlb_one(vaddr); +-#elif defined(CONFIG_XEN) ++#endif + /* +- * We must ensure there are no dangling pagetable references when +- * returning memory to Xen (decrease_reservation). +- * XXX TODO: We could make this faster by only zapping when +- * kmap_flush_unused is called but that is trickier and more invasive. ++ * Force other mappings to Oops if they'll try to access this pte ++ * without first remap it. Keeping stale mappings around is a bad idea ++ * also, in case the page changes cacheability attributes or becomes ++ * a protected page in a hypervisor. 
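Aside (not part of the patch): the fault handler above switches on the low two bits of the x86 page-fault error code, where bit 0 means the page was present (a protection fault) and bit 1 means the access was a write. Decoded in isolation:

    #include <stdio.h>

    int main(void)
    {
        for (unsigned ec = 0; ec < 4; ec++)
            printf("error_code %u: %s, %s\n", ec,
                   (ec & 2) ? "write" : "read",
                   (ec & 1) ? "present (protection)" : "not present");
        return 0;
    }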
+ */ +- pte_clear(&init_mm, vaddr, kmap_pte-idx); +-#endif ++ kpte_clear_flush(kmap_pte-idx, vaddr); + + dec_preempt_count(); + preempt_check_resched(); +@@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn, + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); +- __flush_tlb_one(vaddr); + + return (void*) vaddr; + } +Index: 10.3-2007-11-26/arch/i386/mm/init-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/init-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/mm/init-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -467,16 +467,22 @@ EXPORT_SYMBOL(__supported_pte_mask); + * on Enable + * off Disable + */ +-void __init noexec_setup(const char *str) ++static int __init noexec_setup(char *str) + { +- if (!strncmp(str, "on",2) && cpu_has_nx) { +- __supported_pte_mask |= _PAGE_NX; +- disable_nx = 0; +- } else if (!strncmp(str,"off",3)) { ++ if (!str || !strcmp(str, "on")) { ++ if (cpu_has_nx) { ++ __supported_pte_mask |= _PAGE_NX; ++ disable_nx = 0; ++ } ++ } else if (!strcmp(str,"off")) { + disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; +- } ++ } else ++ return -EINVAL; ++ ++ return 0; + } ++early_param("noexec", noexec_setup); + + int nx_enabled = 0; + #ifdef CONFIG_X86_PAE +@@ -519,6 +525,7 @@ int __init set_kernel_exec(unsigned long + pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32)); + else + pte->pte_high |= 1 << (_PAGE_BIT_NX - 32); ++ pte_update_defer(&init_mm, vaddr, pte); + __flush_tlb_all(); + out: + return ret; +@@ -601,18 +608,6 @@ static void __init test_wp_bit(void) + } + } + +-static void __init set_max_mapnr_init(void) +-{ +-#ifdef CONFIG_HIGHMEM +- num_physpages = highend_pfn; +-#else +- num_physpages = max_low_pfn; +-#endif +-#ifdef CONFIG_FLATMEM +- max_mapnr = num_physpages; +-#endif +-} +- + static struct kcore_list kcore_mem, kcore_vmalloc; + + void __init mem_init(void) +@@ -633,8 +628,7 @@ void __init mem_init(void) + #endif + + #ifdef CONFIG_FLATMEM +- if (!mem_map) +- BUG(); ++ BUG_ON(!mem_map); + #endif + + bad_ppro = ppro_with_ram_bug(); +@@ -649,13 +643,6 @@ void __init mem_init(void) + } + #endif + +- set_max_mapnr_init(); +- +-#ifdef CONFIG_HIGHMEM +- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; +-#else +- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; +-#endif + printk("vmalloc area: %lx-%lx, maxmem %lx\n", + VMALLOC_START,VMALLOC_END,MAXMEM); + BUG_ON(VMALLOC_START > VMALLOC_END); +@@ -697,6 +684,48 @@ void __init mem_init(void) + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) + ); + ++#if 1 /* double-sanity-check paranoia */ ++ printk("virtual kernel memory layout:\n" ++ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" ++#ifdef CONFIG_HIGHMEM ++ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" ++#endif ++ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" ++ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" ++ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" ++ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" ++ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", ++ FIXADDR_START, FIXADDR_TOP, ++ (FIXADDR_TOP - FIXADDR_START) >> 10, ++ ++#ifdef CONFIG_HIGHMEM ++ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, ++ (LAST_PKMAP*PAGE_SIZE) >> 10, ++#endif ++ ++ VMALLOC_START, VMALLOC_END, ++ (VMALLOC_END - VMALLOC_START) >> 20, ++ ++ (unsigned long)__va(0), (unsigned long)high_memory, ++ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, ++ ++ (unsigned long)&__init_begin, (unsigned 
long)&__init_end, ++ ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, ++ ++ (unsigned long)&_etext, (unsigned long)&_edata, ++ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, ++ ++ (unsigned long)&_text, (unsigned long)&_etext, ++ ((unsigned long)&_etext - (unsigned long)&_text) >> 10); ++ ++#ifdef CONFIG_HIGHMEM ++ BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); ++ BUG_ON(VMALLOC_END > PKMAP_BASE); ++#endif ++ BUG_ON(VMALLOC_START > VMALLOC_END); ++ BUG_ON((unsigned long)high_memory > VMALLOC_START); ++#endif /* double-sanity-check paranoia */ ++ + #ifdef CONFIG_X86_PAE + if (!cpu_has_pae) + panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); +@@ -727,7 +756,7 @@ void __init mem_init(void) + int arch_add_memory(int nid, u64 start, u64 size) + { + struct pglist_data *pgdata = &contig_page_data; +- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; ++ struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + +Index: 10.3-2007-11-26/arch/i386/mm/ioremap-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/ioremap-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/mm/ioremap-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -12,7 +12,7 @@ + #include <linux/init.h> + #include <linux/slab.h> + #include <linux/module.h> +-#include <asm/io.h> ++#include <linux/io.h> + #include <asm/fixmap.h> + #include <asm/cacheflush.h> + #include <asm/tlbflush.h> +@@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are + if (domid == DOMID_SELF) + return -EINVAL; + +- vma->vm_flags |= VM_IO | VM_RESERVED; ++ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + + vma->vm_mm->context.has_foreign_mappings = 1; + +@@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p + void __iomem * addr; + struct vm_struct * area; + unsigned long offset, last_addr; ++ pgprot_t prot; + domid_t domid = DOMID_IO; + + /* Don't allow wraparound or zero size */ +@@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p + domid = DOMID_SELF; + } + ++ prot = __pgprot(_KERNPG_TABLE | flags); ++ + /* + * Mappings have to be page-aligned + */ +@@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p + return NULL; + area->phys_addr = phys_addr; + addr = (void __iomem *) area->addr; +- flags |= _KERNPG_TABLE; + if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr, + phys_addr>>PAGE_SHIFT, +- size, __pgprot(flags), domid)) { ++ size, prot, domid)) { + vunmap((void __force *) addr); + return NULL; + } +Index: 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/mm/pgtable-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/mm/pgtable-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -68,7 +68,9 @@ void show_mem(void) + printk(KERN_INFO "%lu pages writeback\n", + global_page_state(NR_WRITEBACK)); + printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); +- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); ++ printk(KERN_INFO "%lu pages slab\n", ++ global_page_state(NR_SLAB_RECLAIMABLE) + ++ global_page_state(NR_SLAB_UNRECLAIMABLE)); + printk(KERN_INFO "%lu pages pagetables\n", + global_page_state(NR_PAGETABLE)); + } +@@ -189,18 +191,11 @@ void set_pmd_pfn(unsigned long vaddr, un + __flush_tlb_one(vaddr); + } + +-static int nr_fixmaps = 0; ++static int 
fixmaps = 0; + unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START; +-unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE); ++unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE); + EXPORT_SYMBOL(__FIXADDR_TOP); + +-void __init set_fixaddr_top(unsigned long top) +-{ +- BUG_ON(nr_fixmaps > 0); +- hypervisor_virt_start = top; +- __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE; +-} +- + void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags) + { + unsigned long address = __fix_to_virt(idx); +@@ -221,7 +216,21 @@ void __set_fixmap (enum fixed_addresses + set_pte_pfn_ma(address, phys >> PAGE_SHIFT, flags); + break; + } +- nr_fixmaps++; ++ fixmaps++; ++} ++ ++/** ++ * reserve_top_address - reserves a hole in the top of kernel address space ++ * @reserve - size of hole to reserve ++ * ++ * Can be used to relocate the fixmap area and poke a hole in the top ++ * of kernel address space to make room for a hypervisor. ++ */ ++void __init reserve_top_address(unsigned long reserve) ++{ ++ BUG_ON(fixmaps > 0); ++ __FIXADDR_TOP = -reserve - PAGE_SIZE; ++ __VMALLOC_RESERVE += reserve; + } + + pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +Index: 10.3-2007-11-26/arch/i386/pci/irq-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/i386/pci/irq-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/i386/pci/irq-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -985,10 +985,6 @@ static void __init pcibios_fixup_irqs(vo + pci_name(bridge), 'A' + pin, irq); + } + if (irq >= 0) { +- if (use_pci_vector() && +- !platform_legacy_irq(irq)) +- irq = IO_APIC_VECTOR(irq); +- + printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + pci_name(dev), 'A' + pin, irq); + dev->irq = irq; +@@ -1149,10 +1145,6 @@ static int pirq_enable_irq(struct pci_de + } + dev = temp_dev; + if (irq >= 0) { +-#ifdef CONFIG_PCI_MSI +- if (!platform_legacy_irq(irq)) +- irq = IO_APIC_VECTOR(irq); +-#endif + printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", + pci_name(dev), 'A' + pin, irq); + dev->irq = irq; +@@ -1173,33 +1165,3 @@ static int pirq_enable_irq(struct pci_de + } + return 0; + } +- +-int pci_vector_resources(int last, int nr_released) +-{ +- int count = nr_released; +- +- int next = last; +- int offset = (last % 8); +- +- while (next < FIRST_SYSTEM_VECTOR) { +- next += 8; +-#ifdef CONFIG_X86_64 +- if (next == IA32_SYSCALL_VECTOR) +- continue; +-#else +- if (next == SYSCALL_VECTOR) +- continue; +-#endif +- count++; +- if (next >= FIRST_SYSTEM_VECTOR) { +- if (offset%8) { +- next = FIRST_DEVICE_VECTOR + offset; +- offset++; +- continue; +- } +- count--; +- } +- } +- +- return count; +-} +Index: 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/ia32/ia32entry-xen.S 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/ia32/ia32entry-xen.S 2007-10-22 13:53:08.000000000 +0200 +@@ -88,6 +88,7 @@ + */ + ENTRY(ia32_sysenter_target) + CFI_STARTPROC32 simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,0 + CFI_REGISTER rsp,rbp + __swapgs +@@ -206,6 +207,7 @@ ENDPROC(ia32_sysenter_target) + */ + ENTRY(ia32_cstar_target) + CFI_STARTPROC32 simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,PDA_STACKOFFSET + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ +@@ -315,6 +317,7 @@ ia32_badarg: + + ENTRY(ia32_syscall) + CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME + 
CFI_DEF_CFA rsp,SS+8-RIP + /*CFI_REL_OFFSET ss,SS-RIP*/ + CFI_REL_OFFSET rsp,RSP-RIP +@@ -397,6 +400,7 @@ ENTRY(ia32_ptregs_common) + popq %r11 + CFI_ENDPROC + CFI_STARTPROC32 simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-ARGOFFSET + CFI_REL_OFFSET rax,RAX-ARGOFFSET + CFI_REL_OFFSET rcx,RCX-ARGOFFSET +@@ -730,8 +734,8 @@ ia32_sys_call_table: + .quad sys_readlinkat /* 305 */ + .quad sys_fchmodat + .quad sys_faccessat +- .quad quiet_ni_syscall /* pselect6 for now */ +- .quad quiet_ni_syscall /* ppoll for now */ ++ .quad compat_sys_pselect6 ++ .quad compat_sys_ppoll + .quad sys_unshare /* 310 */ + .quad compat_sys_set_robust_list + .quad compat_sys_get_robust_list +@@ -740,4 +744,5 @@ ia32_sys_call_table: + .quad sys_tee + .quad compat_sys_vmsplice + .quad compat_sys_move_pages ++ .quad sys_getcpu + ia32_syscall_end: +Index: 10.3-2007-11-26/arch/x86_64/kernel/Makefile +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/Makefile 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/Makefile 2007-10-22 13:53:08.000000000 +0200 +@@ -4,7 +4,7 @@ + + extra-y := head.o head64.o init_task.o vmlinux.lds + EXTRA_AFLAGS := -traditional +-obj-y := process.o signal.o entry.o traps.o irq.o \ ++obj-y := process.o signal.o entry.o traps.o irq.o nmi.o \ + ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ + x8664_ksyms.o i387.o syscall.o vsyscall.o \ + setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ +@@ -21,9 +21,9 @@ obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_MICROCODE) += microcode.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o +-obj-y += apic.o nmi.o ++obj-$(CONFIG_X86_LOCAL_APIC) += apic.o + obj-$(CONFIG_X86_XEN_GENAPIC) += genapic.o genapic_xen.o +-obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o ++obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o genapic.o genapic_flat.o + obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o + obj-$(CONFIG_CRASH_DUMP) += crash_dump.o + obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o +@@ -68,7 +68,7 @@ pci-dma-y += ../../i386/kernel/pci-dma + microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o + quirks-y := ../../i386/kernel/quirks-xen.o + +-n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o ++n-obj-xen := early-quirks.o i8259.o reboot.o i8237.o smpboot.o trampoline.o + + include $(srctree)/scripts/Makefile.xen + +Index: 10.3-2007-11-26/arch/x86_64/kernel/acpi/Makefile +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/acpi/Makefile 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/acpi/Makefile 2007-10-22 13:53:08.000000000 +0200 +@@ -8,3 +8,4 @@ processor-y := ../../../i386/kernel/acp + endif + + boot-$(CONFIG_XEN) := ../../../i386/kernel/acpi/boot-xen.o ++processor-$(CONFIG_XEN) := ../../../i386/kernel/acpi/processor.o +Index: 10.3-2007-11-26/arch/x86_64/kernel/apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/apic-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/apic-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -43,7 +43,7 @@ int apic_verbosity; + */ + void ack_bad_irq(unsigned int irq) + { +- printk("unexpected IRQ trap at vector %02x\n", irq); ++ printk("unexpected IRQ trap at irq %02x\n", irq); + /* + * Currently unexpected vectors happen only on SMP and 
APIC. + * We _must_ ack these because every local APIC has only N +@@ -62,19 +62,19 @@ int setup_profiling_timer(unsigned int m + return -EINVAL; + } + +-void smp_local_timer_interrupt(struct pt_regs *regs) ++void smp_local_timer_interrupt(void) + { +- profile_tick(CPU_PROFILING, regs); ++ profile_tick(CPU_PROFILING); + #ifndef CONFIG_XEN + #ifdef CONFIG_SMP +- update_process_times(user_mode(regs)); ++ update_process_times(user_mode(get_irq_regs())); + #endif + #endif + /* + * We take the 'long' return path, and there every subsystem + * grabs the appropriate locks (kernel lock/ irq lock). + * +- * we might want to decouple profiling from the 'long path', ++ * We might want to decouple profiling from the 'long path', + * and do the profiling totally in assembly. + * + * Currently this isn't too much of an issue (performance wise), +@@ -92,6 +92,8 @@ void smp_local_timer_interrupt(struct pt + */ + void smp_apic_timer_interrupt(struct pt_regs *regs) + { ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ + /* + * the NMI deadlock-detector uses this. + */ +@@ -109,8 +111,9 @@ void smp_apic_timer_interrupt(struct pt_ + */ + exit_idle(); + irq_enter(); +- smp_local_timer_interrupt(regs); ++ smp_local_timer_interrupt(); + irq_exit(); ++ set_irq_regs(old_regs); + } + + /* +@@ -188,9 +191,8 @@ int disable_apic; + int __init APIC_init_uniprocessor (void) + { + #ifdef CONFIG_X86_IO_APIC +- if (smp_found_config) +- if (!skip_ioapic_setup && nr_ioapics) +- setup_IO_APIC(); ++ if (smp_found_config && !skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); + #endif + + return 1; +Index: 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/e820-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/e820-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -16,6 +16,7 @@ + #include <linux/string.h> + #include <linux/kexec.h> + #include <linux/module.h> ++#include <linux/mm.h> + + #include <asm/pgtable.h> + #include <asm/page.h> +@@ -25,6 +26,11 @@ + #include <asm/sections.h> + #include <xen/interface/memory.h> + ++struct e820map e820 __initdata; ++#ifdef CONFIG_XEN ++struct e820map machine_e820 __initdata; ++#endif ++ + /* + * PFN of last memory page. + */ +@@ -41,7 +47,7 @@ unsigned long end_pfn_map; + /* + * Last pfn which the user wants to use. 
+ */ +-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT; ++static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; + + extern struct resource code_resource, data_resource; + +@@ -53,13 +59,13 @@ static inline int bad_addr(unsigned long + #ifndef CONFIG_XEN + /* various gunk below that needed for SMP startup */ + if (addr < 0x8000) { +- *addrp = 0x8000; ++ *addrp = PAGE_ALIGN(0x8000); + return 1; + } + + /* direct mapping tables of the kernel */ + if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { +- *addrp = table_end << PAGE_SHIFT; ++ *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT); + return 1; + } + +@@ -67,23 +73,18 @@ static inline int bad_addr(unsigned long + #ifdef CONFIG_BLK_DEV_INITRD + if (LOADER_TYPE && INITRD_START && last >= INITRD_START && + addr < INITRD_START+INITRD_SIZE) { +- *addrp = INITRD_START + INITRD_SIZE; ++ *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); + return 1; + } + #endif +- /* kernel code + 640k memory hole (later should not be needed, but +- be paranoid for now) */ +- if (last >= 640*1024 && addr < 1024*1024) { +- *addrp = 1024*1024; +- return 1; +- } +- if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) { +- *addrp = __pa_symbol(&_end); ++ /* kernel code */ ++ if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) { ++ *addrp = PAGE_ALIGN(__pa_symbol(&_end)); + return 1; + } + + if (last >= ebda_addr && addr < ebda_addr + ebda_size) { +- *addrp = ebda_addr + ebda_size; ++ *addrp = PAGE_ALIGN(ebda_addr + ebda_size); + return 1; + } + +@@ -132,8 +133,6 @@ int __init e820_all_mapped(unsigned long + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + #else +- extern struct e820map machine_e820; +- + if (!is_initial_xendomain()) + return 0; + for (i = 0; i < machine_e820.nr_map; i++) { +@@ -175,7 +174,7 @@ unsigned long __init find_e820_area(unsi + continue; + while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size) + ; +- last = addr + size; ++ last = PAGE_ALIGN(addr) + size; + if (last > ei->addr + ei->size) + continue; + if (last > end) +@@ -185,59 +184,14 @@ unsigned long __init find_e820_area(unsi + return -1UL; + } + +-/* +- * Free bootmem based on the e820 table for a node. 
+- */ +-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end) +-{ +- int i; +- for (i = 0; i < e820.nr_map; i++) { +- struct e820entry *ei = &e820.map[i]; +- unsigned long last, addr; +- +- if (ei->type != E820_RAM || +- ei->addr+ei->size <= start || +- ei->addr >= end) +- continue; +- +- addr = round_up(ei->addr, PAGE_SIZE); +- if (addr < start) +- addr = start; +- +- last = round_down(ei->addr + ei->size, PAGE_SIZE); +- if (last >= end) +- last = end; +- +- if (last > addr && last-addr >= PAGE_SIZE) +- free_bootmem_node(pgdat, addr, last-addr); +- } +-} +- + /* + * Find the highest page frame number we have available + */ + unsigned long __init e820_end_of_ram(void) + { +- int i; + unsigned long end_pfn = 0; ++ end_pfn = find_max_pfn_with_active_regions(); + +- for (i = 0; i < e820.nr_map; i++) { +- struct e820entry *ei = &e820.map[i]; +- unsigned long start, end; +- +- start = round_up(ei->addr, PAGE_SIZE); +- end = round_down(ei->addr + ei->size, PAGE_SIZE); +- if (start >= end) +- continue; +- if (ei->type == E820_RAM) { +- if (end > end_pfn<<PAGE_SHIFT) +- end_pfn = end>>PAGE_SHIFT; +- } else { +- if (end > end_pfn_map<<PAGE_SHIFT) +- end_pfn_map = end>>PAGE_SHIFT; +- } +- } +- + if (end_pfn > end_pfn_map) + end_pfn_map = end_pfn; + if (end_pfn_map > MAXMEM>>PAGE_SHIFT) +@@ -247,43 +201,10 @@ unsigned long __init e820_end_of_ram(voi + if (end_pfn > end_pfn_map) + end_pfn = end_pfn_map; + ++ printk("end_pfn_map = %lu\n", end_pfn_map); + return end_pfn; + } + +-/* +- * Compute how much memory is missing in a range. +- * Unlike the other functions in this file the arguments are in page numbers. +- */ +-unsigned long __init +-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn) +-{ +- unsigned long ram = 0; +- unsigned long start = start_pfn << PAGE_SHIFT; +- unsigned long end = end_pfn << PAGE_SHIFT; +- int i; +- for (i = 0; i < e820.nr_map; i++) { +- struct e820entry *ei = &e820.map[i]; +- unsigned long last, addr; +- +- if (ei->type != E820_RAM || +- ei->addr+ei->size <= start || +- ei->addr >= end) +- continue; +- +- addr = round_up(ei->addr, PAGE_SIZE); +- if (addr < start) +- addr = start; +- +- last = round_down(ei->addr + ei->size, PAGE_SIZE); +- if (last >= end) +- last = end; +- +- if (last > addr) +- ram += last - addr; +- } +- return ((end - start) - ram) >> PAGE_SHIFT; +-} +- + /* + * Mark e820 reserved areas as busy for the resource manager. + */ +@@ -324,6 +245,98 @@ void __init e820_reserve_resources(struc + } + } + ++#ifndef CONFIG_XEN ++/* Mark pages corresponding to given address range as nosave */ ++static void __init ++e820_mark_nosave_range(unsigned long start, unsigned long end) ++{ ++ unsigned long pfn, max_pfn; ++ ++ if (start >= end) ++ return; ++ ++ printk("Nosave address range: %016lx - %016lx\n", start, end); ++ max_pfn = end >> PAGE_SHIFT; ++ for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++) ++ if (pfn_valid(pfn)) ++ SetPageNosave(pfn_to_page(pfn)); ++} ++ ++/* ++ * Find the ranges of physical addresses that do not correspond to ++ * e820 RAM areas and mark the corresponding pages as nosave for software ++ * suspend and suspend to RAM. ++ * ++ * This function requires the e820 map to be sorted and without any ++ * overlapping entries and assumes the first e820 area to be RAM. 
++ */ ++void __init e820_mark_nosave_regions(void) ++{ ++ int i; ++ unsigned long paddr; ++ ++ paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE); ++ for (i = 1; i < e820.nr_map; i++) { ++ struct e820entry *ei = &e820.map[i]; ++ ++ if (paddr < ei->addr) ++ e820_mark_nosave_range(paddr, ++ round_up(ei->addr, PAGE_SIZE)); ++ ++ paddr = round_down(ei->addr + ei->size, PAGE_SIZE); ++ if (ei->type != E820_RAM) ++ e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE), ++ paddr); ++ ++ if (paddr >= (end_pfn << PAGE_SHIFT)) ++ break; ++ } ++} ++#endif ++ ++/* Walk the e820 map and register active regions within a node */ ++void __init ++e820_register_active_regions(int nid, unsigned long start_pfn, ++ unsigned long end_pfn) ++{ ++ int i; ++ unsigned long ei_startpfn, ei_endpfn; ++ for (i = 0; i < e820.nr_map; i++) { ++ struct e820entry *ei = &e820.map[i]; ++ ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT; ++ ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) ++ >> PAGE_SHIFT; ++ ++ /* Skip map entries smaller than a page */ ++ if (ei_startpfn >= ei_endpfn) ++ continue; ++ ++ /* Check if end_pfn_map should be updated */ ++ if (ei->type != E820_RAM && ei_endpfn > end_pfn_map) ++ end_pfn_map = ei_endpfn; ++ ++ /* Skip if map is outside the node */ ++ if (ei->type != E820_RAM || ++ ei_endpfn <= start_pfn || ++ ei_startpfn >= end_pfn) ++ continue; ++ ++ /* Check for overlaps */ ++ if (ei_startpfn < start_pfn) ++ ei_startpfn = start_pfn; ++ if (ei_endpfn > end_pfn) ++ ei_endpfn = end_pfn; ++ ++ /* Obey end_user_pfn to save on memmap */ ++ if (ei_startpfn >= end_user_pfn) ++ continue; ++ if (ei_endpfn > end_user_pfn) ++ ei_endpfn = end_user_pfn; ++ ++ add_active_range(nid, ei_startpfn, ei_endpfn); ++ } ++} ++ + /* + * Add a memory region to the kernel e820 map. + */ +@@ -544,13 +557,6 @@ static int __init sanitize_e820_map(stru + * If we're lucky and live on a modern system, the setup code + * will have given us a memory map that we can use to properly + * set up memory. If we aren't, we'll fake a memory map. +- * +- * We check to see that the memory map contains at least 2 elements +- * before we'll use it, because the detection code in setup.S may +- * not be perfect and most every PC known to man has two memory +- * regions: one from 0 to 640k, and one from 1mb up. (The IBM +- * thinkpad 560x, for example, does not cooperate with the memory +- * detection code.) + */ + static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) + { +@@ -572,37 +578,20 @@ static int __init copy_e820_map(struct e + if (start > end) + return -1; + +-#ifndef CONFIG_XEN +- /* +- * Some BIOSes claim RAM in the 640k - 1M region. +- * Not right. Fix it up. +- * +- * This should be removed on Hammer which is supposed to not +- * have non e820 covered ISA mappings there, but I had some strange +- * problems so it stays for now. -AK +- */ +- if (type == E820_RAM) { +- if (start < 0x100000ULL && end > 0xA0000ULL) { +- if (start < 0xA0000ULL) +- add_memory_region(start, 0xA0000ULL-start, type); +- if (end <= 0x100000ULL) +- continue; +- start = 0x100000ULL; +- size = end - start; +- } +- } +-#endif +- + add_memory_region(start, size, type); + } while (biosmap++,--nr_map); + return 0; + } + ++void early_panic(char *msg) ++{ ++ early_printk(msg); ++ panic(msg); ++} ++ + #ifndef CONFIG_XEN + void __init setup_memory_region(void) + { +- char *who = "BIOS-e820"; +- + /* + * Try to copy the BIOS-supplied E820-map. 
+ * +@@ -610,24 +599,10 @@ void __init setup_memory_region(void) + * the next section from 1mb->appropriate_mem_k + */ + sanitize_e820_map(E820_MAP, &E820_MAP_NR); +- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) { +- unsigned long mem_size; +- +- /* compare results from other methods and take the greater */ +- if (ALT_MEM_K < EXT_MEM_K) { +- mem_size = EXT_MEM_K; +- who = "BIOS-88"; +- } else { +- mem_size = ALT_MEM_K; +- who = "BIOS-e801"; +- } +- +- e820.nr_map = 0; +- add_memory_region(0, LOWMEMSIZE(), E820_RAM); +- add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); +- } ++ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) ++ early_panic("Cannot find a valid memory map"); + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); +- e820_print_map(who); ++ e820_print_map("BIOS-e820"); + } + + #else /* CONFIG_XEN */ +@@ -659,20 +634,23 @@ void __init setup_memory_region(void) + + sanitize_e820_map(map, (char *)&memmap.nr_entries); + +- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0); ++ if (copy_e820_map(map, (char)memmap.nr_entries) < 0) ++ early_panic("Cannot find a valid memory map"); + + printk(KERN_INFO "BIOS-provided physical RAM map:\n"); + e820_print_map("Xen"); + } + #endif + +-void __init parse_memopt(char *p, char **from) +-{ ++static int __init parse_memopt(char *p) ++{ + int i; + unsigned long current_end; + unsigned long end; + +- end_user_pfn = memparse(p, from); ++ if (!p) ++ return -EINVAL; ++ end_user_pfn = memparse(p, &p); + end_user_pfn >>= PAGE_SHIFT; + + end = end_user_pfn<<PAGE_SHIFT; +@@ -689,27 +667,61 @@ void __init parse_memopt(char *p, char * + else + add_memory_region(current_end, end - current_end, E820_RAM); + } ++ ++ return 0; + } ++early_param("mem", parse_memopt); ++ ++static int userdef __initdata; + +-void __init parse_memmapopt(char *p, char **from) ++static int __init parse_memmap_opt(char *p) + { ++ char *oldp; + unsigned long long start_at, mem_size; + +- mem_size = memparse(p, from); +- p = *from; ++ if (!strcmp(p, "exactmap")) { ++#ifdef CONFIG_CRASH_DUMP ++ /* If we are doing a crash dump, we ++ * still need to know the real mem ++ * size before original memory map is ++ * reset. ++ */ ++ e820_register_active_regions(0, 0, -1UL); ++ saved_max_pfn = e820_end_of_ram(); ++ remove_all_active_ranges(); ++#endif ++ end_pfn_map = 0; ++ e820.nr_map = 0; ++ userdef = 1; ++ return 0; ++ } ++ ++ oldp = p; ++ mem_size = memparse(p, &p); ++ if (p == oldp) ++ return -EINVAL; + if (*p == '@') { +- start_at = memparse(p+1, from); ++ start_at = memparse(p+1, &p); + add_memory_region(start_at, mem_size, E820_RAM); + } else if (*p == '#') { +- start_at = memparse(p+1, from); ++ start_at = memparse(p+1, &p); + add_memory_region(start_at, mem_size, E820_ACPI); + } else if (*p == '$') { +- start_at = memparse(p+1, from); ++ start_at = memparse(p+1, &p); + add_memory_region(start_at, mem_size, E820_RESERVED); + } else { + end_user_pfn = (mem_size >> PAGE_SHIFT); + } +- p = *from; ++ return *p == '\0' ? 
0 : -EINVAL; ++} ++early_param("memmap", parse_memmap_opt); ++ ++void finish_e820_parsing(void) ++{ ++ if (userdef) { ++ printk(KERN_INFO "user-defined physical RAM map:\n"); ++ e820_print_map("user"); ++ } + } + + unsigned long pci_mem_start = 0xaeedbabe; +Index: 10.3-2007-11-26/arch/x86_64/kernel/early_printk-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/early_printk-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/early_printk-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...) + + static int __initdata keep_early; + +-int __init setup_early_printk(char *opt) ++static int __init setup_early_printk(char *buf) + { +- char *space; +- char buf[256]; ++ if (!buf) ++ return 0; + + if (early_console_initialized) +- return 1; +- +- strlcpy(buf,opt,sizeof(buf)); +- space = strchr(buf, ' '); +- if (space) +- *space = 0; ++ return 0; ++ early_console_initialized = 1; + +- if (strstr(buf,"keep")) ++ if (strstr(buf, "keep")) + keep_early = 1; + + if (!strncmp(buf, "serial", 6)) { +@@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt) + early_console = &simnow_console; + keep_early = 1; + } +- early_console_initialized = 1; + register_console(early_console); + return 0; + } + ++early_param("earlyprintk", setup_early_printk); ++ + void __init disable_early_printk(void) + { + if (!early_console_initialized || !early_console) +@@ -299,4 +296,3 @@ void __init disable_early_printk(void) + } + } + +-__setup("earlyprintk=", setup_early_printk); +Index: 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/entry-xen.S 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/entry-xen.S 2007-10-22 13:53:08.000000000 +0200 +@@ -4,9 +4,6 @@ + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs + * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> +- * +- * $Id$ +- * + * Jun Nakajima <jun.nakajima@intel.com> + * Asit Mallick <asit.k.mallick@intel.com> + * Modified for Xen +@@ -26,15 +23,25 @@ + * at the top of the kernel process stack. + * - partial stack frame: partially saved registers upto R11. + * - full stack frame: Like partial stack frame, but all register saved. +- * +- * TODO: +- * - schedule it carefully for the final hardware. ++ * ++ * Some macro usage: ++ * - CFI macros are used to generate dwarf2 unwind information for better ++ * backtraces. They don't change any code. ++ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers ++ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify. ++ * There are unfortunately lots of special cases where some registers ++ * not touched. The macro is a big mess that should be cleaned up. ++ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS. ++ * Gives a full stack frame. ++ * - ENTRY/END Define functions in the symbol table. ++ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack ++ * frame that is otherwise undefined after a SYSCALL ++ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging. ++ * - errorentry/paranoidentry/zeroentry - Define exception entry points. 
+ */ + +-#define ASSEMBLY 1 + #include <linux/linkage.h> + #include <asm/segment.h> +-#include <asm/smp.h> + #include <asm/cache.h> + #include <asm/errno.h> + #include <asm/dwarf2.h> +@@ -119,6 +126,7 @@ NMI_MASK = 0x80000000 + .macro CFI_DEFAULT_STACK start=1,adj=0 + .if \start + CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET) + .else + CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET) +@@ -180,6 +188,10 @@ NMI_MASK = 0x80000000 + /* rdi: prev */ + ENTRY(ret_from_fork) + CFI_DEFAULT_STACK ++ push kernel_eflags(%rip) ++ CFI_ADJUST_CFA_OFFSET 4 ++ popf # reset kernel eflags ++ CFI_ADJUST_CFA_OFFSET -4 + call schedule_tail + GET_THREAD_INFO(%rcx) + testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) +@@ -205,6 +217,7 @@ END(ret_from_fork) + */ + .macro _frame ref + CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-\ref + /*CFI_REL_OFFSET ss,SS-\ref*/ + CFI_REL_OFFSET rsp,RSP-\ref +@@ -337,6 +350,8 @@ tracesys: + LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ + RESTORE_REST + cmpq $__NR_syscall_max,%rax ++ movq $-ENOSYS,%rcx ++ cmova %rcx,%rax + ja 1f + movq %r10,%rcx /* fixup for C */ + call *sys_call_table(,%rax,8) +@@ -352,6 +367,7 @@ END(system_call) + */ + ENTRY(int_ret_from_sys_call) + CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-ARGOFFSET + /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/ + CFI_REL_OFFSET rsp,RSP-ARGOFFSET +@@ -586,8 +602,7 @@ retint_signal: + #ifdef CONFIG_PREEMPT + /* Returning to kernel space. Check if we need preemption */ + /* rcx: threadinfo. interrupts off. */ +- .p2align +-retint_kernel: ++ENTRY(retint_kernel) + cmpl $0,threadinfo_preempt_count(%rcx) + jnz retint_restore_args + bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) +@@ -647,7 +662,6 @@ ENTRY(call_function_interrupt) + END(call_function_interrupt) + #endif + +-#ifdef CONFIG_X86_LOCAL_APIC + ENTRY(apic_timer_interrupt) + apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt + END(apic_timer_interrupt) +@@ -659,7 +673,6 @@ END(error_interrupt) + ENTRY(spurious_interrupt) + apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt + END(spurious_interrupt) +-#endif + #endif /* !CONFIG_XEN */ + + /* +@@ -758,7 +771,9 @@ paranoid_exit\trace: + testl $3,CS(%rsp) + jnz paranoid_userspace\trace + paranoid_swapgs\trace: ++ .if \trace + TRACE_IRQS_IRETQ 0 ++ .endif + swapgs + paranoid_restore\trace: + RESTORE_ALL 8 +@@ -805,7 +820,7 @@ paranoid_schedule\trace: + * Exception entry point. This expects an error code/orig_rax on the stack + * and the exception handler in %rax. 
+ */ +-ENTRY(error_entry) ++KPROBE_ENTRY(error_entry) + _frame RDI + CFI_REL_OFFSET rax,0 + /* rdi slot contains rax, oldrax contains error code */ +@@ -899,7 +914,7 @@ error_kernelspace: + jmp error_sti + #endif + CFI_ENDPROC +-END(error_entry) ++KPROBE_END(error_entry) + + ENTRY(hypervisor_callback) + zeroentry do_hypervisor_callback +@@ -939,26 +954,6 @@ ENTRY(do_hypervisor_callback) # do_hyp + CFI_ENDPROC + END(do_hypervisor_callback) + +-#ifdef CONFIG_X86_LOCAL_APIC +-KPROBE_ENTRY(nmi) +- zeroentry do_nmi_callback +-ENTRY(do_nmi_callback) +- CFI_STARTPROC +- addq $8, %rsp +- CFI_ENDPROC +- CFI_DEFAULT_STACK +- call do_nmi +- orl $NMI_MASK,EFLAGS(%rsp) +- RESTORE_REST +- XEN_BLOCK_EVENTS(%rsi) +- TRACE_IRQS_OFF +- GET_THREAD_INFO(%rcx) +- jmp retint_restore_args +- CFI_ENDPROC +- .previous .text +-END(nmi) +-#endif +- + ALIGN + restore_all_enable_events: + CFI_DEFAULT_STACK adj=1 +@@ -1124,7 +1119,7 @@ ENDPROC(child_rip) + * do_sys_execve asm fallback arguments: + * rdi: name, rsi: argv, rdx: envp, fake frame on the stack + */ +-ENTRY(execve) ++ENTRY(kernel_execve) + CFI_STARTPROC + FAKE_STACK_FRAME $0 + SAVE_ALL +@@ -1138,12 +1133,11 @@ ENTRY(execve) + UNFAKE_STACK_FRAME + ret + CFI_ENDPROC +-ENDPROC(execve) ++ENDPROC(kernel_execve) + + KPROBE_ENTRY(page_fault) + errorentry do_page_fault +-END(page_fault) +- .previous .text ++KPROBE_END(page_fault) + + ENTRY(coprocessor_error) + zeroentry do_coprocessor_error +@@ -1165,25 +1159,25 @@ KPROBE_ENTRY(debug) + zeroentry do_debug + /* paranoidexit + CFI_ENDPROC */ +-END(debug) +- .previous .text ++KPROBE_END(debug) + +-#if 0 +- /* runs on exception stack */ + KPROBE_ENTRY(nmi) +- INTR_FRAME +- pushq $-1 +- CFI_ADJUST_CFA_OFFSET 8 +- paranoidentry do_nmi, 0, 0 +-#ifdef CONFIG_TRACE_IRQFLAGS +- paranoidexit 0 +-#else +- jmp paranoid_exit1 +- CFI_ENDPROC +-#endif +-END(nmi) +- .previous .text +-#endif ++ zeroentry do_nmi_callback ++KPROBE_END(nmi) ++do_nmi_callback: ++ CFI_STARTPROC ++ addq $8, %rsp ++ CFI_ENDPROC ++ CFI_DEFAULT_STACK ++ call do_nmi ++ orl $NMI_MASK,EFLAGS(%rsp) ++ RESTORE_REST ++ XEN_BLOCK_EVENTS(%rsi) ++ TRACE_IRQS_OFF ++ GET_THREAD_INFO(%rcx) ++ jmp retint_restore_args ++ CFI_ENDPROC ++END(do_nmi_callback) + + KPROBE_ENTRY(int3) + /* INTR_FRAME +@@ -1192,8 +1186,7 @@ KPROBE_ENTRY(int3) + zeroentry do_int3 + /* jmp paranoid_exit1 + CFI_ENDPROC */ +-END(int3) +- .previous .text ++KPROBE_END(int3) + + ENTRY(overflow) + zeroentry do_overflow +@@ -1244,8 +1237,7 @@ END(stack_segment) + + KPROBE_ENTRY(general_protection) + errorentry do_general_protection +-END(general_protection) +- .previous .text ++KPROBE_END(general_protection) + + ENTRY(alignment_check) + errorentry do_alignment_check +Index: 10.3-2007-11-26/arch/x86_64/kernel/genapic_xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/genapic_xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/genapic_xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -71,6 +71,13 @@ static cpumask_t xen_target_cpus(void) + return cpu_online_map; + } + ++static cpumask_t xen_vector_allocation_domain(int cpu) ++{ ++ cpumask_t domain = CPU_MASK_NONE; ++ cpu_set(cpu, domain); ++ return domain; ++} ++ + /* + * Set up the logical destination ID. + * Do nothing, not called now. 
+@@ -147,8 +154,8 @@ struct genapic apic_xen = { + .int_delivery_mode = dest_LowestPrio, + #endif + .int_dest_mode = (APIC_DEST_LOGICAL != 0), +- .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST, + .target_cpus = xen_target_cpus, ++ .vector_allocation_domain = xen_vector_allocation_domain, + #ifdef CONFIG_XEN_PRIVILEGED_GUEST + .apic_id_registered = xen_apic_id_registered, + #endif +Index: 10.3-2007-11-26/arch/x86_64/kernel/head-xen.S +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head-xen.S 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/head-xen.S 2007-10-22 13:53:08.000000000 +0200 +@@ -5,9 +5,6 @@ + * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> + * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> +- * +- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $ +- * + * Jun Nakajima <jun.nakajima@intel.com> + * Modified for Xen + */ +@@ -138,7 +135,7 @@ ENTRY(cpu_gdt_table) + .quad 0,0 /* TSS */ + .quad 0,0 /* LDT */ + .quad 0,0,0 /* three TLS descriptors */ +- .quad 0 /* unused */ ++ .quad 0x0000f40000000000 /* node/CPU stored in limit */ + gdt_end: + /* asm/segment.h:GDT_ENTRIES must match this */ + /* This should be a multiple of the cache line size */ +Index: 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/head64-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/head64-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -54,11 +54,9 @@ static void __init copy_bootdata(char *r + new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); + if (!new_data) { + if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { +- printk("so old bootloader that it does not support commandline?!\n"); + return; + } + new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; +- printk("old bootloader convention, maybe loadlin?\n"); + } + command_line = (char *) ((u64)(new_data)); + memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE); +@@ -70,25 +68,6 @@ static void __init copy_bootdata(char *r + memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline); + saved_command_line[max_cmdline-1] = '\0'; + #endif +- printk("Bootdata ok (command line is %s)\n", saved_command_line); +-} +- +-static void __init setup_boot_cpu_data(void) +-{ +- unsigned int dummy, eax; +- +- /* get vendor info */ +- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level, +- (unsigned int *)&boot_cpu_data.x86_vendor_id[0], +- (unsigned int *)&boot_cpu_data.x86_vendor_id[8], +- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]); +- +- /* get cpu type */ +- cpuid(1, &eax, &dummy, &dummy, +- (unsigned int *) &boot_cpu_data.x86_capability); +- boot_cpu_data.x86 = (eax >> 8) & 0xf; +- boot_cpu_data.x86_model = (eax >> 4) & 0xf; +- boot_cpu_data.x86_mask = eax & 0xf; + } + + #include <xen/interface/memory.h> +@@ -101,7 +80,6 @@ void __init x86_64_start_kernel(char * r + { + struct xen_machphys_mapping mapping; + unsigned long machine_to_phys_nr_ents; +- char *s; + int i; + + setup_xen_features(); +@@ -128,10 +106,7 @@ void __init x86_64_start_kernel(char * r + asm volatile("lidt %0" :: "m" (idt_descr)); + #endif + +- /* +- * This must be called really, really early: +- */ +- lockdep_init(); ++ early_printk("Kernel alive\n"); + + for (i = 0; i < NR_CPUS; i++) + cpu_pda(i) = &boot_cpu_pda[i]; +@@ -141,22 +116,5 @@ void __init x86_64_start_kernel(char * r 
+ #ifdef CONFIG_SMP + cpu_set(0, cpu_online_map); + #endif +- s = strstr(saved_command_line, "earlyprintk="); +- if (s != NULL) +- setup_early_printk(strchr(s, '=') + 1); +-#ifdef CONFIG_NUMA +- s = strstr(saved_command_line, "numa="); +- if (s != NULL) +- numa_setup(s+5); +-#endif +-#ifdef CONFIG_X86_IO_APIC +- if (strstr(saved_command_line, "disableapic")) +- disable_apic = 1; +-#endif +- /* You need early console to see that */ +- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE) +- panic("Kernel too big for kernel mapping\n"); +- +- setup_boot_cpu_data(); + start_kernel(); + } +Index: 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/io_apic-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/io_apic-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -26,9 +26,12 @@ + #include <linux/delay.h> + #include <linux/sched.h> + #include <linux/smp_lock.h> ++#include <linux/pci.h> + #include <linux/mc146818rtc.h> + #include <linux/acpi.h> + #include <linux/sysdev.h> ++#include <linux/msi.h> ++#include <linux/htirq.h> + #ifdef CONFIG_ACPI + #include <acpi/acpi_bus.h> + #endif +@@ -41,6 +44,10 @@ + #include <asm/acpi.h> + #include <asm/dma.h> + #include <asm/nmi.h> ++#include <asm/msidef.h> ++#include <asm/hypertransport.h> ++ ++static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result); + + #define __apicdebuginit __init + +@@ -48,17 +55,29 @@ int sis_apic_bug; /* not actually suppor + + static int no_timer_check; + +-int disable_timer_pin_1 __initdata; ++static int disable_timer_pin_1 __initdata; + +-#ifndef CONFIG_XEN +-int timer_over_8254 __initdata = 0; ++#ifdef CONFIG_XEN ++#include <xen/interface/xen.h> ++#include <xen/interface/physdev.h> ++ ++/* Fake i8259 */ ++#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) ++#define disable_8259A_irq(_irq) ((void)0) ++#define i8259A_irq_pending(_irq) (0) ++ ++unsigned long io_apic_irqs; ++ ++#define clear_IO_APIC() ((void)0) ++#else ++int timer_over_8254 __initdata = 1; + + /* Where if anywhere is the i8259 connect in external int mode */ + static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + #endif + + static DEFINE_SPINLOCK(ioapic_lock); +-static DEFINE_SPINLOCK(vector_lock); ++DEFINE_SPINLOCK(vector_lock); + + /* + * # of IRQ routing registers +@@ -83,28 +102,27 @@ static struct irq_pin_list { + short apic, pin, next; + } irq_2_pin[PIN_MAP_SIZE]; + +-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; +-#ifdef CONFIG_PCI_MSI +-#define vector_to_irq(vector) \ +- (platform_legacy_irq(vector) ? 
vector : vector_irq[vector]) +-#else +-#define vector_to_irq(vector) (vector) +-#endif +- +-#ifdef CONFIG_XEN +- +-#include <xen/interface/xen.h> +-#include <xen/interface/physdev.h> +- +-/* Fake i8259 */ +-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) +-#define disable_8259A_irq(_irq) ((void)0) +-#define i8259A_irq_pending(_irq) (0) ++#ifndef CONFIG_XEN ++struct io_apic { ++ unsigned int index; ++ unsigned int unused[3]; ++ unsigned int data; ++}; + +-unsigned long io_apic_irqs; ++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) ++{ ++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) ++ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); ++} ++#endif + +-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg) ++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) + { ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ return readl(&io_apic->data); ++#else + struct physdev_apic apic_op; + int ret; + +@@ -114,31 +132,131 @@ static inline unsigned int xen_io_apic_r + if (ret) + return ret; + return apic_op.value; ++#endif + } + +-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) + { ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++#else + struct physdev_apic apic_op; + + apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr; + apic_op.reg = reg; + apic_op.value = value; + HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); ++#endif ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * Re-write a value: to be used for read-modify-write ++ * cycles where the read already set up the index register. ++ */ ++static inline void io_apic_modify(unsigned int apic, unsigned int value) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(value, &io_apic->data); + } ++#else ++#define io_apic_modify io_apic_write ++#endif + +-#define io_apic_read(a,r) xen_io_apic_read(a,r) +-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v) ++/* ++ * Synchronize the IO-APIC and the CPU by doing ++ * a dummy read from the IO-APIC ++ */ ++static inline void io_apic_sync(unsigned int apic) ++{ ++#ifndef CONFIG_XEN ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ readl(&io_apic->data); ++#endif ++} + +-#define clear_IO_APIC() ((void)0) ++union entry_union { ++ struct { u32 w1, w2; }; ++ struct IO_APIC_route_entry entry; ++}; + +-#else ++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); ++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ return eu.entry; ++} ++ ++/* ++ * When we write a new IO APIC routing entry, we need to write the high ++ * word first! If the mask bit in the low word is clear, we will enable ++ * the interrupt, and we need to make sure the entry is fully populated ++ * before that happens. 
++ */ ++static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ union entry_union eu; ++ eu.entry = e; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#ifndef CONFIG_XEN ++/* ++ * When we mask an IO APIC routing entry, we need to write the low ++ * word first, in order to set the mask bit before we change the ++ * high bits! ++ */ ++static void ioapic_mask_entry(int apic, int pin) ++{ ++ unsigned long flags; ++ union entry_union eu = { .entry.mask = 1 }; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} + + #ifdef CONFIG_SMP ++static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) ++{ ++ int apic, pin; ++ struct irq_pin_list *entry = irq_2_pin + irq; ++ ++ BUG_ON(irq >= NR_IRQS); ++ for (;;) { ++ unsigned int reg; ++ apic = entry->apic; ++ pin = entry->pin; ++ if (pin == -1) ++ break; ++ io_apic_write(apic, 0x11 + pin*2, dest); ++ reg = io_apic_read(apic, 0x10 + pin*2); ++ reg &= ~0x000000ff; ++ reg |= vector; ++ io_apic_modify(apic, reg); ++ if (!entry->next) ++ break; ++ entry = irq_2_pin + entry->next; ++ } ++} ++ + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) + { + unsigned long flags; + unsigned int dest; + cpumask_t tmp; ++ int vector; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) +@@ -146,7 +264,11 @@ static void set_ioapic_affinity_irq(unsi + + cpus_and(mask, tmp, CPU_MASK_ALL); + +- dest = cpu_mask_to_apicid(mask); ++ vector = assign_irq_vector(irq, mask, &tmp); ++ if (vector < 0) ++ return; ++ ++ dest = cpu_mask_to_apicid(tmp); + + /* + * Only the high 8 bits are valid. +@@ -154,13 +276,12 @@ static void set_ioapic_affinity_irq(unsi + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); +- __DO_ACTION(1, = dest, ) +- set_irq_info(irq, mask); ++ __target_IO_APIC_irq(irq, dest, vector); ++ set_native_irq_info(irq, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + #endif +- +-#endif /* !CONFIG_XEN */ ++#endif + + /* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are +@@ -240,24 +361,15 @@ static void unmask_IO_APIC_irq (unsigned + static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) + { + struct IO_APIC_route_entry entry; +- unsigned long flags; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); +- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ entry = ioapic_read_entry(apic, pin); + if (entry.delivery_mode == dest_SMI) + return; + /* + * Disable it in the IO-APIC irq-routing table: + */ +- memset(&entry, 0, sizeof(entry)); +- entry.mask = 1; +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); +- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_mask_entry(apic, pin); + } + + static void clear_IO_APIC (void) +@@ -271,16 +383,6 @@ static void clear_IO_APIC (void) + + #endif /* !CONFIG_XEN */ + +-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... 
NR_IRQ_VECTORS-1] = 0xFF }; +- +-/* +- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to +- * specific CPU-side IRQs. +- */ +- +-#define MAX_PIRQS 8 +-static int pirq_entries [MAX_PIRQS]; +-static int pirqs_enabled; + int skip_ioapic_setup; + int ioapic_force; + +@@ -289,18 +391,17 @@ int ioapic_force; + static int __init disable_ioapic_setup(char *str) + { + skip_ioapic_setup = 1; +- return 1; ++ return 0; + } ++early_param("noapic", disable_ioapic_setup); + +-static int __init enable_ioapic_setup(char *str) ++/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ ++static int __init disable_timer_pin_setup(char *arg) + { +- ioapic_force = 1; +- skip_ioapic_setup = 0; ++ disable_timer_pin_1 = 1; + return 1; + } +- +-__setup("noapic", disable_ioapic_setup); +-__setup("apic", enable_ioapic_setup); ++__setup("disable_timer_pin_1", disable_timer_pin_setup); + + #ifndef CONFIG_XEN + static int __init setup_disable_8254_timer(char *s) +@@ -318,137 +419,6 @@ __setup("disable_8254_timer", setup_disa + __setup("enable_8254_timer", setup_enable_8254_timer); + #endif /* !CONFIG_XEN */ + +-#include <asm/pci-direct.h> +-#include <linux/pci_ids.h> +-#include <linux/pci.h> +- +- +-#ifdef CONFIG_ACPI +- +-static int nvidia_hpet_detected __initdata; +- +-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size) +-{ +- nvidia_hpet_detected = 1; +- return 0; +-} +-#endif +- +-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC +- off. Check for an Nvidia or VIA PCI bridge and turn it off. +- Use pci direct infrastructure because this runs before the PCI subsystem. +- +- Can be overwritten with "apic" +- +- And another hack to disable the IOMMU on VIA chipsets. +- +- ... and others. Really should move this somewhere else. +- +- Kludge-O-Rama. */ +-void __init check_ioapic(void) +-{ +- int num,slot,func; +- /* Poor man's PCI discovery */ +- for (num = 0; num < 32; num++) { +- for (slot = 0; slot < 32; slot++) { +- for (func = 0; func < 8; func++) { +- u32 class; +- u32 vendor; +- u8 type; +- class = read_pci_config(num,slot,func, +- PCI_CLASS_REVISION); +- if (class == 0xffffffff) +- break; +- +- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI) +- continue; +- +- vendor = read_pci_config(num, slot, func, +- PCI_VENDOR_ID); +- vendor &= 0xffff; +- switch (vendor) { +- case PCI_VENDOR_ID_VIA: +-#ifdef CONFIG_IOMMU +- if ((end_pfn > MAX_DMA32_PFN || +- force_iommu) && +- !iommu_aperture_allowed) { +- printk(KERN_INFO +- "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n"); +- iommu_aperture_disabled = 1; +- } +-#endif +- return; +- case PCI_VENDOR_ID_NVIDIA: +-#ifdef CONFIG_ACPI +- /* +- * All timer overrides on Nvidia are +- * wrong unless HPET is enabled. +- */ +- nvidia_hpet_detected = 0; +- acpi_table_parse(ACPI_HPET, +- nvidia_hpet_check); +- if (nvidia_hpet_detected == 0) { +- acpi_skip_timer_override = 1; +- printk(KERN_INFO "Nvidia board " +- "detected. Ignoring ACPI " +- "timer override.\n"); +- } +-#endif +- /* RED-PEN skip them on mptables too? */ +- return; +- case PCI_VENDOR_ID_ATI: +- +- /* This should be actually default, but +- for 2.6.16 let's do it for ATI only where +- it's really needed. */ +-#ifndef CONFIG_XEN +- if (timer_over_8254 == 1) { +- timer_over_8254 = 0; +- printk(KERN_INFO +- "ATI board detected. Disabling timer routing over 8254.\n"); +- } +-#endif +- return; +- } +- +- +- /* No multi-function device? 
*/ +- type = read_pci_config_byte(num,slot,func, +- PCI_HEADER_TYPE); +- if (!(type & 0x80)) +- break; +- } +- } +- } +-} +- +-static int __init ioapic_pirq_setup(char *str) +-{ +- int i, max; +- int ints[MAX_PIRQS+1]; +- +- get_options(str, ARRAY_SIZE(ints), ints); +- +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; +- +- pirqs_enabled = 1; +- apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n"); +- max = MAX_PIRQS; +- if (ints[0] < MAX_PIRQS) +- max = ints[0]; +- +- for (i = 0; i < max; i++) { +- apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); +- /* +- * PIRQs are mapped upside down, usually. +- */ +- pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; +- } +- return 1; +-} +- +-__setup("pirq=", ioapic_pirq_setup); + + /* + * Find the IRQ entry number of a certain pin. +@@ -478,9 +448,7 @@ static int __init find_isa_irq_pin(int i + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + +- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || +- mp_bus_id_to_type[lbus] == MP_BUS_EISA || +- mp_bus_id_to_type[lbus] == MP_BUS_MCA) && ++ if (test_bit(lbus, mp_bus_not_pci) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + +@@ -496,9 +464,7 @@ static int __init find_isa_irq_apic(int + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + +- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || +- mp_bus_id_to_type[lbus] == MP_BUS_EISA || +- mp_bus_id_to_type[lbus] == MP_BUS_MCA) && ++ if (test_bit(lbus, mp_bus_not_pci) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + break; +@@ -539,7 +505,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, + mp_irqs[i].mpc_dstapic == MP_APIC_ALL) + break; + +- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) && ++ if (!test_bit(lbus, mp_bus_not_pci) && + !mp_irqs[i].mpc_irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) { +@@ -562,27 +528,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, + return best_guess; + } + +-/* +- * EISA Edge/Level control register, ELCR +- */ +-static int EISA_ELCR(unsigned int irq) +-{ +- if (irq < 16) { +- unsigned int port = 0x4d0 + (irq >> 3); +- return (inb(port) >> (irq & 7)) & 1; +- } +- apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq); +- return 0; +-} +- +-/* EISA interrupts are always polarity zero and can be edge or level +- * trigger depending on the ELCR value. If an interrupt is listed as +- * EISA conforming in the MP table, that means its trigger type must +- * be read in from the ELCR */ +- +-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq)) +-#define default_EISA_polarity(idx) (0) +- + /* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. */ + +@@ -595,12 +540,6 @@ static int EISA_ELCR(unsigned int irq) + #define default_PCI_trigger(idx) (1) + #define default_PCI_polarity(idx) (1) + +-/* MCA interrupts are always polarity zero level triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_MCA_trigger(idx) (1) +-#define default_MCA_polarity(idx) (0) +- + static int __init MPBIOS_polarity(int idx) + { + int bus = mp_irqs[idx].mpc_srcbus; +@@ -612,38 +551,11 @@ static int __init MPBIOS_polarity(int id + switch (mp_irqs[idx].mpc_irqflag & 3) + { + case 0: /* conforms, ie. 
bus-type dependent polarity */ +- { +- switch (mp_bus_id_to_type[bus]) +- { +- case MP_BUS_ISA: /* ISA pin */ +- { +- polarity = default_ISA_polarity(idx); +- break; +- } +- case MP_BUS_EISA: /* EISA pin */ +- { +- polarity = default_EISA_polarity(idx); +- break; +- } +- case MP_BUS_PCI: /* PCI pin */ +- { +- polarity = default_PCI_polarity(idx); +- break; +- } +- case MP_BUS_MCA: /* MCA pin */ +- { +- polarity = default_MCA_polarity(idx); +- break; +- } +- default: +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- } ++ if (test_bit(bus, mp_bus_not_pci)) ++ polarity = default_ISA_polarity(idx); ++ else ++ polarity = default_PCI_polarity(idx); + break; +- } + case 1: /* high active */ + { + polarity = 0; +@@ -681,38 +593,11 @@ static int MPBIOS_trigger(int idx) + switch ((mp_irqs[idx].mpc_irqflag>>2) & 3) + { + case 0: /* conforms, ie. bus-type dependent */ +- { +- switch (mp_bus_id_to_type[bus]) +- { +- case MP_BUS_ISA: /* ISA pin */ +- { +- trigger = default_ISA_trigger(idx); +- break; +- } +- case MP_BUS_EISA: /* EISA pin */ +- { +- trigger = default_EISA_trigger(idx); +- break; +- } +- case MP_BUS_PCI: /* PCI pin */ +- { +- trigger = default_PCI_trigger(idx); +- break; +- } +- case MP_BUS_MCA: /* MCA pin */ +- { +- trigger = default_MCA_trigger(idx); +- break; +- } +- default: +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 1; +- break; +- } +- } ++ if (test_bit(bus, mp_bus_not_pci)) ++ trigger = default_ISA_trigger(idx); ++ else ++ trigger = default_PCI_trigger(idx); + break; +- } + case 1: /* edge */ + { + trigger = 0; +@@ -749,64 +634,6 @@ static inline int irq_trigger(int idx) + return MPBIOS_trigger(idx); + } + +-static int next_irq = 16; +- +-/* +- * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ +- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number +- * from ACPI, which can reach 800 in large boxen. +- * +- * Compact the sparse GSI space into a sequential IRQ series and reuse +- * vectors if possible. +- */ +-int gsi_irq_sharing(int gsi) +-{ +- int i, tries, vector; +- +- BUG_ON(gsi >= NR_IRQ_VECTORS); +- +- if (platform_legacy_irq(gsi)) +- return gsi; +- +- if (gsi_2_irq[gsi] != 0xFF) +- return (int)gsi_2_irq[gsi]; +- +- tries = NR_IRQS; +- try_again: +- vector = assign_irq_vector(gsi); +- +- /* +- * Sharing vectors means sharing IRQs, so scan irq_vectors for previous +- * use of vector and if found, return that IRQ. However, we never want +- * to share legacy IRQs, which usually have a different trigger mode +- * than PCI. 
+- */ +- for (i = 0; i < NR_IRQS; i++) +- if (IO_APIC_VECTOR(i) == vector) +- break; +- if (platform_legacy_irq(i)) { +- if (--tries >= 0) { +- IO_APIC_VECTOR(i) = 0; +- goto try_again; +- } +- panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi); +- } +- if (i < NR_IRQS) { +- gsi_2_irq[gsi] = i; +- printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n", +- gsi, vector, i); +- return i; +- } +- +- i = next_irq++; +- BUG_ON(i >= NR_IRQS); +- gsi_2_irq[gsi] = i; +- IO_APIC_VECTOR(i) = vector; +- printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n", +- gsi, vector, i); +- return i; +-} +- + static int pin_2_irq(int idx, int apic, int pin) + { + int irq, i; +@@ -818,49 +645,16 @@ static int pin_2_irq(int idx, int apic, + if (mp_irqs[idx].mpc_dstirq != pin) + printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + +- switch (mp_bus_id_to_type[bus]) +- { +- case MP_BUS_ISA: /* ISA pin */ +- case MP_BUS_EISA: +- case MP_BUS_MCA: +- { +- irq = mp_irqs[idx].mpc_srcbusirq; +- break; +- } +- case MP_BUS_PCI: /* PCI pin */ +- { +- /* +- * PCI IRQs are mapped in order +- */ +- i = irq = 0; +- while (i < apic) +- irq += nr_ioapic_registers[i++]; +- irq += pin; +- irq = gsi_irq_sharing(irq); +- break; +- } +- default: +- { +- printk(KERN_ERR "unknown bus type %d.\n",bus); +- irq = 0; +- break; +- } +- } +- BUG_ON(irq >= NR_IRQS); +- +- /* +- * PCI IRQ command line redirection. Yes, limits are hardcoded. +- */ +- if ((pin >= 16) && (pin <= 23)) { +- if (pirq_entries[pin-16] != -1) { +- if (!pirq_entries[pin-16]) { +- apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16); +- } else { +- irq = pirq_entries[pin-16]; +- apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n", +- pin-16, irq); +- } +- } ++ if (test_bit(bus, mp_bus_not_pci)) { ++ irq = mp_irqs[idx].mpc_srcbusirq; ++ } else { ++ /* ++ * PCI IRQs are mapped in order ++ */ ++ i = irq = 0; ++ while (i < apic) ++ irq += nr_ioapic_registers[i++]; ++ irq += pin; + } + BUG_ON(irq >= NR_IRQS); + return irq; +@@ -884,43 +678,68 @@ static inline int IO_APIC_irq_trigger(in + } + + /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. 
*/ +-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; ++static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; + +-int assign_irq_vector(int irq) ++static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) + { +- unsigned long flags; + int vector; + struct physdev_irq irq_op; + +- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); ++ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + +- spin_lock_irqsave(&vector_lock, flags); ++ cpus_and(*result, mask, cpu_online_map); + +- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) { +- spin_unlock_irqrestore(&vector_lock, flags); +- return IO_APIC_VECTOR(irq); +- } ++ if (irq_vector[irq] > 0) ++ return irq_vector[irq]; + + irq_op.irq = irq; +- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) { +- spin_unlock_irqrestore(&vector_lock, flags); ++ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; +- } + + vector = irq_op.vector; +- vector_irq[vector] = irq; +- if (irq != AUTO_ASSIGN) +- IO_APIC_VECTOR(irq) = vector; ++ irq_vector[irq] = vector; + +- spin_unlock_irqrestore(&vector_lock, flags); ++ return vector; ++} + ++static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) ++{ ++ int vector; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ vector = __assign_irq_vector(irq, mask, result); ++ spin_unlock_irqrestore(&vector_lock, flags); + return vector; + } + +-extern void (*interrupt[NR_IRQS])(void); + #ifndef CONFIG_XEN +-static struct hw_interrupt_type ioapic_level_type; +-static struct hw_interrupt_type ioapic_edge_type; ++void __setup_vector_irq(int cpu) ++{ ++ /* Initialize vector_irq on a new cpu */ ++ /* This function must be called with vector_lock held */ ++ int irq, vector; ++ ++ /* Mark the inuse vectors */ ++ for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { ++ if (!cpu_isset(cpu, irq_domain[irq])) ++ continue; ++ vector = irq_vector[irq]; ++ per_cpu(vector_irq, cpu)[vector] = irq; ++ } ++ /* Mark the free vectors */ ++ for (vector = 0; vector < NR_VECTORS; ++vector) { ++ irq = per_cpu(vector_irq, cpu)[vector]; ++ if (irq < 0) ++ continue; ++ if (!cpu_isset(cpu, irq_domain[irq])) ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ } ++} ++ ++extern void (*interrupt[NR_IRQS])(void); ++ ++static struct irq_chip ioapic_chip; + + #define IOAPIC_AUTO -1 + #define IOAPIC_EDGE 0 +@@ -928,16 +747,15 @@ static struct hw_interrupt_type ioapic_e + + static void ioapic_register_intr(int irq, int vector, unsigned long trigger) + { +- unsigned idx; +- +- idx = use_pci_vector() && !platform_legacy_irq(irq) ? 
vector : irq; +- + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) +- irq_desc[idx].chip = &ioapic_level_type; +- else +- irq_desc[idx].chip = &ioapic_edge_type; +- set_intr_gate(vector, interrupt[idx]); ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_fasteoi_irq, "fasteoi"); ++ else { ++ irq_desc[irq].status |= IRQ_DELAYED_DISABLE; ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_edge_irq, "edge"); ++ } + } + #else + #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0) +@@ -990,16 +808,21 @@ static void __init setup_IO_APIC_irqs(vo + continue; + + if (IO_APIC_IRQ(irq)) { +- vector = assign_irq_vector(irq); ++ cpumask_t mask; ++ vector = assign_irq_vector(irq, TARGET_CPUS, &mask); ++ if (vector < 0) ++ continue; ++ ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.vector = vector; + + ioapic_register_intr(irq, vector, IOAPIC_AUTO); + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } ++ ioapic_write_entry(apic, pin, entry); ++ + spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); +- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + } +@@ -1042,7 +865,7 @@ static void __init setup_ExtINT_IRQ0_pin + * The timer IRQ doesn't have to know that behind the + * scene we have a 8259A-master in AEOI mode ... + */ +- irq_desc[0].chip = &ioapic_edge_type; ++ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + + /* + * Add it to the IO-APIC irq-routing table: +@@ -1138,10 +961,7 @@ void __apicdebuginit print_IO_APIC(void) + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); +- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ entry = ioapic_read_entry(apic, i); + + printk(KERN_DEBUG " %02x %03X %02X ", + i, +@@ -1161,17 +981,12 @@ void __apicdebuginit print_IO_APIC(void) + ); + } + } +- if (use_pci_vector()) +- printk(KERN_INFO "Using vector-based indexing\n"); + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for (i = 0; i < NR_IRQS; i++) { + struct irq_pin_list *entry = irq_2_pin + i; + if (entry->pin < 0) + continue; +- if (use_pci_vector() && !platform_legacy_irq(i)) +- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i)); +- else +- printk(KERN_DEBUG "IRQ%d ", i); ++ printk(KERN_DEBUG "IRQ%d ", i); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) +@@ -1342,9 +1157,6 @@ static void __init enable_IO_APIC(void) + irq_2_pin[i].pin = -1; + irq_2_pin[i].next = 0; + } +- if (!pirqs_enabled) +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; + + /* + * The number of IO-APIC IRQ registers (== #pins): +@@ -1361,11 +1173,7 @@ static void __init enable_IO_APIC(void) + /* See if any of the pins is in ExtINT mode */ + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; +- spin_lock_irqsave(&ioapic_lock, flags); +- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); +- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- ++ entry = ioapic_read_entry(apic, pin); + + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. 
+@@ -1419,7 +1227,6 @@ void disable_IO_APIC(void) + */ + if (ioapic_i8259.pin != -1) { + struct IO_APIC_route_entry entry; +- unsigned long flags; + + memset(&entry, 0, sizeof(entry)); + entry.mask = 0; /* Enabled */ +@@ -1436,12 +1243,7 @@ void disable_IO_APIC(void) + /* + * Add it to the IO-APIC irq-routing table: + */ +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, +- *(((int *)&entry)+1)); +- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, +- *(((int *)&entry)+0)); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); + } + + disconnect_bsp_APIC(ioapic_i8259.pin != -1); +@@ -1449,76 +1251,6 @@ void disable_IO_APIC(void) + } + + /* +- * function to set the IO-APIC physical IDs based on the +- * values stored in the MPC table. +- * +- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 +- */ +- +-#ifndef CONFIG_XEN +-static void __init setup_ioapic_ids_from_mpc (void) +-{ +- union IO_APIC_reg_00 reg_00; +- int apic; +- int i; +- unsigned char old_id; +- unsigned long flags; +- +- /* +- * Set the IOAPIC ID to the value stored in the MPC table. +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- +- /* Read the register 0 value */ +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- old_id = mp_ioapics[apic].mpc_apicid; +- +- +- printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid); +- +- +- /* +- * We need to adjust the IRQ routing table +- * if the ID changed. +- */ +- if (old_id != mp_ioapics[apic].mpc_apicid) +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mpc_dstapic == old_id) +- mp_irqs[i].mpc_dstapic +- = mp_ioapics[apic].mpc_apicid; +- +- /* +- * Read the right value from the MPC table and +- * write it into the ID register. +- */ +- apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", +- mp_ioapics[apic].mpc_apicid); +- +- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid; +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0, reg_00.raw); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- /* +- * Sanity check +- */ +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid) +- printk("could not set ID!\n"); +- else +- apic_printk(APIC_VERBOSE," ok.\n"); +- } +-} +-#else +-static void __init setup_ioapic_ids_from_mpc(void) { } +-#endif +- +-/* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. We do the following to work around the situation: + * +@@ -1572,7 +1304,7 @@ static int __init timer_irq_works(void) + * an edge even if it isn't on the 8259A... + */ + +-static unsigned int startup_edge_ioapic_irq(unsigned int irq) ++static unsigned int startup_ioapic_irq(unsigned int irq) + { + int was_pending = 0; + unsigned long flags; +@@ -1589,107 +1321,19 @@ static unsigned int startup_edge_ioapic_ + return was_pending; + } + +-/* +- * Once we have recorded IRQ_PENDING already, we can mask the +- * interrupt for real. This prevents IRQ storms from unhandled +- * devices. 
+- */ +-static void ack_edge_ioapic_irq(unsigned int irq) +-{ +- move_irq(irq); +- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) +- == (IRQ_PENDING | IRQ_DISABLED)) +- mask_IO_APIC_irq(irq); +- ack_APIC_irq(); +-} +- +-/* +- * Level triggered interrupts can just be masked, +- * and shutting down and starting up the interrupt +- * is the same as enabling and disabling them -- except +- * with a startup need to return a "was pending" value. +- * +- * Level triggered interrupts are special because we +- * do not touch any IO-APIC register while handling +- * them. We ack the APIC in the end-IRQ handler, not +- * in the start-IRQ-handler. Protection against reentrance +- * from the same interrupt is still provided, both by the +- * generic IRQ layer and by the fact that an unacked local +- * APIC does not accept IRQs. +- */ +-static unsigned int startup_level_ioapic_irq (unsigned int irq) +-{ +- unmask_IO_APIC_irq(irq); +- +- return 0; /* don't check for pending */ +-} +- +-static void end_level_ioapic_irq (unsigned int irq) +-{ +- move_irq(irq); +- ack_APIC_irq(); +-} +- +-#ifdef CONFIG_PCI_MSI +-static unsigned int startup_edge_ioapic_vector(unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- return startup_edge_ioapic_irq(irq); +-} +- +-static void ack_edge_ioapic_vector(unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- move_native_irq(vector); +- ack_edge_ioapic_irq(irq); +-} +- +-static unsigned int startup_level_ioapic_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- return startup_level_ioapic_irq (irq); +-} +- +-static void end_level_ioapic_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- move_native_irq(vector); +- end_level_ioapic_irq(irq); +-} +- +-static void mask_IO_APIC_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- mask_IO_APIC_irq(irq); +-} +- +-static void unmask_IO_APIC_vector (unsigned int vector) +-{ +- int irq = vector_to_irq(vector); +- +- unmask_IO_APIC_irq(irq); +-} +- +-#ifdef CONFIG_SMP +-static void set_ioapic_affinity_vector (unsigned int vector, +- cpumask_t cpu_mask) ++static int ioapic_retrigger_irq(unsigned int irq) + { +- int irq = vector_to_irq(vector); ++ cpumask_t mask; ++ unsigned vector; ++ unsigned long flags; + +- set_native_irq_info(vector, cpu_mask); +- set_ioapic_affinity_irq(irq, cpu_mask); +-} +-#endif // CONFIG_SMP +-#endif // CONFIG_PCI_MSI ++ spin_lock_irqsave(&vector_lock, flags); ++ vector = irq_vector[irq]; ++ cpus_clear(mask); ++ cpu_set(first_cpu(irq_domain[irq]), mask); + +-static int ioapic_retrigger(unsigned int irq) +-{ +- send_IPI_self(IO_APIC_VECTOR(irq)); ++ send_IPI_mask(mask, vector); ++ spin_unlock_irqrestore(&vector_lock, flags); + + return 1; + } +@@ -1703,32 +1347,47 @@ static int ioapic_retrigger(unsigned int + * races. 
+ */
+
+-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
+- .typename = "IO-APIC-edge",
+- .startup = startup_edge_ioapic,
+- .shutdown = shutdown_edge_ioapic,
+- .enable = enable_edge_ioapic,
+- .disable = disable_edge_ioapic,
+- .ack = ack_edge_ioapic,
+- .end = end_edge_ioapic,
+-#ifdef CONFIG_SMP
+- .set_affinity = set_ioapic_affinity,
++static void ack_apic_edge(unsigned int irq)
++{
++ move_native_irq(irq);
++ ack_APIC_irq();
++}
++
++static void ack_apic_level(unsigned int irq)
++{
++ int do_unmask_irq = 0;
++
++#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
++ /* If we are moving the irq we need to mask it */
++ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
++ do_unmask_irq = 1;
++ mask_IO_APIC_irq(irq);
++ }
+ #endif
+- .retrigger = ioapic_retrigger,
+-};
+
+-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
+- .typename = "IO-APIC-level",
+- .startup = startup_level_ioapic,
+- .shutdown = shutdown_level_ioapic,
+- .enable = enable_level_ioapic,
+- .disable = disable_level_ioapic,
+- .ack = mask_and_ack_level_ioapic,
+- .end = end_level_ioapic,
++ /*
++ * We must acknowledge the irq before we move it or the acknowledge will
++ * not propagate properly.
++ */
++ ack_APIC_irq();
++
++ /* Now we can move and re-enable the irq */
++ move_masked_irq(irq);
++ if (unlikely(do_unmask_irq))
++ unmask_IO_APIC_irq(irq);
++}
++
++static struct irq_chip ioapic_chip __read_mostly = {
++ .name = "IO-APIC",
++ .startup = startup_ioapic_irq,
++ .mask = mask_IO_APIC_irq,
++ .unmask = unmask_IO_APIC_irq,
++ .ack = ack_apic_edge,
++ .eoi = ack_apic_level,
+ #ifdef CONFIG_SMP
+- .set_affinity = set_ioapic_affinity,
++ .set_affinity = set_ioapic_affinity_irq,
+ #endif
+- .retrigger = ioapic_retrigger,
++ .retrigger = ioapic_retrigger_irq,
+ };
+ #endif /* !CONFIG_XEN */
+
+@@ -1749,12 +1408,7 @@ static inline void init_IO_APIC_traps(vo
+ */
+ for (irq = 0; irq < NR_IRQS ; irq++) {
+ int tmp = irq;
+- if (use_pci_vector()) {
+- if (!platform_legacy_irq(tmp))
+- if ((tmp = vector_to_irq(tmp)) == -1)
+- continue;
+- }
+- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
++ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+ /*
+ * Hmm.. We don't have an entry for this,
+ * so default to an old-fashioned 8259
+@@ -1765,7 +1419,7 @@ static inline void init_IO_APIC_traps(vo
+ #ifndef CONFIG_XEN
+ else
+ /* Strange. Oh, well.. */
+- irq_desc[irq].chip = &no_irq_type;
++ irq_desc[irq].chip = &no_irq_chip;
+ #endif
+ }
+ }
+@@ -1886,8 +1540,6 @@ static inline void unlock_ExtINT_logic(v
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ }
+
+-int timer_uses_ioapic_pin_0;
+-
+ /*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs. 
Fortunately only the timer IRQ +@@ -1900,13 +1552,13 @@ static inline void check_timer(void) + { + int apic1, pin1, apic2, pin2; + int vector; ++ cpumask_t mask; + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); +- vector = assign_irq_vector(0); +- set_intr_gate(vector, interrupt[0]); ++ vector = assign_irq_vector(0, TARGET_CPUS, &mask); + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI +@@ -1925,9 +1577,6 @@ static inline void check_timer(void) + pin2 = ioapic_i8259.pin; + apic2 = ioapic_i8259.apic; + +- if (pin1 == 0) +- timer_uses_ioapic_pin_0 = 1; +- + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", + vector, apic1, pin1, apic2, pin2); + +@@ -2042,11 +1691,6 @@ void __init setup_IO_APIC(void) + + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + +- /* +- * Set up the IO-APIC IRQ routing table. +- */ +- if (!acpi_ioapic) +- setup_ioapic_ids_from_mpc(); + #ifndef CONFIG_XEN + sync_Arb_IDs(); + #endif /* !CONFIG_XEN */ +@@ -2067,17 +1711,12 @@ static int ioapic_suspend(struct sys_dev + { + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; +- unsigned long flags; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; +- spin_lock_irqsave(&ioapic_lock, flags); +- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { +- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); +- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) ++ *entry = ioapic_read_entry(dev->id, i); + + return 0; + } +@@ -2099,11 +1738,9 @@ static int ioapic_resume(struct sys_devi + reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; + io_apic_write(dev->id, 0, reg_00.raw); + } +- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { +- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); +- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); +- } + spin_unlock_irqrestore(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ++ ioapic_write_entry(dev->id, i, entry[i]); + + return 0; + } +@@ -2149,26 +1786,254 @@ static int __init ioapic_init_sysfs(void + + device_initcall(ioapic_init_sysfs); + +-/* -------------------------------------------------------------------------- +- ACPI-based IOAPIC Configuration +- -------------------------------------------------------------------------- */ ++#ifndef CONFIG_XEN ++/* ++ * Dynamic irq allocate and deallocation ++ */ ++int create_irq(void) ++{ ++ /* Allocate an unused irq */ ++ int irq; ++ int new; ++ int vector = 0; ++ unsigned long flags; ++ cpumask_t mask; + +-#ifdef CONFIG_ACPI ++ irq = -ENOSPC; ++ spin_lock_irqsave(&vector_lock, flags); ++ for (new = (NR_IRQS - 1); new >= 0; new--) { ++ if (platform_legacy_irq(new)) ++ continue; ++ if (irq_vector[new] != 0) ++ continue; ++ vector = __assign_irq_vector(new, TARGET_CPUS, &mask); ++ if (likely(vector > 0)) ++ irq = new; ++ break; ++ } ++ spin_unlock_irqrestore(&vector_lock, flags); + +-#define IO_APIC_MAX_ID 0xFE ++ if (irq >= 0) { ++ dynamic_irq_init(irq); ++ } ++ return irq; ++} + +-int __init io_apic_get_version (int ioapic) ++void destroy_irq(unsigned int irq) + { +- union IO_APIC_reg_01 reg_01; + unsigned long flags; + +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(ioapic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); ++ dynamic_irq_cleanup(irq); ++ ++ 
spin_lock_irqsave(&vector_lock, flags);
++ irq_vector[irq] = 0;
++ spin_unlock_irqrestore(&vector_lock, flags);
++}
++#endif
++
++/*
++ * MSI message composition
++ */
++#ifdef CONFIG_PCI_MSI
++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
++{
++ int vector;
++ unsigned dest;
++ cpumask_t tmp;
++
++ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
++ if (vector >= 0) {
++ dest = cpu_mask_to_apicid(tmp);
++
++ msg->address_hi = MSI_ADDR_BASE_HI;
++ msg->address_lo =
++ MSI_ADDR_BASE_LO |
++ ((INT_DEST_MODE == 0) ?
++ MSI_ADDR_DEST_MODE_PHYSICAL:
++ MSI_ADDR_DEST_MODE_LOGICAL) |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ MSI_ADDR_REDIRECTION_CPU:
++ MSI_ADDR_REDIRECTION_LOWPRI) |
++ MSI_ADDR_DEST_ID(dest);
++
++ msg->data =
++ MSI_DATA_TRIGGER_EDGE |
++ MSI_DATA_LEVEL_ASSERT |
++ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
++ MSI_DATA_DELIVERY_FIXED:
++ MSI_DATA_DELIVERY_LOWPRI) |
++ MSI_DATA_VECTOR(vector);
++ }
++ return vector;
++}
++
++#ifdef CONFIG_SMP
++static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
++{
++ struct msi_msg msg;
++ unsigned int dest;
++ cpumask_t tmp;
++ int vector;
++
++ cpus_and(tmp, mask, cpu_online_map);
++ if (cpus_empty(tmp))
++ tmp = TARGET_CPUS;
++
++ cpus_and(mask, tmp, CPU_MASK_ALL);
++
++ vector = assign_irq_vector(irq, mask, &tmp);
++ if (vector < 0)
++ return;
++
++ dest = cpu_mask_to_apicid(tmp);
++
++ read_msi_msg(irq, &msg);
++
++ msg.data &= ~MSI_DATA_VECTOR_MASK;
++ msg.data |= MSI_DATA_VECTOR(vector);
++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
++ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
++
++ write_msi_msg(irq, &msg);
++ set_native_irq_info(irq, mask);
++}
++#endif /* CONFIG_SMP */
++
++/*
++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
++ * which implement the MSI or MSI-X Capability Structure. 
++ */ ++static struct irq_chip msi_chip = { ++ .name = "PCI-MSI", ++ .unmask = unmask_msi_irq, ++ .mask = mask_msi_irq, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = set_msi_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) ++{ ++ struct msi_msg msg; ++ int ret; ++ ret = msi_compose_msg(dev, irq, &msg); ++ if (ret < 0) ++ return ret; ++ ++ write_msi_msg(irq, &msg); ++ ++ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); ++ ++ return 0; ++} ++ ++void arch_teardown_msi_irq(unsigned int irq) ++{ ++ return; ++} ++ ++#endif /* CONFIG_PCI_MSI */ ++ ++/* ++ * Hypertransport interrupt support ++ */ ++#ifdef CONFIG_HT_IRQ ++ ++#ifdef CONFIG_SMP ++ ++static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) ++{ ++ struct ht_irq_msg msg; ++ fetch_ht_irq_msg(irq, &msg); ++ ++ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); ++ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + +- return reg_01.bits.version; ++ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); ++ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); ++ ++ write_ht_irq_msg(irq, &msg); + } + ++static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) ++{ ++ unsigned int dest; ++ cpumask_t tmp; ++ int vector; ++ ++ cpus_and(tmp, mask, cpu_online_map); ++ if (cpus_empty(tmp)) ++ tmp = TARGET_CPUS; ++ ++ cpus_and(mask, tmp, CPU_MASK_ALL); ++ ++ vector = assign_irq_vector(irq, mask, &tmp); ++ if (vector < 0) ++ return; ++ ++ dest = cpu_mask_to_apicid(tmp); ++ ++ target_ht_irq(irq, dest, vector); ++ set_native_irq_info(irq, mask); ++} ++#endif ++ ++static struct irq_chip ht_irq_chip = { ++ .name = "PCI-HT", ++ .mask = mask_ht_irq, ++ .unmask = unmask_ht_irq, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ht_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) ++{ ++ int vector; ++ cpumask_t tmp; ++ ++ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); ++ if (vector >= 0) { ++ struct ht_irq_msg msg; ++ unsigned dest; ++ ++ dest = cpu_mask_to_apicid(tmp); ++ ++ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); ++ ++ msg.address_lo = ++ HT_IRQ_LOW_BASE | ++ HT_IRQ_LOW_DEST_ID(dest) | ++ HT_IRQ_LOW_VECTOR(vector) | ++ ((INT_DEST_MODE == 0) ? ++ HT_IRQ_LOW_DM_PHYSICAL : ++ HT_IRQ_LOW_DM_LOGICAL) | ++ HT_IRQ_LOW_RQEOI_EDGE | ++ ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
++ HT_IRQ_LOW_MT_FIXED : ++ HT_IRQ_LOW_MT_ARBITRATED) | ++ HT_IRQ_LOW_IRQ_MASKED; ++ ++ write_ht_irq_msg(irq, &msg); ++ ++ set_irq_chip_and_handler_name(irq, &ht_irq_chip, ++ handle_edge_irq, "edge"); ++ } ++ return vector; ++} ++#endif /* CONFIG_HT_IRQ */ ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based IOAPIC Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++#define IO_APIC_MAX_ID 0xFE + + int __init io_apic_get_redir_entries (int ioapic) + { +@@ -2187,6 +2052,8 @@ int io_apic_set_pci_routing (int ioapic, + { + struct IO_APIC_route_entry entry; + unsigned long flags; ++ int vector; ++ cpumask_t mask; + + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", +@@ -2195,6 +2062,17 @@ int io_apic_set_pci_routing (int ioapic, + } + + /* ++ * IRQs < 16 are already in the irq_2_pin[] map ++ */ ++ if (irq >= 16) ++ add_pin_to_irq(irq, ioapic, pin); ++ ++ ++ vector = assign_irq_vector(irq, TARGET_CPUS, &mask); ++ if (vector < 0) ++ return vector; ++ ++ /* + * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. + * Note that we mask (disable) IRQs now -- these get enabled when the + * corresponding device driver registers for this IRQ. +@@ -2204,19 +2082,11 @@ int io_apic_set_pci_routing (int ioapic, + + entry.delivery_mode = INT_DELIVERY_MODE; + entry.dest_mode = INT_DEST_MODE; +- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); ++ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); + entry.trigger = edge_level; + entry.polarity = active_high_low; + entry.mask = 1; /* Disabled (masked) */ +- +- irq = gsi_irq_sharing(irq); +- /* +- * IRQs < 16 are already in the irq_2_pin[] map +- */ +- if (irq >= 16) +- add_pin_to_irq(irq, ioapic, pin); +- +- entry.vector = assign_irq_vector(irq); ++ entry.vector = vector & 0xff; + + apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", ioapic, +@@ -2228,10 +2098,10 @@ int io_apic_set_pci_routing (int ioapic, + if (!ioapic && (irq < 16)) + disable_8259A_irq(irq); + ++ ioapic_write_entry(ioapic, pin, entry); ++ + spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); +- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); +- set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); ++ set_native_irq_info(irq, TARGET_CPUS); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return 0; +Index: 10.3-2007-11-26/arch/x86_64/kernel/ioport-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/ioport-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/ioport-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long + + memset(bitmap, 0xff, IO_BITMAP_BYTES); + t->io_bitmap_ptr = bitmap; ++ set_thread_flag(TIF_IO_BITMAP); + + set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap); + set_iobitmap.nr_ports = IO_BITMAP_BITS; +Index: 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/irq-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/irq-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -20,11 +20,6 @@ + #include <asm/idle.h> + + atomic_t irq_err_count; +-#ifdef CONFIG_X86_IO_APIC +-#ifdef APIC_MISMATCH_DEBUG +-atomic_t irq_mis_count; +-#endif +-#endif + + #ifdef CONFIG_DEBUG_STACKOVERFLOW + /* +@@ -79,7 +74,8 @@ int show_interrupts(struct seq_file *p, + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + #endif +- seq_printf(p, " %14s", irq_desc[i].chip->typename); ++ seq_printf(p, " %8s", irq_desc[i].chip->name); ++ seq_printf(p, "-%-8s", irq_desc[i].name); + + seq_printf(p, " %s", action->name); + for (action=action->next; action; action = action->next) +@@ -99,11 +95,6 @@ skip: + seq_putc(p, '\n'); + #endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +-#ifdef CONFIG_X86_IO_APIC +-#ifdef APIC_MISMATCH_DEBUG +- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +-#endif +-#endif + } + return 0; + } +@@ -114,24 +105,28 @@ skip: + * handlers). 
+ */ + asmlinkage unsigned int do_IRQ(struct pt_regs *regs) +-{ ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ + /* high bit used in ret_from_ code */ + unsigned irq = ~regs->orig_rax; + +- if (unlikely(irq >= NR_IRQS)) { +- printk(KERN_EMERG "%s: cannot handle IRQ %d\n", +- __FUNCTION__, irq); +- BUG(); +- } +- + exit_idle(); + irq_enter(); ++ + #ifdef CONFIG_DEBUG_STACKOVERFLOW + stack_overflow_check(regs); + #endif +- __do_IRQ(irq, regs); ++ ++ if (likely(irq < NR_IRQS)) ++ generic_handle_irq(irq); ++ else ++ printk(KERN_EMERG "%s: %d.%d No irq handler for irq\n", ++ __func__, smp_processor_id(), irq); ++ + irq_exit(); + ++ set_irq_regs(old_regs); + return 1; + } + +@@ -192,6 +187,6 @@ EXPORT_SYMBOL(do_softirq); + */ + void ack_bad_irq(unsigned int irq) + { +- printk("unexpected IRQ trap at vector %02x\n", irq); ++ printk("unexpected IRQ trap at irq %02x\n", irq); + } + #endif +Index: 10.3-2007-11-26/arch/x86_64/kernel/machine_kexec.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/machine_kexec.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/machine_kexec.c 2007-10-22 13:53:08.000000000 +0200 +@@ -286,6 +286,7 @@ NORET_TYPE void machine_kexec(struct kim + */ + static int __init setup_crashkernel(char *arg) + { ++#ifndef CONFIG_XEN + unsigned long size, base; + char *p; + if (!arg) +@@ -301,6 +302,10 @@ static int __init setup_crashkernel(char + crashk_res.start = base; + crashk_res.end = base + size - 1; + } ++#else ++ printk("Ignoring crashkernel command line, " ++ "parameter will be supplied by xen\n"); ++#endif + return 0; + } + early_param("crashkernel", setup_crashkernel); +Index: 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/mpparse-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/mpparse-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -41,8 +41,7 @@ int acpi_found_madt; + * Various Linux-internal data structures created from the + * MP-table. + */ +-unsigned char apic_version [MAX_APICS]; +-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; ++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; + + static int mp_current_pci_id = 0; +@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ + int mp_irq_entries; + + int nr_ioapics; +-int pic_mode; + unsigned long mp_lapic_addr = 0; + + +@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata; + /* Bitmask of physically existing CPUs */ + physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; + +-/* ACPI MADT entry parsing functions */ +-#ifdef CONFIG_ACPI +-extern struct acpi_boot_flags acpi_boot; +-#ifdef CONFIG_X86_LOCAL_APIC +-extern int acpi_parse_lapic (acpi_table_entry_header *header); +-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header); +-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header); +-#endif /*CONFIG_X86_LOCAL_APIC*/ +-#ifdef CONFIG_X86_IO_APIC +-extern int acpi_parse_ioapic (acpi_table_entry_header *header); +-#endif /*CONFIG_X86_IO_APIC*/ +-#endif /*CONFIG_ACPI*/ +- + u8 bios_cpu_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = BAD_APICID }; + + +@@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned + static void __cpuinit MP_processor_info (struct mpc_config_processor *m) + { + int cpu; +- unsigned char ver; + cpumask_t tmp_map; ++ char *bootup_cpu = ""; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) { + disabled_cpus++; + return; + } +- +- printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", +- m->mpc_apicid, +- (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8, +- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4, +- m->mpc_apicver); +- + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { +- Dprintk(" Bootup CPU\n"); ++ bootup_cpu = " (Bootup-CPU)"; + boot_cpu_id = m->mpc_apicid; + } ++ ++ printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu); ++ + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); +@@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + +-#if MAX_APICS < 255 +- if ((int)m->mpc_apicid > MAX_APICS) { +- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", +- m->mpc_apicid, MAX_APICS); +- return; +- } +-#endif +- ver = m->mpc_apicver; +- + physid_set(m->mpc_apicid, phys_cpu_present_map); +- /* +- * Validate version +- */ +- if (ver == 0x0) { +- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid); +- ver = 0x10; +- } +- apic_version[m->mpc_apicid] = ver; + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + /* + * bios_cpu_apicid is required to have processors listed +@@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m + Dprintk("Bus #%d is %s\n", m->mpc_busid, str); + + if (strncmp(str, "ISA", 3) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; +- } else if (strncmp(str, "EISA", 4) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; ++ set_bit(m->mpc_busid, mp_bus_not_pci); + } else if (strncmp(str, "PCI", 3) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; ++ clear_bit(m->mpc_busid, mp_bus_not_pci); + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; +- } else if (strncmp(str, "MCA", 3) == 0) { +- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA; + } else { + printk(KERN_ERR "Unknown bustype %s\n", str); + } + } + ++static int bad_ioapic(unsigned long address) ++{ ++ if (nr_ioapics >= MAX_IO_APICS) { ++ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " ++ "(found %d)\n", MAX_IO_APICS, nr_ioapics); ++ panic("Recompile kernel with bigger MAX_IO_APICS!\n"); ++ } ++ if (!address) { ++ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" ++ " found in table, skipping!\n"); ++ return 1; ++ } ++ return 0; ++} ++ + static void __init MP_ioapic_info (struct mpc_config_ioapic *m) + { + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + +- printk("I/O APIC #%d Version %d at 0x%X.\n", +- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); +- if (nr_ioapics >= MAX_IO_APICS) { +- printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n", +- MAX_IO_APICS, nr_ioapics); +- panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); +- } +- if (!m->mpc_apicaddr) { +- printk(KERN_ERR "WARNING: bogus zero I/O APIC address" +- " found in MP table, skipping!\n"); ++ printk("I/O APIC #%d at 0x%X.\n", ++ m->mpc_apicid, m->mpc_apicaddr); ++ ++ if (bad_ioapic(m->mpc_apicaddr)) + return; +- } ++ + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; + } +@@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru + m->mpc_irqtype, 
m->mpc_irqflag & 3, + (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, + m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); +- /* +- * Well it seems all SMP boards in existence +- * use ExtINT/LVT1 == LINT0 and +- * NMI/LVT2 == LINT1 - the following check +- * will show us if this assumptions is false. +- * Until then we do not have to add baggage. +- */ +- if ((m->mpc_irqtype == mp_ExtINT) && +- (m->mpc_destapiclint != 0)) +- BUG(); +- if ((m->mpc_irqtype == mp_NMI) && +- (m->mpc_destapiclint != 1)) +- BUG(); + } + + /* +@@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp + unsigned char *mpt=((unsigned char *)mpc)+count; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) { +- printk("SMP mptable: bad signature [%c%c%c%c]!\n", ++ printk("MPTABLE: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], +@@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp + return 0; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) { +- printk("SMP mptable: checksum error!\n"); ++ printk("MPTABLE: checksum error!\n"); + return 0; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) { +- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n", ++ printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n", + mpc->mpc_spec); + return 0; + } + if (!mpc->mpc_lapic) { +- printk(KERN_ERR "SMP mptable: null local APIC address!\n"); ++ printk(KERN_ERR "MPTABLE: null local APIC address!\n"); + return 0; + } + memcpy(str,mpc->mpc_oem,8); +- str[8]=0; +- printk(KERN_INFO "OEM ID: %s ",str); ++ str[8] = 0; ++ printk(KERN_INFO "MPTABLE: OEM ID: %s ",str); + + memcpy(str,mpc->mpc_productid,12); +- str[12]=0; +- printk("Product ID: %s ",str); ++ str[12] = 0; ++ printk("MPTABLE: Product ID: %s ",str); + +- printk("APIC at: 0x%X\n",mpc->mpc_lapic); ++ printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default */ + if (!acpi_lapic) +- mp_lapic_addr = mpc->mpc_lapic; ++ mp_lapic_addr = mpc->mpc_lapic; + + /* + * Now process the configuration blocks. +@@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + if (!acpi_lapic) +- MP_processor_info(m); ++ MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; +@@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); +- mpt+=sizeof(*m); +- count+=sizeof(*m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); + break; + } + case MP_INTSRC: +@@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); +- mpt+=sizeof(*m); +- count+=sizeof(*m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); + break; + } + case MP_LINTSRC: +@@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); +- mpt+=sizeof(*m); +- count+=sizeof(*m); ++ mpt += sizeof(*m); ++ count += sizeof(*m); + break; + } + } + } + clustered_apic_check(); + if (!num_processors) +- printk(KERN_ERR "SMP mptable: no processors registered!\n"); ++ printk(KERN_ERR "MPTABLE: no processors registered!\n"); + return num_processors; + } + +@@ -451,13 +407,10 @@ static inline void __init construct_defa + * 2 CPUs, numbered 0 & 1. + */ + processor.mpc_type = MP_PROCESSOR; +- /* Either an integrated APIC or a discrete 82489DX. */ +- processor.mpc_apicver = mpc_default_type > 4 ? 
0x10 : 0x01; ++ processor.mpc_apicver = 0; + processor.mpc_cpuflag = CPU_ENABLED; +- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | +- (boot_cpu_data.x86_model << 4) | +- boot_cpu_data.x86_mask; +- processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_cpufeature = 0; ++ processor.mpc_featureflag = 0; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + for (i = 0; i < 2; i++) { +@@ -476,14 +429,6 @@ static inline void __init construct_defa + case 5: + memcpy(bus.mpc_bustype, "ISA ", 6); + break; +- case 2: +- case 6: +- case 3: +- memcpy(bus.mpc_bustype, "EISA ", 6); +- break; +- case 4: +- case 7: +- memcpy(bus.mpc_bustype, "MCA ", 6); + } + MP_bus_info(&bus); + if (mpc_default_type > 4) { +@@ -494,7 +439,7 @@ static inline void __init construct_defa + + ioapic.mpc_type = MP_IOAPIC; + ioapic.mpc_apicid = 2; +- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01; ++ ioapic.mpc_apicver = 0; + ioapic.mpc_flags = MPC_APIC_USABLE; + ioapic.mpc_apicaddr = 0xFEC00000; + MP_ioapic_info(&ioapic); +@@ -537,13 +482,6 @@ void __init get_smp_config (void) + printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n"); + + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); +- if (mpf->mpf_feature2 & (1<<7)) { +- printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); +- pic_mode = 1; +- } else { +- printk(KERN_INFO " Virtual Wire compatibility mode.\n"); +- pic_mode = 0; +- } + + /* + * Now see if we need to read further. +@@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig + return 0; + } + +-void __init find_intel_smp (void) ++void __init find_smp_config(void) + { + unsigned int address; + +@@ -637,9 +575,7 @@ void __init find_intel_smp (void) + smp_scan_config(0xF0000,0x10000)) + return; + /* +- * If it is an SMP machine we should know now, unless the +- * configuration is in an EISA/MCA bus machine with an +- * extended bios data area. ++ * If it is an SMP machine we should know now. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. 
+@@ -660,64 +596,38 @@ void __init find_intel_smp (void) + printk(KERN_INFO "No mptable found.\n"); + } + +-/* +- * - Intel MP Configuration Table +- */ +-void __init find_smp_config (void) +-{ +-#ifdef CONFIG_X86_LOCAL_APIC +- find_intel_smp(); +-#endif +-} +- +- + /* -------------------------------------------------------------------------- + ACPI-based MP Configuration + -------------------------------------------------------------------------- */ + + #ifdef CONFIG_ACPI + +-void __init mp_register_lapic_address ( +- u64 address) ++void __init mp_register_lapic_address(u64 address) + { + #ifndef CONFIG_XEN + mp_lapic_addr = (unsigned long) address; +- + set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); +- + if (boot_cpu_id == -1U) + boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID)); +- +- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); + #endif + } + +- +-void __cpuinit mp_register_lapic ( +- u8 id, +- u8 enabled) ++void __cpuinit mp_register_lapic (u8 id, u8 enabled) + { + struct mpc_config_processor processor; + int boot_cpu = 0; + +- if (id >= MAX_APICS) { +- printk(KERN_WARNING "Processor #%d invalid (max %d)\n", +- id, MAX_APICS); +- return; +- } +- +- if (id == boot_cpu_physical_apicid) ++ if (id == boot_cpu_id) + boot_cpu = 1; + + #ifndef CONFIG_XEN + processor.mpc_type = MP_PROCESSOR; + processor.mpc_apicid = id; +- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ processor.mpc_apicver = 0; + processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0); + processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0); +- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) | +- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask; +- processor.mpc_featureflag = boot_cpu_data.x86_capability[0]; ++ processor.mpc_cpufeature = 0; ++ processor.mpc_featureflag = 0; + processor.mpc_reserved[0] = 0; + processor.mpc_reserved[1] = 0; + #endif +@@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic ( + MP_processor_info(&processor); + } + +-#ifdef CONFIG_X86_IO_APIC +- + #define MP_ISA_BUS 0 + #define MP_MAX_IOAPIC_PIN 127 + +@@ -737,11 +645,9 @@ static struct mp_ioapic_routing { + u32 pin_programmed[4]; + } mp_ioapic_routing[MAX_IO_APICS]; + +- +-static int mp_find_ioapic ( +- int gsi) ++static int mp_find_ioapic(int gsi) + { +- int i = 0; ++ int i = 0; + + /* Find the IOAPIC that manages this GSI. 
*/ + for (i = 0; i < nr_ioapics; i++) { +@@ -751,28 +657,15 @@ static int mp_find_ioapic ( + } + + printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi); +- + return -1; + } +- + +-void __init mp_register_ioapic ( +- u8 id, +- u32 address, +- u32 gsi_base) ++void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) + { +- int idx = 0; ++ int idx = 0; + +- if (nr_ioapics >= MAX_IO_APICS) { +- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " +- "(found %d)\n", MAX_IO_APICS, nr_ioapics); +- panic("Recompile kernel with bigger MAX_IO_APICS!\n"); +- } +- if (!address) { +- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" +- " found in MADT table, skipping!\n"); ++ if (bad_ioapic(address)) + return; +- } + + idx = nr_ioapics++; + +@@ -784,7 +677,7 @@ void __init mp_register_ioapic ( + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); + #endif + mp_ioapics[idx].mpc_apicid = id; +- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); ++ mp_ioapics[idx].mpc_apicver = 0; + + /* + * Build basic IRQ lookup table to facilitate gsi->io_apic lookups +@@ -795,21 +688,15 @@ void __init mp_register_ioapic ( + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + +- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " ++ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, " + "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid, +- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, ++ mp_ioapics[idx].mpc_apicaddr, + mp_ioapic_routing[idx].gsi_start, + mp_ioapic_routing[idx].gsi_end); +- +- return; + } + +- +-void __init mp_override_legacy_irq ( +- u8 bus_irq, +- u8 polarity, +- u8 trigger, +- u32 gsi) ++void __init ++mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) + { + struct mpc_config_intsrc intsrc; + int ioapic = -1; +@@ -847,22 +734,18 @@ void __init mp_override_legacy_irq ( + mp_irqs[mp_irq_entries] = intsrc; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); +- +- return; + } + +- +-void __init mp_config_acpi_legacy_irqs (void) ++void __init mp_config_acpi_legacy_irqs(void) + { + struct mpc_config_intsrc intsrc; +- int i = 0; +- int ioapic = -1; ++ int i = 0; ++ int ioapic = -1; + + /* + * Fabricate the legacy ISA bus (bus #31). + */ +- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; +- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); ++ set_bit(MP_ISA_BUS, mp_bus_not_pci); + + /* + * Locate the IOAPIC that manages the ISA IRQs (0-15). +@@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs ( + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!\n"); + } +- +- return; + } + +-#define MAX_GSI_NUM 4096 +- + int mp_register_gsi(u32 gsi, int triggering, int polarity) + { +- int ioapic = -1; +- int ioapic_pin = 0; +- int idx, bit = 0; +- static int pci_irq = 16; +- /* +- * Mapping between Global System Interrupts, which +- * represent all possible interrupts, to the IRQs +- * assigned to actual devices. 
+- */ +- static int gsi_to_irq[MAX_GSI_NUM]; ++ int ioapic = -1; ++ int ioapic_pin = 0; ++ int idx, bit = 0; + + if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) + return gsi; +@@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger + if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { + Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", + mp_ioapic_routing[ioapic].apic_id, ioapic_pin); +- return gsi_to_irq[gsi]; ++ return gsi; + } + + mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + +- if (triggering == ACPI_LEVEL_SENSITIVE) { +- /* +- * For PCI devices assign IRQs in order, avoiding gaps +- * due to unused I/O APIC pins. +- */ +- int irq = gsi; +- if (gsi < MAX_GSI_NUM) { +- /* +- * Retain the VIA chipset work-around (gsi > 15), but +- * avoid a problem where the 8254 timer (IRQ0) is setup +- * via an override (so it's not on pin 0 of the ioapic), +- * and at the same time, the pin 0 interrupt is a PCI +- * type. The gsi > 15 test could cause these two pins +- * to be shared as IRQ0, and they are not shareable. +- * So test for this condition, and if necessary, avoid +- * the pin collision. +- */ +- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) +- gsi = pci_irq++; +- /* +- * Don't assign IRQ used by ACPI SCI +- */ +- if (gsi == acpi_fadt.sci_int) +- gsi = pci_irq++; +- gsi_to_irq[irq] = gsi; +- } else { +- printk(KERN_ERR "GSI %u is too high\n", gsi); +- return gsi; +- } +- } +- + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, + triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, + polarity == ACPI_ACTIVE_HIGH ? 0 : 1); + return gsi; + } +- +-#endif /*CONFIG_X86_IO_APIC*/ + #endif /*CONFIG_ACPI*/ +Index: 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-05-31 14:39:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/pci-swiotlb-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -3,7 +3,8 @@ + #include <linux/pci.h> + #include <linux/cache.h> + #include <linux/module.h> +-#include <asm/dma-mapping.h> ++#include <linux/dma-mapping.h> ++ + #include <asm/proto.h> + #include <asm/swiotlb.h> + #include <asm/dma.h> +Index: 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/process-xen.c 2007-05-31 14:39:08.000000000 +0200 ++++ 10.3-2007-11-26/arch/x86_64/kernel/process-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not + } + EXPORT_SYMBOL(idle_notifier_unregister); + +-enum idle_state { CPU_IDLE, CPU_NOT_IDLE }; +-static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; +- + void enter_idle(void) + { +- __get_cpu_var(idle_state) = CPU_IDLE; ++ write_pda(isidle, 1); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + } + + static void __exit_idle(void) + { +- __get_cpu_var(idle_state) = CPU_NOT_IDLE; ++ if (test_and_clear_bit_pda(0, isidle) == 0) ++ return; + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + } + + /* Called from interrupts to signify idle end */ + void exit_idle(void) + { +- if (current->pid | read_pda(irqcount)) ++ /* idle loop has pid 0 */ ++ if (current->pid) + return; + __exit_idle(); + } +@@ -184,6 +183,9 @@ void cpu_idle (void) + play_dead(); + enter_idle(); + idle(); ++ /* In many cases the interrupt that ended idle ++ has already called exit_idle. 
But some idle ++ loops can be woken up without interrupt. */ + __exit_idle(); + } + +@@ -196,7 +198,7 @@ void cpu_idle (void) + void cpu_idle_wait(void) + { + unsigned int cpu, this_cpu = get_cpu(); +- cpumask_t map; ++ cpumask_t map, tmp = current->cpus_allowed; + + set_cpus_allowed(current, cpumask_of_cpu(this_cpu)); + put_cpu(); +@@ -219,6 +221,8 @@ void cpu_idle_wait(void) + } + cpus_and(map, map, cpu_online_map); + } while (!cpus_empty(map)); ++ ++ set_cpus_allowed(current, tmp); + } + EXPORT_SYMBOL_GPL(cpu_idle_wait); + +@@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs) + print_modules(); + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), +- system_utsname.release, +- (int)strcspn(system_utsname.version, " "), +- system_utsname.version); ++ init_utsname()->release, ++ (int)strcspn(init_utsname()->version, " "), ++ init_utsname()->version); + printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); + printk_address(regs->rip); + printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, +@@ -310,6 +314,7 @@ void exit_thread(void) + + kfree(t->io_bitmap_ptr); + t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); + /* + * Careful, clear this in the TSS too: + */ +@@ -339,6 +344,7 @@ void flush_thread(void) + if (t->flags & _TIF_IA32) + current_thread_info()->status |= TS_COMPAT; + } ++ t->flags &= ~_TIF_DEBUG; + + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; +@@ -431,7 +437,7 @@ int copy_thread(int nr, unsigned long cl + asm("mov %%es,%0" : "=m" (p->thread.es)); + asm("mov %%ds,%0" : "=m" (p->thread.ds)); + +- if (unlikely(me->thread.io_bitmap_ptr != NULL)) { ++ if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { + p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); + if (!p->thread.io_bitmap_ptr) { + p->thread.io_bitmap_max = 0; +@@ -439,6 +445,7 @@ int copy_thread(int nr, unsigned long cl + } + memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, + IO_BITMAP_BYTES); ++ set_tsk_thread_flag(p, TIF_IO_BITMAP); + } + + /* +@@ -466,6 +473,30 @@ out: + } + + /* ++ * This special macro can be used to load a debugging register ++ */ ++#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) ++ ++static inline void __switch_to_xtra(struct task_struct *prev_p, ++ struct task_struct *next_p) ++{ ++ struct thread_struct *prev, *next; ++ ++ prev = &prev_p->thread, ++ next = &next_p->thread; ++ ++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { ++ loaddebug(next, 0); ++ loaddebug(next, 1); ++ loaddebug(next, 2); ++ loaddebug(next, 3); ++ /* no 4 and 5 */ ++ loaddebug(next, 6); ++ loaddebug(next, 7); ++ } ++} ++ ++/* + * switch_to(x,y) should switch tasks from x to y. 
+ * + * This could still be optimized: +@@ -487,6 +518,10 @@ __switch_to(struct task_struct *prev_p, + struct physdev_set_iobitmap iobmp_op; + multicall_entry_t _mcl[8], *mcl = _mcl; + ++ /* we're going to use this soon, after a few expensive things */ ++ if (next_p->fpu_counter>5) ++ prefetch(&next->i387.fxsave); ++ + /* + * This is basically '__unlazy_fpu', except that we queue a + * multicall to indicate FPU task switch, rather than +@@ -499,7 +534,8 @@ __switch_to(struct task_struct *prev_p, + mcl->op = __HYPERVISOR_fpu_taskswitch; + mcl->args[0] = 1; + mcl++; +- } ++ } else ++ prev_p->fpu_counter = 0; + + /* + * Reload esp0, LDT and the page table pointer: +@@ -576,21 +612,29 @@ __switch_to(struct task_struct *prev_p, + write_pda(oldrsp, next->userrsp); + write_pda(pcurrent, next_p); + write_pda(kernelstack, +- task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); ++ (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); ++#ifdef CONFIG_CC_STACKPROTECTOR ++ write_pda(stack_canary, next_p->stack_canary); ++ ++ /* ++ * Build time only check to make sure the stack_canary is at ++ * offset 40 in the pda; this is a gcc ABI requirement ++ */ ++ BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); ++#endif + + /* + * Now maybe reload the debug registers + */ +- if (unlikely(next->debugreg7)) { +- set_debugreg(next->debugreg0, 0); +- set_debugreg(next->debugreg1, 1); +- set_debugreg(next->debugreg2, 2); +- set_debugreg(next->debugreg3, 3); +- /* no 4 and 5 */ +- set_debugreg(next->debugreg6, 6); +- set_debugreg(next->debugreg7, 7); +- } ++ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW)) ++ __switch_to_xtra(prev_p, next_p); + ++ /* If the task has used fpu the last 5 timeslices, just do a full ++ * restore of the math state immediately to avoid the trap; the ++ * chances of needing FPU soon are obviously high now ++ */ ++ if (next_p->fpu_counter>5) ++ math_state_restore(); + return prev_p; + } + +@@ -810,7 +854,7 @@ int dump_task_regs(struct task_struct *t + + unsigned long arch_align_stack(unsigned long sp) + { +- if (randomize_va_space) ++ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_int() % 8192; + return sp & ~0xf; + } +Index: 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/setup-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -118,16 +118,6 @@ EXPORT_SYMBOL(boot_cpu_data); + + unsigned long mmu_cr4_features; + +-int acpi_disabled; +-EXPORT_SYMBOL(acpi_disabled); +-#ifdef CONFIG_ACPI +-extern int __initdata acpi_ht; +-extern acpi_interrupt_flags acpi_sci_flags; +-int __initdata acpi_force = 0; +-#endif +- +-int acpi_numa __initdata; +- + /* Boot loader ID as an integer, for the benefit of proc_dointvec */ + int bootloader_type; + +@@ -151,10 +141,6 @@ struct sys_desc_table_struct { + + struct edid_info edid_info; + EXPORT_SYMBOL_GPL(edid_info); +-struct e820map e820; +-#ifdef CONFIG_XEN +-struct e820map machine_e820; +-#endif + + extern int root_mountflags; + +@@ -181,9 +167,6 @@ struct resource standard_io_resources[] + .flags = IORESOURCE_BUSY | IORESOURCE_IO } + }; + +-#define STANDARD_IO_RESOURCES \ +- (sizeof standard_io_resources / sizeof standard_io_resources[0]) +- + #define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM) + + struct resource data_resource = { +@@ -230,9 +213,6 @@ static struct 
resource adapter_rom_resou + .flags = IORESOURCE_ROM } + }; + +-#define ADAPTER_ROM_RESOURCES \ +- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0]) +- + static struct resource video_rom_resource = { + .name = "Video ROM", + .start = 0xc0000, +@@ -309,7 +289,8 @@ static void __init probe_roms(void) + } + + /* check for adapter roms on 2k boundaries */ +- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) { ++ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; ++ start += 2048) { + rom = isa_bus_to_virt(start); + if (!romsignature(rom)) + continue; +@@ -329,186 +310,21 @@ static void __init probe_roms(void) + } + } + +-/* Check for full argument with no trailing characters */ +-static int fullarg(char *p, char *arg) ++#ifdef CONFIG_PROC_VMCORE ++/* elfcorehdr= specifies the location of elf core header ++ * stored by the crashed kernel. This option will be passed ++ * by kexec loader to the capture kernel. ++ */ ++static int __init setup_elfcorehdr(char *arg) + { +- int l = strlen(arg); +- return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); ++ char *end; ++ if (!arg) ++ return -EINVAL; ++ elfcorehdr_addr = memparse(arg, &end); ++ return end > arg ? 0 : -EINVAL; + } +- +-static __init void parse_cmdline_early (char ** cmdline_p) +-{ +- char c = ' ', *to = command_line, *from = COMMAND_LINE; +- int len = 0; +- int userdef = 0; +- +- for (;;) { +- if (c != ' ') +- goto next_char; +- +-#ifdef CONFIG_SMP +- /* +- * If the BIOS enumerates physical processors before logical, +- * maxcpus=N at enumeration-time can be used to disable HT. +- */ +- else if (!memcmp(from, "maxcpus=", 8)) { +- extern unsigned int maxcpus; +- +- maxcpus = simple_strtoul(from + 8, NULL, 0); +- } +-#endif +-#ifdef CONFIG_ACPI +- /* "acpi=off" disables both ACPI table parsing and interpreter init */ +- if (fullarg(from,"acpi=off")) +- disable_acpi(); +- +- if (fullarg(from, "acpi=force")) { +- /* add later when we do DMI horrors: */ +- acpi_force = 1; +- acpi_disabled = 0; +- } +- +- /* acpi=ht just means: do ACPI MADT parsing +- at bootup, but don't enable the full ACPI interpreter */ +- if (fullarg(from, "acpi=ht")) { +- if (!acpi_force) +- disable_acpi(); +- acpi_ht = 1; +- } +- else if (fullarg(from, "pci=noacpi")) +- acpi_disable_pci(); +- else if (fullarg(from, "acpi=noirq")) +- acpi_noirq_set(); +- +- else if (fullarg(from, "acpi_sci=edge")) +- acpi_sci_flags.trigger = 1; +- else if (fullarg(from, "acpi_sci=level")) +- acpi_sci_flags.trigger = 3; +- else if (fullarg(from, "acpi_sci=high")) +- acpi_sci_flags.polarity = 1; +- else if (fullarg(from, "acpi_sci=low")) +- acpi_sci_flags.polarity = 3; +- +- /* acpi=strict disables out-of-spec workarounds */ +- else if (fullarg(from, "acpi=strict")) { +- acpi_strict = 1; +- } +-#ifdef CONFIG_X86_IO_APIC +- else if (fullarg(from, "acpi_skip_timer_override")) +- acpi_skip_timer_override = 1; +-#endif +-#endif +- +-#ifndef CONFIG_XEN +- if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) { +- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); +- disable_apic = 1; +- } +- +- if (fullarg(from, "noapic")) +- skip_ioapic_setup = 1; +- +- if (fullarg(from,"apic")) { +- skip_ioapic_setup = 0; +- ioapic_force = 1; +- } ++early_param("elfcorehdr", setup_elfcorehdr); + #endif +- +- if (!memcmp(from, "mem=", 4)) +- parse_memopt(from+4, &from); +- +- if (!memcmp(from, "memmap=", 7)) { +- /* exactmap option is for used defined memory */ +- if (!memcmp(from+7, "exactmap", 8)) { +-#ifdef CONFIG_CRASH_DUMP +- /* If we 
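
[Annotation] setup_elfcorehdr() above shows the 2.6.19 early_param() pattern that this patch uses to retire the hand-rolled parse_cmdline_early() scanner: each option registers a callback that receives only its own value string and returns 0 on success or -EINVAL. A hedged, self-contained sketch of the same shape (early_param() itself is mocked out here; in the kernel it is a linker-section registration macro, and memparse() is stood in for by strtoul()):

    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>

    static unsigned long elfcorehdr_addr;

    /* Parser callback in the early_param() style: gets just "0x...". */
    static int setup_elfcorehdr(char *arg)
    {
        char *end;
        if (!arg)
            return -EINVAL;
        elfcorehdr_addr = strtoul(arg, &end, 0);  /* memparse() stand-in */
        return end > arg ? 0 : -EINVAL;
    }

    int main(void)
    {
        /* The kernel would dispatch this from parse_early_param(). */
        if (setup_elfcorehdr("0x100000") == 0)
            printf("elfcorehdr at %#lx\n", elfcorehdr_addr);
        return 0;
    }

The per-option callbacks are why the huge parse_cmdline_early() block is deleted wholesale in the hunk that follows: each "if (!memcmp(from, ...))" arm becomes one small registered parser.
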
are doing a crash dump, we +- * still need to know the real mem +- * size before original memory map is +- * reset. +- */ +- saved_max_pfn = e820_end_of_ram(); +-#endif +- from += 8+7; +- end_pfn_map = 0; +- e820.nr_map = 0; +- userdef = 1; +- } +- else { +- parse_memmapopt(from+7, &from); +- userdef = 1; +- } +- } +- +-#ifdef CONFIG_NUMA +- if (!memcmp(from, "numa=", 5)) +- numa_setup(from+5); +-#endif +- +- if (!memcmp(from,"iommu=",6)) { +- iommu_setup(from+6); +- } +- +- if (fullarg(from,"oops=panic")) +- panic_on_oops = 1; +- +- if (!memcmp(from, "noexec=", 7)) +- nonx_setup(from + 7); +- +-#ifdef CONFIG_KEXEC +- /* crashkernel=size@addr specifies the location to reserve for +- * a crash kernel. By reserving this memory we guarantee +- * that linux never set's it up as a DMA target. +- * Useful for holding code to do something appropriate +- * after a kernel panic. +- */ +- else if (!memcmp(from, "crashkernel=", 12)) { +-#ifndef CONFIG_XEN +- unsigned long size, base; +- size = memparse(from+12, &from); +- if (*from == '@') { +- base = memparse(from+1, &from); +- /* FIXME: Do I want a sanity check +- * to validate the memory range? +- */ +- crashk_res.start = base; +- crashk_res.end = base + size - 1; +- } +-#else +- printk("Ignoring crashkernel command line, " +- "parameter will be supplied by xen\n"); +-#endif +- } +-#endif +- +-#ifdef CONFIG_PROC_VMCORE +- /* elfcorehdr= specifies the location of elf core header +- * stored by the crashed kernel. This option will be passed +- * by kexec loader to the capture kernel. +- */ +- else if(!memcmp(from, "elfcorehdr=", 11)) +- elfcorehdr_addr = memparse(from+11, &from); +-#endif +- +-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN) +- else if (!memcmp(from, "additional_cpus=", 16)) +- setup_additional_cpus(from+16); +-#endif +- +- next_char: +- c = *(from++); +- if (!c) +- break; +- if (COMMAND_LINE_SIZE <= ++len) +- break; +- *(to++) = c; +- } +- if (userdef) { +- printk(KERN_INFO "user-defined physical RAM map:\n"); +- e820_print_map("user"); +- } +- *to = '\0'; +- *cmdline_p = command_line; +-} + + #ifndef CONFIG_NUMA + static void __init +@@ -521,10 +337,11 @@ contig_initmem_init(unsigned long start_ + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n",bootmap_size); + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); ++ e820_register_active_regions(0, start_pfn, end_pfn); + #ifdef CONFIG_XEN +- e820_bootmem_free(NODE_DATA(0), 0, xen_start_info->nr_pages<<PAGE_SHIFT); ++ free_bootmem_with_active_regions(0, xen_start_info->nr_pages); + #else +- e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT); ++ free_bootmem_with_active_regions(0, end_pfn); + #endif + reserve_bootmem(bootmap, bootmap_size); + } +@@ -587,6 +404,10 @@ static void discover_ebda(void) + void __init setup_arch(char **cmdline_p) + { + #ifdef CONFIG_XEN ++ extern struct e820map machine_e820; ++ ++ printk(KERN_INFO "Command line: %s\n", saved_command_line); ++ + /* Register a call for panic conditions. 
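
[Annotation] The xen_panic_block registration just below uses the kernel's atomic notifier chains. A freestanding sketch of that publish/subscribe shape, simplified to single-threaded code with illustrative names (the real atomic_notifier_chain_register() takes locks/RCU into account):

    #include <stdio.h>

    struct notifier_block {
        int (*call)(struct notifier_block *nb, unsigned long event, void *data);
        struct notifier_block *next;
    };

    static struct notifier_block *panic_chain;

    static void notifier_register(struct notifier_block **chain,
                                  struct notifier_block *nb)
    {
        nb->next = *chain;               /* kernel version does this atomically */
        *chain = nb;
    }

    static void notifier_call_chain(struct notifier_block *chain,
                                    unsigned long event)
    {
        for (; chain; chain = chain->next)
            chain->call(chain, event, NULL);
    }

    static int xen_panic_event(struct notifier_block *nb, unsigned long event,
                               void *data)
    {
        printf("panic notifier fired (event %lu)\n", event);
        return 0;
    }

    static struct notifier_block xen_panic_block = { .call = xen_panic_event };

    int main(void)
    {
        notifier_register(&panic_chain, &xen_panic_block);
        notifier_call_chain(panic_chain, 0);   /* what panic() would do */
        return 0;
    }
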
*/ + atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); + +@@ -633,6 +454,8 @@ void __init setup_arch(char **cmdline_p) + + ARCH_SETUP + #else ++ printk(KERN_INFO "Command line: %s\n", saved_command_line); ++ + ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); + screen_info = SCREEN_INFO; + edid_info = EDID_INFO; +@@ -660,16 +483,22 @@ void __init setup_arch(char **cmdline_p) + data_resource.start = virt_to_phys(&_etext); + data_resource.end = virt_to_phys(&_edata)-1; + +- parse_cmdline_early(cmdline_p); +- + early_identify_cpu(&boot_cpu_data); + ++ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); ++ *cmdline_p = command_line; ++ ++ parse_early_param(); ++ ++ finish_e820_parsing(); ++ ++ e820_register_active_regions(0, 0, -1UL); + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + end_pfn = e820_end_of_ram(); +- num_physpages = end_pfn; /* for pfn_valid */ ++ num_physpages = end_pfn; + + check_efer(); + +@@ -680,6 +509,14 @@ void __init setup_arch(char **cmdline_p) + if (is_initial_xendomain()) + dmi_scan_machine(); + ++ /* How many end-of-memory variables you have, grandma! */ ++ max_low_pfn = end_pfn; ++ max_pfn = end_pfn; ++ high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1; ++ ++ /* Remove active ranges so rediscovery with NUMA-awareness happens */ ++ remove_all_active_ranges(); ++ + #ifdef CONFIG_ACPI_NUMA + /* + * Parse SRAT to discover nodes. +@@ -838,16 +675,16 @@ void __init setup_arch(char **cmdline_p) + + } + ++#ifdef CONFIG_ACPI + if (!is_initial_xendomain()) { + acpi_disabled = 1; +-#ifdef CONFIG_ACPI + acpi_ht = 0; +-#endif + } + #endif ++#endif + +-#ifndef CONFIG_XEN +- check_ioapic(); ++#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) ++ early_quirks(); + #endif + + zap_low_mappings(0); +@@ -907,6 +744,7 @@ void __init setup_arch(char **cmdline_p) + } + #else + e820_reserve_resources(e820.map, e820.nr_map); ++ e820_mark_nosave_regions(); + #endif + + request_resource(&iomem_resource, &video_ram_resource); +@@ -914,7 +752,7 @@ void __init setup_arch(char **cmdline_p) + { + unsigned i; + /* request I/O space for devices used on all i[345]86 PCs */ +- for (i = 0; i < STANDARD_IO_RESOURCES; i++) ++ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) + request_resource(&ioport_resource, &standard_io_resources[i]); + } + +@@ -1099,7 +937,7 @@ static void __init amd_detect_cmp(struct + #endif + } + +-static void __init init_amd(struct cpuinfo_x86 *c) ++static void __cpuinit init_amd(struct cpuinfo_x86 *c) + { + unsigned level; + +@@ -1155,6 +993,12 @@ static void __init init_amd(struct cpuin + + /* Fix cpuid4 emulation for more */ + num_cache_leaves = 3; ++ ++ /* When there is only one core no need to synchronize RDTSC */ ++ if (num_possible_cpus() == 1) ++ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); ++ else ++ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + } + + static void __cpuinit detect_ht(struct cpuinfo_x86 *c) +@@ -1236,8 +1080,7 @@ static void srat_detect_node(void) + node = first_node(node_online_map); + numa_set_node(cpu, node); + +- if (acpi_numa > 0) +- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); ++ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + #endif + } + +@@ -1271,6 +1114,8 @@ static void __cpuinit init_intel(struct + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); ++ if (c->x86 == 6) ++ set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); + 
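
[Annotation] Several hunks above and below drop one-off helpers like STANDARD_IO_RESOURCES and ADAPTER_ROM_RESOURCES in favour of the generic ARRAY_SIZE() macro. The idiom, reduced to a standalone example:

    #include <stdio.h>
    #include <stddef.h>

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

    static const char *standard_io[] = { "dma1", "pic1", "timer" };

    int main(void)
    {
        for (size_t i = 0; i < ARRAY_SIZE(standard_io); i++)
            printf("%zu: %s\n", i, standard_io[i]);
        return 0;
    }

One shared macro instead of a per-array count keeps the bound correct if the array's initializer list changes.
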
set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); + c->x86_max_cores = intel_num_cpu_cores(c); + +@@ -1489,8 +1334,8 @@ static int show_cpuinfo(struct seq_file + + /* Intel-defined (#2) */ + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", +- "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, +- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ++ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, ++ NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + + /* VIA/Cyrix/Centaur-defined */ +Index: 10.3-2007-11-26/arch/x86_64/kernel/setup64-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/setup64-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/setup64-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -31,7 +31,7 @@ + #include <asm/hypervisor.h> + #endif + +-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; ++char x86_boot_params[BOOT_PARAM_SIZE] __initdata; + + cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; + +@@ -55,8 +55,10 @@ Control non executable mappings for 64bi + on Enable(default) + off Disable + */ +-int __init nonx_setup(char *str) ++static int __init nonx_setup(char *str) + { ++ if (!str) ++ return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; +@@ -64,9 +66,9 @@ int __init nonx_setup(char *str) + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } +- return 1; ++ return 0; + } +-__setup("noexec=", nonx_setup); /* parsed early actually */ ++early_param("noexec", nonx_setup); + + int force_personality32 = 0; + +@@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void) + #endif + + /* Copy section for each CPU (we discard the original) */ +- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); +-#ifdef CONFIG_MODULES +- if (size < PERCPU_ENOUGH_ROOM) +- size = PERCPU_ENOUGH_ROOM; +-#endif ++ size = PERCPU_ENOUGH_ROOM; + ++ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size); + for_each_cpu_mask (i, cpu_possible_map) { + char *ptr; + +@@ -169,7 +168,10 @@ void pda_init(int cpu) + /* Setup up data that may be needed in __get_free_pages early */ + asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); + #ifndef CONFIG_XEN ++ /* Memory clobbers used to order PDA accessed */ ++ mb(); + wrmsrl(MSR_GS_BASE, pda); ++ mb(); + #else + HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (unsigned long)pda); + #endif +@@ -234,6 +236,8 @@ void __cpuinit check_efer(void) + } + } + ++unsigned long kernel_eflags; ++ + /* + * cpu_init() initializes state that is per-CPU. Some data is already + * initialized (naturally) in the bootstrap process, such as the GDT +@@ -298,28 +302,17 @@ void __cpuinit cpu_init (void) + * set up and load the per-CPU TSS + */ + for (v = 0; v < N_EXCEPTION_STACKS; v++) { ++ static const unsigned int order[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, ++ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER ++ }; + if (cpu) { +- static const unsigned int order[N_EXCEPTION_STACKS] = { +- [0 ... 
N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, +- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER +- }; +- + estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); + if (!estacks) + panic("Cannot allocate exception stack %ld %d\n", + v, cpu); + } +- switch (v + 1) { +-#if DEBUG_STKSZ > EXCEPTION_STKSZ +- case DEBUG_STACK: +- cpu_pda(cpu)->debugstack = (unsigned long)estacks; +- estacks += DEBUG_STKSZ; +- break; +-#endif +- default: +- estacks += EXCEPTION_STKSZ; +- break; +- } ++ estacks += PAGE_SIZE << order[v]; + orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks; + } + +@@ -358,4 +351,6 @@ void __cpuinit cpu_init (void) + set_debugreg(0UL, 7); + + fpu_init(); ++ ++ raw_local_save_flags(kernel_eflags); + } +Index: 10.3-2007-11-26/arch/x86_64/kernel/smp-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/smp-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/smp-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -394,9 +394,8 @@ int smp_call_function_single (int cpu, v + /* prevent preemption and reschedule on another processor */ + int me = get_cpu(); + if (cpu == me) { +- WARN_ON(1); + put_cpu(); +- return -EBUSY; ++ return 0; + } + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); +@@ -526,7 +525,7 @@ void smp_send_stop(void) + #ifndef CONFIG_XEN + asmlinkage void smp_reschedule_interrupt(void) + #else +-asmlinkage irqreturn_t smp_reschedule_interrupt(void) ++asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx) + #endif + { + #ifndef CONFIG_XEN +@@ -539,7 +538,7 @@ asmlinkage irqreturn_t smp_reschedule_in + #ifndef CONFIG_XEN + asmlinkage void smp_call_function_interrupt(void) + #else +-asmlinkage irqreturn_t smp_call_function_interrupt(void) ++asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx) + #endif + { + void (*func) (void *info) = call_data->func; +@@ -570,31 +569,3 @@ asmlinkage irqreturn_t smp_call_function + return IRQ_HANDLED; + #endif + } +- +-int safe_smp_processor_id(void) +-{ +-#ifdef CONFIG_XEN +- return smp_processor_id(); +-#else +- unsigned apicid, i; +- +- if (disable_apic) +- return 0; +- +- apicid = hard_smp_processor_id(); +- if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid) +- return apicid; +- +- for (i = 0; i < NR_CPUS; ++i) { +- if (x86_cpu_to_apicid[i] == apicid) +- return i; +- } +- +- /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI, +- * or called too early. Either way, we must be CPU 0. 
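
[Annotation] The cpu_init() hunk above replaces a switch on stack type with a designated-initializer order table, then steps each exception stack top by PAGE_SIZE << order. A userspace sketch of the pattern; the sizes are illustrative, and the `[0 ... N-1]` range designator is the same GNU C extension the kernel uses:

    #include <stdio.h>
    #include <stdlib.h>

    #define N_EXCEPTION_STACKS    5
    #define DEBUG_STACK           4      /* 1-based index, as in the kernel */
    #define EXCEPTION_STACK_ORDER 0      /* 4 KiB */
    #define DEBUG_STACK_ORDER     1      /* 8 KiB */
    #define PAGE_SIZE             4096UL

    int main(void)
    {
        /* Every stack gets the default order; DEBUG gets a bigger one. */
        static const unsigned int order[N_EXCEPTION_STACKS] = {
            [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
            [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
        };

        for (int v = 0; v < N_EXCEPTION_STACKS; v++) {
            char *base = malloc(PAGE_SIZE << order[v]);
            char *top  = base + (PAGE_SIZE << order[v]); /* IST stores the top */
            printf("stack %d top: %p\n", v, (void *)top);
            free(base);
        }
        return 0;
    }
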
*/ +- if (x86_cpu_to_apicid[0] == BAD_APICID) +- return 0; +- +- return 0; /* Should not happen */ +-#endif +-} +Index: 10.3-2007-11-26/arch/x86_64/kernel/traps-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/traps-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/traps-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -23,6 +23,7 @@ + #include <linux/delay.h> + #include <linux/spinlock.h> + #include <linux/interrupt.h> ++#include <linux/kallsyms.h> + #include <linux/module.h> + #include <linux/moduleparam.h> + #include <linux/nmi.h> +@@ -45,6 +46,7 @@ + #include <asm/pda.h> + #include <asm/proto.h> + #include <asm/nmi.h> ++#include <asm/stacktrace.h> + + asmlinkage void divide_error(void); + asmlinkage void debug(void); +@@ -114,7 +116,6 @@ static int call_trace = 1; + #endif + + #ifdef CONFIG_KALLSYMS +-# include <linux/kallsyms.h> + void printk_address(unsigned long address) + { + unsigned long offset = 0, symsize; +@@ -142,7 +143,7 @@ void printk_address(unsigned long addres + #endif + + static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, +- unsigned *usedp, const char **idp) ++ unsigned *usedp, char **idp) + { + #ifndef CONFIG_X86_NO_TSS + static char ids[][8] = { +@@ -162,26 +163,7 @@ static unsigned long *in_exception_stack + * 'stack' is in one of them: + */ + for (k = 0; k < N_EXCEPTION_STACKS; k++) { +- unsigned long end; +- +- /* +- * set 'end' to the end of the exception stack. +- */ +- switch (k + 1) { +- /* +- * TODO: this block is not needed i think, because +- * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK] +- * properly too. +- */ +-#if DEBUG_STKSZ > EXCEPTION_STKSZ +- case DEBUG_STACK: +- end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; +- break; +-#endif +- default: +- end = per_cpu(orig_ist, cpu).ist[k]; +- break; +- } ++ unsigned long end = per_cpu(orig_ist, cpu).ist[k]; + /* + * Is 'stack' above this exception frame's end? + * If yes then skip to the next frame. 
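
[Annotation] The simplified in_exception_stack() above takes each stack's end directly from orig_ist (which cpu_init() now fills correctly for all stacks, including DEBUG) and tests whether a pointer falls inside [end - EXCEPTION_STKSZ, end). The containment test, reduced to a sketch with an illustrative stack size:

    #include <stdio.h>
    #include <stdbool.h>

    #define EXCEPTION_STKSZ 4096UL

    /* Does 'stack' live inside the exception stack that ends at 'end'? */
    static bool in_exception_stack(unsigned long stack, unsigned long end)
    {
        if (stack >= end)
            return false;                /* above this frame's end: skip it */
        return stack >= end - EXCEPTION_STKSZ;
    }

    int main(void)
    {
        unsigned long end = 0x10000;
        printf("inside=%d outside=%d\n",
               in_exception_stack(0xF800, end),
               in_exception_stack(0x4000, end));
        return 0;
    }
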
+@@ -236,13 +218,19 @@ static unsigned long *in_exception_stack + return NULL; + } + +-static int show_trace_unwind(struct unwind_frame_info *info, void *context) ++struct ops_and_data { ++ struct stacktrace_ops *ops; ++ void *data; ++}; ++ ++static int dump_trace_unwind(struct unwind_frame_info *info, void *context) + { ++ struct ops_and_data *oad = (struct ops_and_data *)context; + int n = 0; + + while (unwind(info) == 0 && UNW_PC(info)) { + n++; +- printk_address(UNW_PC(info)); ++ oad->ops->address(oad->data, UNW_PC(info)); + if (arch_unw_user_mode(info)) + break; + } +@@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi + * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack + */ + +-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack) ++static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) + { +- const unsigned cpu = safe_smp_processor_id(); ++ void *t = (void *)tinfo; ++ return p > t && p < t + THREAD_SIZE - 3; ++} ++ ++void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack, ++ struct stacktrace_ops *ops, void *data) ++{ ++ const unsigned cpu = smp_processor_id(); + unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; + unsigned used = 0; +- +- printk("\nCall Trace:\n"); ++ struct thread_info *tinfo; + + if (!tsk) + tsk = current; +@@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk, + if (call_trace >= 0) { + int unw_ret = 0; + struct unwind_frame_info info; ++ struct ops_and_data oad = { .ops = ops, .data = data }; + + if (regs) { + if (unwind_init_frame_info(&info, tsk, regs) == 0) +- unw_ret = show_trace_unwind(&info, NULL); ++ unw_ret = dump_trace_unwind(&info, &oad); + } else if (tsk == current) +- unw_ret = unwind_init_running(&info, show_trace_unwind, NULL); ++ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); + else { + if (unwind_init_blocked(&info, tsk) == 0) +- unw_ret = show_trace_unwind(&info, NULL); ++ unw_ret = dump_trace_unwind(&info, &oad); + } + if (unw_ret > 0) { + if (call_trace == 1 && !arch_unw_user_mode(&info)) { +- print_symbol("DWARF2 unwinder stuck at %s\n", ++ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", + UNW_PC(&info)); + if ((long)UNW_SP(&info) < 0) { +- printk("Leftover inexact backtrace:\n"); ++ ops->warning(data, "Leftover inexact backtrace:\n"); + stack = (unsigned long *)UNW_SP(&info); ++ if (!stack) ++ return; + } else +- printk("Full inexact backtrace again:\n"); ++ ops->warning(data, "Full inexact backtrace again:\n"); + } else if (call_trace >= 1) + return; + else +- printk("Full inexact backtrace again:\n"); ++ ops->warning(data, "Full inexact backtrace again:\n"); + } else +- printk("Inexact backtrace:\n"); ++ ops->warning(data, "Inexact backtrace:\n"); ++ } ++ if (!stack) { ++ unsigned long dummy; ++ stack = &dummy; ++ if (tsk && tsk != current) ++ stack = (unsigned long *)tsk->thread.rsp; + } ++ /* ++ * Align the stack pointer on word boundary, later loops ++ * rely on that (and corruption / debug info bugs can cause ++ * unaligned values here): ++ */ ++ stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1)); + + /* + * Print function call entries within a stack. 'cond' is the +@@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk, + #define HANDLE_STACK(cond) \ + do while (cond) { \ + unsigned long addr = *stack++; \ +- if (kernel_text_address(addr)) { \ ++ if (oops_in_progress ? 
\ ++ __kernel_text_address(addr) : \ ++ kernel_text_address(addr)) { \ + /* \ + * If the address is either in the text segment of the \ + * kernel, or in the region which contains vmalloc'ed \ +@@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk, + * down the cause of the crash will be able to figure \ + * out the call path that was taken. \ + */ \ +- printk_address(addr); \ ++ ops->address(data, addr); \ + } \ + } while (0) + +@@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk, + * current stack address. If the stacks consist of nested + * exceptions + */ +- for ( ; ; ) { +- const char *id; ++ for (;;) { ++ char *id; + unsigned long *estack_end; + estack_end = in_exception_stack(cpu, (unsigned long)stack, + &used, &id); + + if (estack_end) { +- printk(" <%s>", id); ++ if (ops->stack(data, id) < 0) ++ break; + HANDLE_STACK (stack < estack_end); +- printk(" <EOE>"); ++ ops->stack(data, "<EOE>"); + /* + * We link to the next stack via the + * second-to-last pointer (index -2 to end) in the +@@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk, + (IRQSTACKSIZE - 64) / sizeof(*irqstack); + + if (stack >= irqstack && stack < irqstack_end) { +- printk(" <IRQ>"); ++ if (ops->stack(data, "IRQ") < 0) ++ break; + HANDLE_STACK (stack < irqstack_end); + /* + * We link to the next stack (which would be +@@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk, + */ + stack = (unsigned long *) (irqstack_end[-1]); + irqstack_end = NULL; +- printk(" <EOI>"); ++ ops->stack(data, "EOI"); + continue; + } + } +@@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk, + } + + /* +- * This prints the process stack: ++ * This handles the process stack: + */ +- HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0); ++ tinfo = current_thread_info(); ++ HANDLE_STACK (valid_stack_ptr(tinfo, stack)); + #undef HANDLE_STACK ++} ++EXPORT_SYMBOL(dump_trace); ++ ++static void ++print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) ++{ ++ print_symbol(msg, symbol); ++ printk("\n"); ++} ++ ++static void print_trace_warning(void *data, char *msg) ++{ ++ printk("%s\n", msg); ++} ++ ++static int print_trace_stack(void *data, char *name) ++{ ++ printk(" <%s> ", name); ++ return 0; ++} ++ ++static void print_trace_address(void *data, unsigned long addr) ++{ ++ printk_address(addr); ++} ++ ++static struct stacktrace_ops print_trace_ops = { ++ .warning = print_trace_warning, ++ .warning_symbol = print_trace_warning_symbol, ++ .stack = print_trace_stack, ++ .address = print_trace_address, ++}; + ++void ++show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack) ++{ ++ printk("\nCall Trace:\n"); ++ dump_trace(tsk, regs, stack, &print_trace_ops, NULL); + printk("\n"); + } + +-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp) ++static void ++_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp) + { + unsigned long *stack; + int i; +- const int cpu = safe_smp_processor_id(); ++ const int cpu = smp_processor_id(); + unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + +@@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs + int i; + int in_kernel = !user_mode(regs); + unsigned long rsp; +- const int cpu = safe_smp_processor_id(); ++ const int cpu = smp_processor_id(); + struct task_struct *cur = cpu_pda(cpu)->pcurrent; + + rsp = regs->rsp; +@@ -505,9 +557,11 @@ static unsigned 
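
[Annotation] The traps-xen.c refactor above splits stack walking from output: dump_trace() owns the walk, and each consumer supplies a struct stacktrace_ops callback table (show_trace()'s print_trace_ops being one such consumer). A compact standalone sketch of the callback-table pattern, with the walk itself trivialized:

    #include <stdio.h>

    struct stacktrace_ops {
        void (*warning)(void *data, char *msg);
        int  (*stack)(void *data, char *name);    /* < 0 aborts the walk */
        void (*address)(void *data, unsigned long addr);
    };

    /* Walker: knows how to find addresses, not how to report them. */
    static void dump_trace(const unsigned long *frames, int n,
                           struct stacktrace_ops *ops, void *data)
    {
        if (ops->stack(data, "IRQ") < 0)
            return;
        for (int i = 0; i < n; i++)
            ops->address(data, frames[i]);
    }

    static int  print_stack(void *d, char *name)     { printf(" <%s>", name); return 0; }
    static void print_addr(void *d, unsigned long a) { printf(" [<%016lx>]", a); }
    static void print_warn(void *d, char *m)         { printf("%s\n", m); }

    static struct stacktrace_ops print_trace_ops = {
        .warning = print_warn, .stack = print_stack, .address = print_addr,
    };

    int main(void)
    {
        unsigned long frames[] = { 0xffffffff80211234UL, 0xffffffff80255678UL };
        dump_trace(frames, 2, &print_trace_ops, NULL);
        printf("\n");
        return 0;
    }

The payoff is that oops printing, /proc stack dumps and the stacktrace API can all reuse one walker instead of each reimplementing the IRQ/exception-stack traversal.
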
int die_nest_count; + + unsigned __kprobes long oops_begin(void) + { +- int cpu = safe_smp_processor_id(); ++ int cpu = smp_processor_id(); + unsigned long flags; + ++ oops_enter(); ++ + /* racy, but better than risking deadlock. */ + local_irq_save(flags); + if (!spin_trylock(&die_lock)) { +@@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl + spin_unlock_irqrestore(&die_lock, flags); + if (panic_on_oops) + panic("Fatal exception"); ++ oops_exit(); + } + + void __kprobes __die(const char * str, struct pt_regs * regs, long err) +@@ -573,7 +628,7 @@ void die(const char * str, struct pt_reg + } + + #ifdef CONFIG_X86_LOCAL_APIC +-void __kprobes die_nmi(char *str, struct pt_regs *regs) ++void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) + { + unsigned long flags = oops_begin(); + +@@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct + * We are in trouble anyway, lets at least try + * to get a message out. + */ +- printk(str, safe_smp_processor_id()); ++ printk(str, smp_processor_id()); + show_registers(regs); + if (kexec_should_crash(current)) + crash_kexec(regs); +- if (panic_on_timeout || panic_on_oops) +- panic("nmi watchdog"); +- printk("console shuts up ...\n"); ++ if (do_panic || panic_on_oops) ++ panic("Non maskable interrupt"); + oops_end(flags); + nmi_exit(); + local_irq_enable(); +@@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro + static __kprobes void + mem_parity_error(unsigned char reason, struct pt_regs * regs) + { +- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); +- printk("You probably have a hardware problem with your RAM chips\n"); ++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", ++ reason); ++ printk(KERN_EMERG "You probably have a hardware problem with your " ++ "RAM chips\n"); ++ ++ if (panic_on_unrecovered_nmi) ++ panic("NMI: Not continuing"); ++ ++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + + #if 0 /* XEN */ + /* Clear and disable the memory parity error line. */ +@@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str + + static __kprobes void + unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +-{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason); +- printk("Dazed and confused, but trying to continue\n"); +- printk("Do you have a strange power saving mode enabled?\n"); ++{ ++ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", ++ reason); ++ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); ++ ++ if (panic_on_unrecovered_nmi) ++ panic("NMI: Not continuing"); ++ ++ printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + } + + /* Runs on IST stack. This code must keep interrupts off all the time. +@@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi + * Ok, so this is none of the documented NMI sources, + * so it must be the NMI watchdog. 
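
[Annotation] The NMI hunks above introduce the panic_on_unrecovered_nmi switch: an unexplained NMI either panics immediately or logs at KERN_EMERG and continues. The decision logic, reduced to a runnable sketch (panic() is stubbed with exit, and the reason value is arbitrary):

    #include <stdio.h>
    #include <stdlib.h>

    static int panic_on_unrecovered_nmi;  /* sysctl-style knob */

    static void panic(const char *msg)
    {
        fprintf(stderr, "panic: %s\n", msg);
        exit(1);
    }

    static void unknown_nmi_error(unsigned char reason)
    {
        printf("Uhhuh. NMI received for unknown reason %02x.\n", reason);
        printf("Do you have a strange power saving mode enabled?\n");

        if (panic_on_unrecovered_nmi)
            panic("NMI: Not continuing");

        printf("Dazed and confused, but trying to continue\n");
    }

    int main(void)
    {
        unknown_nmi_error(0x1f);          /* knob off: warn and carry on */
        panic_on_unrecovered_nmi = 1;
        unknown_nmi_error(0x1f);          /* knob on: halt the machine */
        return 0;
    }
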
+ */ +- if (nmi_watchdog > 0) { +- nmi_watchdog_tick(regs,reason); ++ if (nmi_watchdog_tick(regs,reason)) + return; +- } + #endif +- unknown_nmi_error(reason, regs); ++ if (!do_nmi_callback(regs,cpu)) ++ unknown_nmi_error(reason, regs); ++ + return; + } + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) +@@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void) + init_fpu(me); + restore_fpu_checking(&me->thread.i387.fxsave); + task_thread_info(me)->status |= TS_USEDFPU; ++ me->fpu_counter++; + } + + +@@ -1143,24 +1211,30 @@ void smp_trap_init(trap_info_t *trap_ctx + } + + +-/* Actual parsing is done early in setup.c. */ +-static int __init oops_dummy(char *s) ++static int __init oops_setup(char *s) + { +- panic_on_oops = 1; +- return 1; ++ if (!s) ++ return -EINVAL; ++ if (!strcmp(s, "panic")) ++ panic_on_oops = 1; ++ return 0; + } +-__setup("oops=", oops_dummy); ++early_param("oops", oops_setup); + + static int __init kstack_setup(char *s) + { ++ if (!s) ++ return -EINVAL; + kstack_depth_to_print = simple_strtoul(s,NULL,0); +- return 1; ++ return 0; + } +-__setup("kstack=", kstack_setup); ++early_param("kstack", kstack_setup); + + #ifdef CONFIG_STACK_UNWIND + static int __init call_trace_setup(char *s) + { ++ if (!s) ++ return -EINVAL; + if (strcmp(s, "old") == 0) + call_trace = -1; + else if (strcmp(s, "both") == 0) +@@ -1169,7 +1243,7 @@ static int __init call_trace_setup(char + call_trace = 1; + else if (strcmp(s, "new") == 0) + call_trace = 2; +- return 1; ++ return 0; + } +-__setup("call_trace=", call_trace_setup); ++early_param("call_trace", call_trace_setup); + #endif +Index: 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/kernel/vsyscall-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/kernel/vsyscall-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -26,6 +26,10 @@ + #include <linux/seqlock.h> + #include <linux/jiffies.h> + #include <linux/sysctl.h> ++#include <linux/getcpu.h> ++#include <linux/cpu.h> ++#include <linux/smp.h> ++#include <linux/notifier.h> + + #include <asm/vsyscall.h> + #include <asm/pgtable.h> +@@ -33,11 +37,15 @@ + #include <asm/fixmap.h> + #include <asm/errno.h> + #include <asm/io.h> ++#include <asm/segment.h> ++#include <asm/desc.h> ++#include <asm/topology.h> + + #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) + + int __sysctl_vsyscall __section_sysctl_vsyscall = 1; + seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; ++int __vgetcpu_mode __section_vgetcpu_mode; + + #include <asm/unistd.h> + +@@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo + sequence = read_seqbegin(&__xtime_lock); + + sec = __xtime.tv_sec; +- usec = (__xtime.tv_nsec / 1000) + +- (__jiffies - __wall_jiffies) * (1000000 / HZ); ++ usec = __xtime.tv_nsec / 1000; + + if (__vxtime.mode != VXTIME_HPET) { + t = get_cycles_sync(); +@@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo + __vxtime.tsc_quot) >> 32; + /* See comment in x86_64 do_gettimeofday. 
*/ + } else { +- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) - ++ usec += ((readl((void __iomem *) ++ fix_to_virt(VSYSCALL_HPET) + 0xf0) - + __vxtime.last) * __vxtime.quot) >> 32; + } + } while (read_seqretry(&__xtime_lock, sequence)); +@@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t) + return __xtime.tv_sec; + } + +-long __vsyscall(2) venosys_0(void) +-{ +- return -ENOSYS; ++/* Fast way to get current CPU and node. ++ This helps to do per node and per CPU caches in user space. ++ The result is not guaranteed without CPU affinity, but usually ++ works out because the scheduler tries to keep a thread on the same ++ CPU. ++ ++ tcache must point to a two element sized long array. ++ All arguments can be NULL. */ ++long __vsyscall(2) ++vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache) ++{ ++ unsigned int dummy, p; ++ unsigned long j = 0; ++ ++ /* Fast cache - only recompute value once per jiffies and avoid ++ relatively costly rdtscp/cpuid otherwise. ++ This works because the scheduler usually keeps the process ++ on the same CPU and this syscall doesn't guarantee its ++ results anyways. ++ We do this here because otherwise user space would do it on ++ its own in a likely inferior way (no access to jiffies). ++ If you don't like it pass NULL. */ ++ if (tcache && tcache->blob[0] == (j = __jiffies)) { ++ p = tcache->blob[1]; ++ } else if (__vgetcpu_mode == VGETCPU_RDTSCP) { ++ /* Load per CPU data from RDTSCP */ ++ rdtscp(dummy, dummy, p); ++ } else { ++ /* Load per CPU data from GDT */ ++ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG)); ++ } ++ if (tcache) { ++ tcache->blob[0] = j; ++ tcache->blob[1] = p; ++ } ++ if (cpu) ++ *cpu = p & 0xfff; ++ if (node) ++ *node = p >> 12; ++ return 0; + } + + long __vsyscall(3) venosys_1(void) +@@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta + void __user *buffer, size_t *lenp, loff_t *ppos) + { + extern u16 vsysc1, vsysc2; +- u16 *map1, *map2; ++ u16 __iomem *map1; ++ u16 __iomem *map2; + int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); + if (!write) + return ret; +@@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta + goto out; + } + if (!sysctl_vsyscall) { +- *map1 = SYSCALL; +- *map2 = SYSCALL; ++ writew(SYSCALL, map1); ++ writew(SYSCALL, map2); + } else { +- *map1 = NOP2; +- *map2 = NOP2; ++ writew(NOP2, map1); ++ writew(NOP2, map2); + } + iounmap(map2); + out: +@@ -200,6 +246,45 @@ static ctl_table kernel_root_table2[] = + + #endif + ++/* Assume __initcall executes before all user space. Hopefully kmod ++ doesn't violate that. We'll find out if it does. */ ++static void __cpuinit vsyscall_set_cpu(int cpu) ++{ ++ unsigned long d; ++ unsigned long node = 0; ++#ifdef CONFIG_NUMA ++ node = cpu_to_node[cpu]; ++#endif ++ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) ++ write_rdtscp_aux((node << 12) | cpu); ++ ++ /* Store cpu number in limit so that it can be loaded quickly ++ in user space in vgetcpu. ++ 12 bits for the CPU and 8 bits for the node. 
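
[Annotation] vgetcpu() above packs the CPU and node numbers into one per-CPU value: the low 12 bits carry the CPU, the bits above carry the node. User space reads it either from RDTSCP's TSC_AUX register or, on older CPUs, via LSL against a per-CPU GDT segment limit. The encode/decode arithmetic as a standalone sketch (the packing matches the `p & 0xfff` / `p >> 12` lines above; function names are illustrative):

    #include <stdio.h>

    /* Pack: node in the high bits, CPU number in the low 12 bits. */
    static unsigned encode(unsigned cpu, unsigned node)
    {
        return (node << 12) | (cpu & 0xfff);
    }

    static void decode(unsigned p, unsigned *cpu, unsigned *node)
    {
        *cpu  = p & 0xfff;
        *node = p >> 12;
    }

    int main(void)
    {
        unsigned cpu, node;
        decode(encode(3, 1), &cpu, &node);
        printf("cpu=%u node=%u\n", cpu, node);   /* cpu=3 node=1 */
        return 0;
    }

The jiffies-stamped tcache fast path in the hunk exists because even RDTSCP/LSL is costlier than a two-word cache hit, and the syscall's result is only advisory without CPU affinity anyway.
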
*/ ++ d = 0x0f40000000000ULL; ++ d |= cpu; ++ d |= (node & 0xf) << 12; ++ d |= (node >> 4) << 48; ++ HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_PER_CPU]), d); ++} ++ ++static void __cpuinit cpu_vsyscall_init(void *arg) ++{ ++ /* preemption should be already off */ ++ vsyscall_set_cpu(raw_smp_processor_id()); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static int __cpuinit ++cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) ++{ ++ long cpu = (long)arg; ++ if (action == CPU_ONLINE) ++ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); ++ return NOTIFY_DONE; ++} ++#endif ++ + static void __init map_vsyscall(void) + { + extern char __vsyscall_0; +@@ -225,14 +310,21 @@ static int __init vsyscall_init(void) + VSYSCALL_ADDR(__NR_vgettimeofday))); + BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime)); + BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE))); ++ BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu)); + map_vsyscall(); + #ifdef CONFIG_XEN + map_vsyscall_user(); + sysctl_vsyscall = 0; /* disable vgettimeofay() */ ++ if (boot_cpu_has(X86_FEATURE_RDTSCP)) ++ vgetcpu_mode = VGETCPU_RDTSCP; ++ else ++ vgetcpu_mode = VGETCPU_LSL; + #endif + #ifdef CONFIG_SYSCTL + register_sysctl_table(kernel_root_table2, 0); + #endif ++ on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); ++ hotcpu_notifier(cpu_vsyscall_notifier, 0); + return 0; + } + +Index: 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/mm/fault-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/mm/fault-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -40,8 +40,7 @@ + #define PF_RSVD (1<<3) + #define PF_INSTR (1<<4) + +-#ifdef CONFIG_KPROBES +-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); ++static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); + + /* Hook to register for page fault notifications */ + int register_page_fault_notifier(struct notifier_block *nb) +@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct + vmalloc_sync_all(); + return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); + } ++EXPORT_SYMBOL_GPL(register_page_fault_notifier); + + int unregister_page_fault_notifier(struct notifier_block *nb) + { + return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); + } ++EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + + static inline int notify_page_fault(enum die_val val, const char *str, + struct pt_regs *regs, long err, int trap, int sig) +@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum + }; + return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args); + } +-#else +-static inline int notify_page_fault(enum die_val val, const char *str, +- struct pt_regs *regs, long err, int trap, int sig) +-{ +- return NOTIFY_DONE; +-} +-#endif + + void bust_spinlocks(int yes) + { +@@ -102,7 +96,7 @@ void bust_spinlocks(int yes) + static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, + unsigned long error_code) + { +- unsigned char *instr; ++ unsigned char __user *instr; + int scan_more = 1; + int prefetch = 0; + unsigned char *max_instr; +@@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p + if (error_code & PF_INSTR) + return 0; + +- instr = (unsigned char *)convert_rip_to_linear(current, regs); ++ instr = (unsigned char __user *)convert_rip_to_linear(current, regs); + max_instr = instr + 15; + + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) +@@ 
-122,7 +116,7 @@ static noinline int is_prefetch(struct p + unsigned char instr_hi; + unsigned char instr_lo; + +- if (__get_user(opcode, instr)) ++ if (__get_user(opcode, (char __user *)instr)) + break; + + instr_hi = opcode & 0xf0; +@@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p + case 0x00: + /* Prefetch instruction is 0x0F0D or 0x0F18 */ + scan_more = 0; +- if (__get_user(opcode, instr)) ++ if (__get_user(opcode, (char __user *)instr)) + break; + prefetch = (instr_lo == 0xF) && + (opcode == 0x0D || opcode == 0x18); +@@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p + static int bad_address(void *p) + { + unsigned long dummy; +- return __get_user(dummy, (unsigned long *)p); ++ return __get_user(dummy, (unsigned long __user *)p); + } + + void dump_pagetable(unsigned long address) +@@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r + + int unhandled_signal(struct task_struct *tsk, int sig) + { +- if (tsk->pid == 1) ++ if (is_init(tsk)) + return 1; + if (tsk->ptrace & PT_PTRACED) + return 0; +@@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else +- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + + /* Below here mismatches are bugs because these lower tables + are shared */ +@@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a + pud_ref = pud_offset(pgd_ref, address); + if (pud_none(*pud_ref)) + return -1; +- if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref)) ++ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) + BUG(); + pmd = pmd_offset(pud, address); + pmd_ref = pmd_offset(pud_ref, address); +@@ -531,7 +525,7 @@ good_area: + case PF_PROT: /* read, present */ + goto bad_area; + case 0: /* read, not present */ +- if (!(vma->vm_flags & (VM_READ | VM_EXEC))) ++ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; + } + +@@ -647,7 +641,7 @@ no_context: + */ + out_of_memory: + up_read(&mm->mmap_sem); +- if (current->pid == 1) { ++ if (is_init(current)) { + yield(); + goto again; + } +@@ -702,7 +696,7 @@ void vmalloc_sync_all(void) + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else +- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); + } + spin_unlock(&pgd_lock); + set_bit(pgd_index(address), insync); +Index: 10.3-2007-11-26/arch/x86_64/mm/init-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/mm/init-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/mm/init-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user); + + extern unsigned long *contiguous_bitmap; + +-static unsigned long dma_reserve __initdata; +- + DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + extern unsigned long start_pfn; + +@@ -440,7 +438,6 @@ __init void *early_ioremap(unsigned long + + /* actually usually some more */ + if (size >= LARGE_PAGE_SIZE) { +- printk("SMBIOS area too long %lu\n", size); + return NULL; + } + set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); +@@ -462,13 +459,15 @@ __init void early_iounmap(void *addr, un + #endif + + static void __meminit +-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) ++phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) + { +- int i, k; ++ int i = pmd_index(address); + +- for (i = 0; i < PTRS_PER_PMD; pmd++, i++) { ++ 
for (; i < PTRS_PER_PMD; i++) { + unsigned long pte_phys; ++ pmd_t *pmd = pmd_page + pmd_index(address); + pte_t *pte, *pte_save; ++ int k; + + if (address >= end) { + if (!after_bootmem) +@@ -476,6 +475,10 @@ phys_pmd_init(pmd_t *pmd, unsigned long + set_pmd(pmd, __pmd(0)); + break; + } ++ ++ if (pmd_val(*pmd)) ++ continue; ++ + pte = alloc_static_page(&pte_phys); + pte_save = pte; + for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) { +@@ -501,40 +504,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long + static void __meminit + phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) + { +- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); +- +- if (pmd_none(*pmd)) { +- spin_lock(&init_mm.page_table_lock); +- phys_pmd_init(pmd, address, end); +- spin_unlock(&init_mm.page_table_lock); +- __flush_tlb_all(); +- } ++ pmd_t *pmd = pmd_offset(pud,0); ++ spin_lock(&init_mm.page_table_lock); ++ phys_pmd_init(pmd, address, end); ++ spin_unlock(&init_mm.page_table_lock); ++ __flush_tlb_all(); + } + +-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) ++static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) + { +- long i = pud_index(address); +- +- pud = pud + i; +- +- if (after_bootmem && pud_val(*pud)) { +- phys_pmd_update(pud, address, end); +- return; +- } ++ int i = pud_index(addr); + +- for (; i < PTRS_PER_PUD; pud++, i++) { +- unsigned long paddr, pmd_phys; ++ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { ++ unsigned long pmd_phys; ++ pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; + +- paddr = (address & PGDIR_MASK) + i*PUD_SIZE; +- if (paddr >= end) ++ if (addr >= end) + break; + ++ if (pud_val(*pud)) { ++ phys_pmd_update(pud, addr, end); ++ continue; ++ } ++ + pmd = alloc_static_page(&pmd_phys); + early_make_page_readonly(pmd, XENFEAT_writable_page_tables); + spin_lock(&init_mm.page_table_lock); + set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); +- phys_pmd_init(pmd, paddr, end); ++ phys_pmd_init(pmd, addr, end); + spin_unlock(&init_mm.page_table_lock); + } + __flush_tlb(); +@@ -797,77 +795,19 @@ void __cpuinit zap_low_mappings(int cpu) + #endif + } + +-/* Compute zone sizes for the DMA and DMA32 zones in a node. */ +-__init void +-size_zones(unsigned long *z, unsigned long *h, +- unsigned long start_pfn, unsigned long end_pfn) +-{ +- int i; +-#ifndef CONFIG_XEN +- unsigned long w; +-#endif +- +- for (i = 0; i < MAX_NR_ZONES; i++) +- z[i] = 0; +- +-#ifndef CONFIG_XEN +- if (start_pfn < MAX_DMA_PFN) +- z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; +- if (start_pfn < MAX_DMA32_PFN) { +- unsigned long dma32_pfn = MAX_DMA32_PFN; +- if (dma32_pfn > end_pfn) +- dma32_pfn = end_pfn; +- z[ZONE_DMA32] = dma32_pfn - start_pfn; +- } +- z[ZONE_NORMAL] = end_pfn - start_pfn; +- +- /* Remove lower zones from higher ones. */ +- w = 0; +- for (i = 0; i < MAX_NR_ZONES; i++) { +- if (z[i]) +- z[i] -= w; +- w += z[i]; +- } +- +- /* Compute holes */ +- w = start_pfn; +- for (i = 0; i < MAX_NR_ZONES; i++) { +- unsigned long s = w; +- w += z[i]; +- h[i] = e820_hole_size(s, w); +- } +- +- /* Add the space pace needed for mem_map to the holes too. */ +- for (i = 0; i < MAX_NR_ZONES; i++) +- h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; +- +- /* The 16MB DMA zone has the kernel and other misc mappings. 
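
[Annotation] The phys_pud_init()/phys_pmd_init() rework above makes mapping setup re-runnable for memory hotplug: each loop starts at the table index for the requested address and skips (or descends into) entries that are already populated, instead of assuming it always starts from a blank table. A toy one-level version of that skip-if-present pattern (granularity and "present" encoding are illustrative):

    #include <stdio.h>

    #define ENTRIES 16

    static unsigned long table[ENTRIES]; /* 0 = not present */

    /* Populate [start, end); entries that already exist are kept. */
    static void map_range(unsigned start, unsigned end)
    {
        for (unsigned i = start; i < end && i < ENTRIES; i++) {
            if (table[i])
                continue;                /* already mapped: leave it alone */
            table[i] = 0x1000UL * i | 1; /* fake "present" entry */
        }
    }

    int main(void)
    {
        map_range(0, 8);                 /* boot-time mapping */
        map_range(4, 12);                /* hotplug overlap: idempotent */
        for (unsigned i = 0; i < ENTRIES; i++)
            printf("%2u: %#lx\n", i, table[i]);
        return 0;
    }

This is also why the arch_add_memory() hunk further down can call init_memory_mapping() before __add_pages(): re-walking an already partially mapped range is now harmless.
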
+- Account them too */ +- if (h[ZONE_DMA]) { +- h[ZONE_DMA] += dma_reserve; +- if (h[ZONE_DMA] >= z[ZONE_DMA]) { +- printk(KERN_WARNING +- "Kernel too large and filling up ZONE_DMA?\n"); +- h[ZONE_DMA] = z[ZONE_DMA]; +- } +- } +-#else +- z[ZONE_DMA] = end_pfn; +- for (i = 0; i < MAX_NR_ZONES; i++) +- h[i] = 0; +-#endif +-} +- + #ifndef CONFIG_NUMA + void __init paging_init(void) + { +- unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; ++ unsigned long max_zone_pfns[MAX_NR_ZONES]; ++ ++ memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); ++ max_zone_pfns[ZONE_DMA] = end_pfn; ++ max_zone_pfns[ZONE_DMA32] = end_pfn; ++ max_zone_pfns[ZONE_NORMAL] = end_pfn; + + memory_present(0, 0, end_pfn); + sparse_init(); +- size_zones(zones, holes, 0, end_pfn); +- free_area_init_node(0, NODE_DATA(0), zones, +- __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); ++ free_area_init_nodes(max_zone_pfns); + + init_mm.context.pinned = 1; + } +@@ -921,36 +861,23 @@ void online_page(struct page *page) + + #ifdef CONFIG_MEMORY_HOTPLUG + /* +- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address +- * via probe interface of sysfs. If acpi notifies hot-add event, then it +- * can tell node id by searching dsdt. But, probe interface doesn't have +- * node id. So, return 0 as node id at this time. +- */ +-#ifdef CONFIG_NUMA +-int memory_add_physaddr_to_nid(u64 start) +-{ +- return 0; +-} +-#endif +- +-/* + * Memory is added always to NORMAL zone. This means you will never get + * additional DMA/DMA32 memory. + */ + int arch_add_memory(int nid, u64 start, u64 size) + { + struct pglist_data *pgdat = NODE_DATA(nid); +- struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; ++ struct zone *zone = pgdat->node_zones + ZONE_NORMAL; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + ++ init_memory_mapping(start, (start + size -1)); ++ + ret = __add_pages(zone, start_pfn, nr_pages); + if (ret) + goto error; + +- init_memory_mapping(start, (start + size -1)); +- + return ret; + error: + printk("%s: Problem encountered in __add_pages!\n", __func__); +@@ -964,7 +891,17 @@ int remove_memory(u64 start, u64 size) + } + EXPORT_SYMBOL_GPL(remove_memory); + +-#else /* CONFIG_MEMORY_HOTPLUG */ ++#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) ++int memory_add_physaddr_to_nid(u64 start) ++{ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); ++#endif ++ ++#endif /* CONFIG_MEMORY_HOTPLUG */ ++ ++#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE + /* + * Memory Hotadd without sparsemem. The mem_maps have been allocated in advance, + * just online the pages. +@@ -990,7 +927,7 @@ int __add_pages(struct zone *z, unsigned + } + return err; + } +-#endif /* CONFIG_MEMORY_HOTPLUG */ ++#endif + + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, + kcore_vsyscall; +@@ -1007,12 +944,6 @@ void __init mem_init(void) + + pci_iommu_alloc(); + +- /* How many end-of-memory variables you have, grandma! 
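
[Annotation] The paging_init() hunk above switches from the hand-computed size_zones()/holes arrays to the 2.6.19 arch-neutral interface: the architecture supplies only each zone's maximum PFN, and free_area_init_nodes() derives sizes and holes from the e820 active ranges registered earlier in setup_arch(). A sketch of the caller's side (the zone limit values are illustrative; under Xen all three are capped at end_pfn exactly as in the hunk, and free_area_init_nodes() is stubbed here):

    #include <string.h>
    #include <stdio.h>

    enum { ZONE_DMA, ZONE_DMA32, ZONE_NORMAL, MAX_NR_ZONES };

    static void free_area_init_nodes(const unsigned long *max_zone_pfns)
    {
        /* stand-in: the real function builds zone tables from these caps
         * plus the registered active PFN ranges */
        for (int i = 0; i < MAX_NR_ZONES; i++)
            printf("zone %d: up to pfn %lu\n", i, max_zone_pfns[i]);
    }

    int main(void)
    {
        unsigned long end_pfn = 0x40000;  /* 1 GiB of 4 KiB pages */
        unsigned long max_zone_pfns[MAX_NR_ZONES];

        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA]    = end_pfn;
        max_zone_pfns[ZONE_DMA32]  = end_pfn;
        max_zone_pfns[ZONE_NORMAL] = end_pfn;
        free_area_init_nodes(max_zone_pfns);
        return 0;
    }
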
*/ +- max_low_pfn = end_pfn; +- max_pfn = end_pfn; +- num_physpages = end_pfn; +- high_memory = (void *) __va(end_pfn * PAGE_SIZE); +- + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + +@@ -1030,7 +961,8 @@ void __init mem_init(void) + init_page_count(pfn_to_page(pfn)); + totalram_pages++; + } +- reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn); ++ reservedpages = end_pfn - totalram_pages - ++ absent_pages_in_range(0, end_pfn); + + after_bootmem = 1; + +@@ -1137,15 +1069,34 @@ void free_initrd_mem(unsigned long start + + void __init reserve_bootmem_generic(unsigned long phys, unsigned len) + { +- /* Should check here against the e820 map to avoid double free */ + #ifdef CONFIG_NUMA + int nid = phys_to_nid(phys); ++#endif ++ unsigned long pfn = phys >> PAGE_SHIFT; ++ if (pfn >= end_pfn) { ++ /* This can happen with kdump kernels when accessing firmware ++ tables. */ ++ if (pfn < end_pfn_map) ++ return; ++ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n", ++ phys, len); ++ return; ++ } ++ ++ /* Should check here against the e820 map to avoid double free */ ++#ifdef CONFIG_NUMA + reserve_bootmem_node(NODE_DATA(nid), phys, len); + #else + reserve_bootmem(phys, len); + #endif +- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) ++#ifndef CONFIG_XEN ++ if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) { ++ static unsigned long dma_reserve __initdata; ++ + dma_reserve += len / PAGE_SIZE; ++ set_dma_reserve(dma_reserve); ++ } ++#endif + } + + int kern_addr_valid(unsigned long addr) +Index: 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c +=================================================================== +--- 10.3-2007-11-26.orig/arch/x86_64/mm/pageattr-xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/arch/x86_64/mm/pageattr-xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -302,8 +302,8 @@ static void revert_page(unsigned long ad + BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, address); + BUG_ON(pmd_val(*pmd) & _PAGE_PSE); +- pgprot_val(ref_prot) |= _PAGE_PSE; + large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); ++ large_pte = pte_mkhuge(large_pte); + set_pte((pte_t *)pmd, large_pte); + } + +@@ -313,32 +313,28 @@ __change_page_attr(unsigned long address + { + pte_t *kpte; + struct page *kpte_page; +- unsigned kpte_flags; + pgprot_t ref_prot2; + kpte = lookup_address(address); + if (!kpte) return 0; + kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); +- kpte_flags = pte_val(*kpte); + if (pgprot_val(prot) != pgprot_val(ref_prot)) { +- if ((kpte_flags & _PAGE_PSE) == 0) { ++ if (!pte_huge(*kpte)) { + set_pte(kpte, pfn_pte(pfn, prot)); + } else { + /* + * split_large_page will take the reference for this + * change_page_attr on the split page. 
+ */ +- + struct page *split; +- ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); +- ++ ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); + split = split_large_page(address, prot, ref_prot2); + if (!split) + return -ENOMEM; +- set_pte(kpte,mk_pte(split, ref_prot2)); ++ set_pte(kpte, mk_pte(split, ref_prot2)); + kpte_page = split; +- } ++ } + page_private(kpte_page)++; +- } else if ((kpte_flags & _PAGE_PSE) == 0) { ++ } else if (!pte_huge(*kpte)) { + set_pte(kpte, pfn_pte(pfn, ref_prot)); + BUG_ON(page_private(kpte_page) == 0); + page_private(kpte_page)--; +@@ -395,10 +391,12 @@ int change_page_attr_addr(unsigned long + * lowmem */ + if (__pa(address) < KERNEL_TEXT_SIZE) { + unsigned long addr2; +- pgprot_t prot2 = prot; ++ pgprot_t prot2; + addr2 = __START_KERNEL_map + __pa(address); +- pgprot_val(prot2) &= ~_PAGE_NX; +- err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC); ++ /* Make sure the kernel mappings stay executable */ ++ prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); ++ err = __change_page_attr(addr2, pfn, prot2, ++ PAGE_KERNEL_EXEC); + } + } + up_write(&init_mm.mmap_sem); +Index: 10.3-2007-11-26/drivers/char/tpm/tpm_xen.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/char/tpm/tpm_xen.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/char/tpm/tpm_xen.c 2007-10-22 13:53:08.000000000 +0200 +@@ -85,8 +85,7 @@ static struct tpm_private *my_priv; + + /* local function prototypes */ + static irqreturn_t tpmif_int(int irq, +- void *tpm_priv, +- struct pt_regs *ptregs); ++ void *tpm_priv); + static void tpmif_rx_action(unsigned long unused); + static int tpmif_connect(struct xenbus_device *dev, + struct tpm_private *tp, +@@ -558,7 +557,7 @@ static void tpmif_rx_action(unsigned lon + } + + +-static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs) ++static irqreturn_t tpmif_int(int irq, void *tpm_priv) + { + struct tpm_private *tp = tpm_priv; + unsigned long flags; +Index: 10.3-2007-11-26/drivers/pci/Kconfig +=================================================================== +--- 10.3-2007-11-26.orig/drivers/pci/Kconfig 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/pci/Kconfig 2007-10-22 13:53:08.000000000 +0200 +@@ -34,7 +34,7 @@ config PCI_DEBUG + config HT_IRQ + bool "Interrupts on hypertransport devices" + default y +- depends on PCI && X86_LOCAL_APIC && X86_IO_APIC ++ depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN + help + This allows native hypertransport devices to use interrupts. 
+ +Index: 10.3-2007-11-26/drivers/xen/Kconfig +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/Kconfig 2007-09-03 09:52:56.000000000 +0200 ++++ 10.3-2007-11-26/drivers/xen/Kconfig 2007-10-22 13:53:08.000000000 +0200 +@@ -249,6 +249,9 @@ config HAVE_IRQ_IGNORE_UNHANDLED + bool + default y + ++config GENERIC_HARDIRQS_NO__DO_IRQ ++ def_bool y ++ + config NO_IDLE_HZ + bool + default y +Index: 10.3-2007-11-26/drivers/xen/balloon/balloon.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/balloon/balloon.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/balloon/balloon.c 2007-10-22 13:53:08.000000000 +0200 +@@ -83,7 +83,13 @@ static unsigned long frame_list[PAGE_SIZ + + /* VM /proc information for memory */ + extern unsigned long totalram_pages; ++#ifdef CONFIG_HIGHMEM + extern unsigned long totalhigh_pages; ++#define totalhigh_pages(op) (totalhigh_pages op) ++#else ++#undef totalhigh_pages ++#define totalhigh_pages(op) ++#endif + + /* List of ballooned pages, threaded through the mem_map array. */ + static LIST_HEAD(ballooned_pages); +@@ -119,7 +125,7 @@ static void balloon_append(struct page * + if (PageHighMem(page)) { + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); + bs.balloon_high++; +- totalhigh_pages--; ++ totalhigh_pages(--); + } else { + list_add(PAGE_TO_LIST(page), &ballooned_pages); + bs.balloon_low++; +@@ -139,7 +145,7 @@ static struct page *balloon_retrieve(voi + + if (PageHighMem(page)) { + bs.balloon_high--; +- totalhigh_pages++; ++ totalhigh_pages(++); + } + else + bs.balloon_low--; +Index: 10.3-2007-11-26/drivers/xen/blkback/blkback.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/blkback/blkback.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/blkback/blkback.c 2007-10-22 13:53:08.000000000 +0200 +@@ -287,7 +287,7 @@ static void blkif_notify_work(blkif_t *b + wake_up(&blkif->wq); + } + +-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t blkif_be_int(int irq, void *dev_id) + { + blkif_notify_work(dev_id); + return IRQ_HANDLED; +Index: 10.3-2007-11-26/drivers/xen/blkback/common.h +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/blkback/common.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/blkback/common.h 2007-10-22 13:53:08.000000000 +0200 +@@ -130,7 +130,7 @@ void blkif_interface_init(void); + + void blkif_xenbus_init(void); + +-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); ++irqreturn_t blkif_be_int(int irq, void *dev_id); + int blkif_schedule(void *arg); + + int blkback_barrier(struct xenbus_transaction xbt, +Index: 10.3-2007-11-26/drivers/xen/blkfront/blkfront.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/blkfront/blkfront.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/blkfront/blkfront.c 2007-10-22 13:53:08.000000000 +0200 +@@ -69,7 +69,7 @@ static int setup_blkring(struct xenbus_d + + static void kick_pending_request_queues(struct blkfront_info *); + +-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs); ++static irqreturn_t blkif_int(int irq, void *dev_id); + static void blkif_restart_queue(void *arg); + static void blkif_recover(struct blkfront_info *); + static void blkif_completion(struct blk_shadow 
*); +@@ -688,7 +688,7 @@ void do_blkif_request(request_queue_t *r + } + + +-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) ++static irqreturn_t blkif_int(int irq, void *dev_id) + { + struct request *req; + blkif_response_t *bret; +Index: 10.3-2007-11-26/drivers/xen/blktap/blktap.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/blktap/blktap.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/blktap/blktap.c 2007-10-22 13:53:08.000000000 +0200 +@@ -1059,7 +1059,7 @@ static void blkif_notify_work(blkif_t *b + wake_up(&blkif->wq); + } + +-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t tap_blkif_be_int(int irq, void *dev_id) + { + blkif_notify_work(dev_id); + return IRQ_HANDLED; +Index: 10.3-2007-11-26/drivers/xen/blktap/common.h +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/blktap/common.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/blktap/common.h 2007-10-22 13:53:08.000000000 +0200 +@@ -112,7 +112,7 @@ void tap_blkif_interface_init(void); + + void tap_blkif_xenbus_init(void); + +-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); ++irqreturn_t tap_blkif_be_int(int irq, void *dev_id); + int tap_blkif_schedule(void *arg); + + int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif); +Index: 10.3-2007-11-26/drivers/xen/console/console.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/console/console.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/console/console.c 2007-10-22 13:53:08.000000000 +0200 +@@ -335,7 +335,7 @@ static struct tty_struct *xencons_tty; + static int xencons_priv_irq; + static char x_char; + +-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs) ++void xencons_rx(char *buf, unsigned len) + { + int i; + unsigned long flags; +@@ -360,8 +360,7 @@ void xencons_rx(char *buf, unsigned len, + if (time_before(jiffies, sysrq_timeout)) { + spin_unlock_irqrestore( + &xencons_lock, flags); +- handle_sysrq( +- buf[i], regs, xencons_tty); ++ handle_sysrq(buf[i], xencons_tty); + spin_lock_irqsave( + &xencons_lock, flags); + continue; +@@ -426,14 +425,13 @@ void xencons_tx(void) + } + + /* Privileged receive callback and transmit kicker. 
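
[Annotation] The driver hunks in this stretch (tpm, blkback, blkfront, blktap, console) all make the same mechanical change: 2.6.19 drops the struct pt_regs argument from interrupt handlers, and code that still needs the registers (such as the sysrq path) fetches them with get_irq_regs() instead of receiving them. The before/after shape, sketched with illustrative stand-in types:

    #include <stdio.h>

    typedef int irqreturn_t;
    #define IRQ_HANDLED 1

    /* old: irqreturn_t handler(int irq, void *dev_id, struct pt_regs *regs)
     * new: the regs pointer is gone from the prototype entirely.        */
    typedef irqreturn_t (*irq_handler_t)(int irq, void *dev_id);

    static irqreturn_t blkif_be_int(int irq, void *dev_id)
    {
        printf("irq %d for device %p\n", irq, dev_id);
        return IRQ_HANDLED;
    }

    int main(void)
    {
        irq_handler_t handler = blkif_be_int; /* as passed to request_irq() */
        return handler(42, NULL) == IRQ_HANDLED ? 0 : 1;
    }

That typedef is also why the evtchn hunks below change every bind_*_to_irqhandler() prototype from a spelled-out function pointer to irq_handler_t.
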
*/ +-static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id, +- struct pt_regs *regs) ++static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id) + { + static char rbuf[16]; + int l; + + while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) +- xencons_rx(rbuf, l, regs); ++ xencons_rx(rbuf, l); + + xencons_tx(); + +Index: 10.3-2007-11-26/drivers/xen/console/xencons_ring.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/console/xencons_ring.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/console/xencons_ring.c 2007-10-22 13:53:08.000000000 +0200 +@@ -83,7 +83,7 @@ int xencons_ring_send(const char *data, + return sent; + } + +-static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs) ++static irqreturn_t handle_input(int irq, void *unused) + { + struct xencons_interface *intf = xencons_interface(); + XENCONS_RING_IDX cons, prod; +@@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq, + BUG_ON((prod - cons) > sizeof(intf->in)); + + while (cons != prod) { +- xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs); ++ xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1); + cons++; + } + +Index: 10.3-2007-11-26/drivers/xen/core/evtchn.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/core/evtchn.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/core/evtchn.c 2007-10-22 13:53:08.000000000 +0200 +@@ -462,7 +462,7 @@ static void unbind_from_irq(unsigned int + + int bind_caller_port_to_irqhandler( + unsigned int caller_port, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +@@ -485,7 +485,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir + + int bind_listening_port_to_irqhandler( + unsigned int remote_domain, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +@@ -509,7 +509,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to + int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +@@ -533,7 +533,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch + int bind_virq_to_irqhandler( + unsigned int virq, + unsigned int cpu, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +@@ -557,7 +557,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle + int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id) +@@ -644,15 +644,7 @@ static unsigned int startup_dynirq(unsig + return 0; + } + +-static void shutdown_dynirq(unsigned int irq) +-{ +- int evtchn = evtchn_from_irq(irq); +- +- if (VALID_EVTCHN(evtchn)) +- mask_evtchn(evtchn); +-} +- +-static void enable_dynirq(unsigned int irq) ++static void unmask_dynirq(unsigned int irq) + { + int evtchn = evtchn_from_irq(irq); + +@@ -660,7 +652,7 @@ static void enable_dynirq(unsigned int i + unmask_evtchn(evtchn); + } + +-static void disable_dynirq(unsigned int irq) ++static void mask_dynirq(unsigned int irq) + { + int evtchn = 
evtchn_from_irq(irq); + +@@ -688,12 +680,12 @@ static void end_dynirq(unsigned int irq) + unmask_evtchn(evtchn); + } + +-static struct hw_interrupt_type dynirq_type = { +- .typename = "Dynamic-irq", ++static struct irq_chip dynirq_chip = { ++ .name = "Dynamic-irq", + .startup = startup_dynirq, +- .shutdown = shutdown_dynirq, +- .enable = enable_dynirq, +- .disable = disable_dynirq, ++ .mask = mask_dynirq, ++ .unmask = unmask_dynirq, ++ .mask_ack = ack_dynirq, + .ack = ack_dynirq, + .end = end_dynirq, + #ifdef CONFIG_SMP +@@ -776,7 +768,7 @@ static void shutdown_pirq(unsigned int i + irq_info[irq] = IRQ_UNBOUND; + } + +-static void enable_pirq(unsigned int irq) ++static void unmask_pirq(unsigned int irq) + { + int evtchn = evtchn_from_irq(irq); + +@@ -786,7 +778,7 @@ static void enable_pirq(unsigned int irq + } + } + +-static void disable_pirq(unsigned int irq) ++static void mask_pirq(unsigned int irq) + { + int evtchn = evtchn_from_irq(irq); + +@@ -816,12 +808,14 @@ static void end_pirq(unsigned int irq) + } + } + +-static struct hw_interrupt_type pirq_type = { ++static struct irq_chip pirq_chip = { ++ .name = "Phys-irq", + .typename = "Phys-irq", + .startup = startup_pirq, + .shutdown = shutdown_pirq, +- .enable = enable_pirq, +- .disable = disable_pirq, ++ .mask = mask_pirq, ++ .unmask = unmask_pirq, ++ .mask_ack = ack_pirq, + .ack = ack_pirq, + .end = end_pirq, + #ifdef CONFIG_SMP +@@ -994,7 +988,8 @@ void __init xen_init_IRQ(void) + irq_desc[dynirq_to_irq(i)].status = IRQ_DISABLED; + irq_desc[dynirq_to_irq(i)].action = NULL; + irq_desc[dynirq_to_irq(i)].depth = 1; +- irq_desc[dynirq_to_irq(i)].chip = &dynirq_type; ++ set_irq_chip_and_handler_name(dynirq_to_irq(i), &dynirq_chip, ++ handle_level_irq, "level"); + } + + /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */ +@@ -1010,6 +1005,7 @@ void __init xen_init_IRQ(void) + irq_desc[pirq_to_irq(i)].status = IRQ_DISABLED; + irq_desc[pirq_to_irq(i)].action = NULL; + irq_desc[pirq_to_irq(i)].depth = 1; +- irq_desc[pirq_to_irq(i)].chip = &pirq_type; ++ set_irq_chip_and_handler_name(pirq_to_irq(i), &pirq_chip, ++ handle_level_irq, "level"); + } + } +Index: 10.3-2007-11-26/drivers/xen/core/reboot.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/core/reboot.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/core/reboot.c 2007-10-22 13:53:08.000000000 +0200 +@@ -180,7 +180,7 @@ static void sysrq_handler(struct xenbus_ + + #ifdef CONFIG_MAGIC_SYSRQ + if (sysrq_key != '\0') +- handle_sysrq(sysrq_key, NULL, NULL); ++ handle_sysrq(sysrq_key, NULL); + #endif + } + +Index: 10.3-2007-11-26/drivers/xen/core/smpboot.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/core/smpboot.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/core/smpboot.c 2007-10-22 13:53:08.000000000 +0200 +@@ -25,8 +25,8 @@ + #include <xen/cpu_hotplug.h> + #include <xen/xenbus.h> + +-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *); +-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *); ++extern irqreturn_t smp_reschedule_interrupt(int, void *); ++extern irqreturn_t smp_call_function_interrupt(int, void *); + + extern int local_setup_timer(unsigned int cpu); + extern void local_teardown_timer(unsigned int cpu); +@@ -72,8 +72,6 @@ EXPORT_SYMBOL(cpu_core_map); + #if defined(__i386__) + u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = 0xff }; + EXPORT_SYMBOL(x86_cpu_to_apicid); +-#elif !defined(CONFIG_X86_IO_APIC) +-unsigned int maxcpus = NR_CPUS; + #endif + + void __init prefill_possible_map(void) +Index: 10.3-2007-11-26/drivers/xen/fbfront/xenfb.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/fbfront/xenfb.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/fbfront/xenfb.c 2007-10-22 13:53:08.000000000 +0200 +@@ -417,8 +417,7 @@ static struct fb_ops xenfb_fb_ops = { + .fb_mmap = xenfb_mmap, + }; + +-static irqreturn_t xenfb_event_handler(int rq, void *dev_id, +- struct pt_regs *regs) ++static irqreturn_t xenfb_event_handler(int rq, void *dev_id) + { + /* + * No in events recognized, simply ignore them all. +Index: 10.3-2007-11-26/drivers/xen/fbfront/xenkbd.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/fbfront/xenkbd.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/fbfront/xenkbd.c 2007-10-22 13:53:08.000000000 +0200 +@@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st + * to do that. + */ + +-static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs) ++static irqreturn_t input_handler(int rq, void *dev_id) + { + struct xenkbd_info *info = dev_id; + struct xenkbd_page *page = info->page; +Index: 10.3-2007-11-26/drivers/xen/gntdev/gntdev.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/gntdev/gntdev.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/gntdev/gntdev.c 2007-10-22 13:53:08.000000000 +0200 +@@ -701,9 +701,6 @@ static pte_t gntdev_clear_pte(struct vm_ + BUG(); + } + +- /* Copy the existing value of the PTE for returning. */ +- copy = *ptep; +- + /* Calculate the grant relating to this PTE. */ + slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT); + +@@ -718,6 +715,10 @@ static pte_t gntdev_clear_pte(struct vm_ + GNTDEV_INVALID_HANDLE && + !xen_feature(XENFEAT_auto_translated_physmap)) { + /* NOT USING SHADOW PAGE TABLES. */ ++ ++ /* Copy the existing value of the PTE for returning. */ ++ copy = *ptep; ++ + gnttab_set_unmap_op(&op, virt_to_machine(ptep), + GNTMAP_contains_pte, + private_data->grants[slot_index] +@@ -730,7 +731,7 @@ static pte_t gntdev_clear_pte(struct vm_ + op.status); + } else { + /* USING SHADOW PAGE TABLES. */ +- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); ++ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); + } + + /* Finally, we unmap the grant from kernel space. */ +@@ -758,7 +759,7 @@ static pte_t gntdev_clear_pte(struct vm_ + >> PAGE_SHIFT, INVALID_P2M_ENTRY); + + } else { +- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm); ++ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); + } + + return copy; +Index: 10.3-2007-11-26/drivers/xen/privcmd/privcmd.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/privcmd/privcmd.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/privcmd/privcmd.c 2007-10-22 13:53:08.000000000 +0200 +@@ -234,7 +234,7 @@ static int privcmd_mmap(struct file * fi + return -ENOSYS; + + /* DONTCOPY is essential for Xen as copy_page_range is broken. 
*/ +- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY; ++ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY; + vma->vm_ops = &privcmd_vm_ops; + vma->vm_private_data = NULL; + +Index: 10.3-2007-11-26/drivers/xen/netback/common.h +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/netback/common.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/netback/common.h 2007-10-22 13:53:08.000000000 +0200 +@@ -140,7 +140,7 @@ void netif_deschedule_work(netif_t *neti + + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev); + struct net_device_stats *netif_be_get_stats(struct net_device *dev); +-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs); ++irqreturn_t netif_be_int(int irq, void *dev_id); + + static inline int netbk_can_queue(struct net_device *dev) + { +Index: 10.3-2007-11-26/drivers/xen/netback/loopback.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/netback/loopback.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/netback/loopback.c 2007-10-22 13:53:08.000000000 +0200 +@@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk + np->stats.rx_bytes += skb->len; + np->stats.rx_packets++; + +- if (skb->ip_summed == CHECKSUM_HW) { ++ if (skb->ip_summed == CHECKSUM_PARTIAL) { + /* Defer checksum calculation. */ + skb->proto_csum_blank = 1; + /* Must be a local packet: assert its integrity. */ +Index: 10.3-2007-11-26/drivers/xen/netback/netback.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/netback/netback.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/netback/netback.c 2007-10-22 13:53:08.000000000 +0200 +@@ -649,7 +649,7 @@ static void net_rx_action(unsigned long + id = meta[npo.meta_cons].id; + flags = nr_frags ? NETRXF_more_data : 0; + +- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ + flags |= NETRXF_csum_blank | NETRXF_data_validated; + else if (skb->proto_data_valid) /* remote but checksummed? 
*/ + flags |= NETRXF_data_validated; +@@ -1339,7 +1339,7 @@ static void netif_page_release(struct pa + netif_idx_release(netif_page_index(page)); + } + +-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t netif_be_int(int irq, void *dev_id) + { + netif_t *netif = dev_id; + +@@ -1406,7 +1406,7 @@ static netif_rx_response_t *make_rx_resp + } + + #ifdef NETBE_DEBUG_INTERRUPT +-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs) ++static irqreturn_t netif_be_dbg(int irq, void *dev_id) + { + struct list_head *ent; + netif_t *netif; +Index: 10.3-2007-11-26/drivers/xen/netfront/netfront.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/netfront/netfront.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/netfront/netfront.c 2007-10-22 13:53:08.000000000 +0200 +@@ -135,7 +135,7 @@ static inline int netif_needs_gso(struct + { + return skb_is_gso(skb) && + (!skb_gso_ok(skb, dev->features) || +- unlikely(skb->ip_summed != CHECKSUM_HW)); ++ unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + } + #else + #define netif_needs_gso(dev, skb) 0 +@@ -268,7 +268,7 @@ static void network_tx_buf_gc(struct net + static void network_alloc_rx_buffers(struct net_device *); + static int send_fake_arp(struct net_device *); + +-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs); ++static irqreturn_t netif_int(int irq, void *dev_id); + + #ifdef CONFIG_SYSFS + static int xennet_sysfs_addif(struct net_device *netdev); +@@ -978,7 +978,7 @@ static int network_start_xmit(struct sk_ + tx->flags = 0; + extra = NULL; + +- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ ++ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ + tx->flags |= NETTXF_csum_blank | NETTXF_data_validated; + #ifdef CONFIG_XEN + if (skb->proto_data_valid) /* remote but checksummed? 
*/ +@@ -1034,7 +1034,7 @@ static int network_start_xmit(struct sk_ + return 0; + } + +-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs) ++static irqreturn_t netif_int(int irq, void *dev_id) + { + struct net_device *dev = dev_id; + struct netfront_info *np = netdev_priv(dev); +Index: 10.3-2007-11-26/drivers/xen/pciback/pciback.h +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/pciback/pciback.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/pciback/pciback.h 2007-10-22 13:53:08.000000000 +0200 +@@ -83,7 +83,7 @@ int pciback_publish_pci_roots(struct pci + void pciback_release_devices(struct pciback_device *pdev); + + /* Handles events from front-end */ +-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs); ++irqreturn_t pciback_handle_event(int irq, void *dev_id); + void pciback_do_op(void *data); + + int pciback_xenbus_register(void); +Index: 10.3-2007-11-26/drivers/xen/pciback/pciback_ops.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/pciback/pciback_ops.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/pciback/pciback_ops.c 2007-10-22 13:53:08.000000000 +0200 +@@ -85,7 +85,7 @@ void pciback_do_op(void *data) + test_and_schedule_op(pdev); + } + +-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t pciback_handle_event(int irq, void *dev_id) + { + struct pciback_device *pdev = dev_id; + +Index: 10.3-2007-11-26/drivers/xen/tpmback/common.h +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/tpmback/common.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/tpmback/common.h 2007-10-22 13:53:08.000000000 +0200 +@@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm + void tpmif_xenbus_init(void); + void tpmif_xenbus_exit(void); + int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn); +-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs); ++irqreturn_t tpmif_be_int(int irq, void *dev_id); + + long int tpmback_get_instance(struct backend_info *bi); + +Index: 10.3-2007-11-26/drivers/xen/tpmback/tpmback.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/tpmback/tpmback.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/tpmback/tpmback.c 2007-10-22 13:53:08.000000000 +0200 +@@ -502,7 +502,7 @@ static ssize_t vtpm_op_read(struct file + list_del(&pak->next); + write_unlock_irqrestore(&dataex.pak_lock, flags); + +- DPRINTK("size given by app: %d, available: %d\n", size, left); ++ DPRINTK("size given by app: %zu, available: %u\n", size, left); + + ret_size = min_t(size_t, size, left); + +@@ -899,7 +899,7 @@ static void tpm_tx_action(unsigned long + } + } + +-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t tpmif_be_int(int irq, void *dev_id) + { + tpmif_t *tpmif = (tpmif_t *) dev_id; + +Index: 10.3-2007-11-26/drivers/xen/xenbus/xenbus_comms.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/xenbus/xenbus_comms.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/xenbus/xenbus_comms.c 2007-10-22 13:53:08.000000000 +0200 +@@ -54,7 +54,7 @@ static DECLARE_WORK(probe_work, xenbus_p + + static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); + +-static irqreturn_t 
wake_waiting(int irq, void *unused, struct pt_regs *regs) ++static irqreturn_t wake_waiting(int irq, void *unused) + { + if (unlikely(xenstored_ready == 0)) { + xenstored_ready = 1; +Index: 10.3-2007-11-26/drivers/xen/xenoprof/xenoprofile.c +=================================================================== +--- 10.3-2007-11-26.orig/drivers/xen/xenoprof/xenoprofile.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/drivers/xen/xenoprof/xenoprofile.c 2007-10-22 13:53:08.000000000 +0200 +@@ -177,7 +177,7 @@ done: + } + + static irqreturn_t +-xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs) ++xenoprof_ovf_interrupt(int irq, void * dev_id) + { + struct xenoprof_buf * buf; + int cpu; +Index: 10.3-2007-11-26/include/asm-i386/acpi.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/acpi.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/acpi.h 2007-10-22 13:53:08.000000000 +0200 +@@ -141,7 +141,9 @@ extern void acpi_reserve_bootmem(void); + + #endif /*CONFIG_ACPI_SLEEP*/ + ++#ifndef CONFIG_XEN + #define ARCH_HAS_POWER_INIT 1 ++#endif + + #endif /*__KERNEL__*/ + +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/desc.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/desc.h 2007-10-22 13:53:08.000000000 +0200 +@@ -32,52 +32,108 @@ static inline struct desc_struct *get_cp + return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address; + } + ++/* ++ * This is the ldt that every process will get unless we need ++ * something other than this. ++ */ ++extern struct desc_struct default_ldt[]; ++extern struct desc_struct idt_table[]; ++extern void set_intr_gate(unsigned int irq, void * addr); ++ ++static inline void pack_descriptor(__u32 *a, __u32 *b, ++ unsigned long base, unsigned long limit, unsigned char type, unsigned char flags) ++{ ++ *a = ((base & 0xffff) << 16) | (limit & 0xffff); ++ *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) | ++ (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20); ++} ++ ++static inline void pack_gate(__u32 *a, __u32 *b, ++ unsigned long base, unsigned short seg, unsigned char type, unsigned char flags) ++{ ++ *a = (seg << 16) | (base & 0xffff); ++ *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff); ++} ++ ++#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */ ++#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */ ++#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */ ++#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */ ++#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */ ++#define DESCTYPE_DPL3 0x60 /* DPL-3 */ ++#define DESCTYPE_S 0x10 /* !system */ ++ + #define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8)) + #define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8)) + + #define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr)) + #define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr)) +-#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr)) +-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt)) ++#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr)) ++#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt)) + + #define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr)) + #define store_idt(dtr) 
__asm__ ("sidt %0":"=m" (*dtr)) +-#define store_tr(tr) __asm__ ("str %0":"=mr" (tr)) +-#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt)) ++#define store_tr(tr) __asm__ ("str %0":"=m" (tr)) ++#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt)) + +-/* +- * This is the ldt that every process will get unless we need +- * something other than this. +- */ +-extern struct desc_struct default_ldt[]; +-extern void set_intr_gate(unsigned int irq, void * addr); ++#if TLS_SIZE != 24 ++# error update this code. ++#endif ++ ++static inline void load_TLS(struct thread_struct *t, unsigned int cpu) ++{ ++#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), *(u64 *)&t->tls_array[i]) ++ C(0); C(1); C(2); ++#undef C ++} + +-#define _set_tssldt_desc(n,addr,limit,type) \ +-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ +- "movw %w1,2(%2)\n\t" \ +- "rorl $16,%1\n\t" \ +- "movb %b1,4(%2)\n\t" \ +- "movb %4,5(%2)\n\t" \ +- "movb $0,6(%2)\n\t" \ +- "movb %h1,7(%2)\n\t" \ +- "rorl $16,%1" \ +- : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type)) ++#ifndef CONFIG_XEN ++static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b) ++{ ++ __u32 *lp = (__u32 *)((char *)dt + entry*8); ++ *lp = entry_a; ++ *(lp+1) = entry_b; ++} + +-#ifndef CONFIG_X86_NO_TSS +-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr) ++#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++#else ++extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); ++extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b); ++#endif ++#ifndef CONFIG_X86_NO_IDT ++#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b) ++ ++static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg) + { +- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr, +- offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89); ++ __u32 a, b; ++ pack_gate(&a, &b, (unsigned long)addr, seg, type, 0); ++ write_idt_entry(idt_table, gate, a, b); + } ++#endif + +-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) ++#ifndef CONFIG_X86_NO_TSS ++static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr) ++{ ++ __u32 a, b; ++ pack_descriptor(&a, &b, (unsigned long)addr, ++ offsetof(struct tss_struct, __cacheline_filler) - 1, ++ DESCTYPE_TSS, 0); ++ write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b); ++} + #endif + +-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) ++static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries) + { +- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82); ++ __u32 a, b; ++ pack_descriptor(&a, &b, (unsigned long)addr, ++ entries * sizeof(struct desc_struct) - 1, ++ DESCTYPE_LDT, 0); ++ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b); + } + ++#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr) ++ + #define LDT_entry_a(info) \ + ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff)) + +@@ -103,19 +159,6 @@ static inline void set_ldt_desc(unsigned + (info)->seg_not_present == 1 && \ + (info)->useable == 0 ) + +-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b); +- +-#if TLS_SIZE != 24 +-# error update this code. 
+-#endif +- +-static inline void load_TLS(struct thread_struct *t, unsigned int cpu) +-{ +-#define C(i) HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), *(u64 *)&t->tls_array[i]) +- C(0); C(1); C(2); +-#undef C +-} +- + static inline void clear_LDT(void) + { + int cpu = get_cpu(); +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/fixmap.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/fixmap.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/fixmap.h 2007-10-22 13:53:08.000000000 +0200 +@@ -55,7 +55,7 @@ enum fixed_addresses { + #ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ + #endif +-#ifdef CONFIG_X86_IO_APIC ++#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN) + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, + #endif +@@ -95,10 +95,9 @@ enum fixed_addresses { + __end_of_fixed_addresses + }; + +-extern void set_fixaddr_top(unsigned long top); +- + extern void __set_fixmap(enum fixed_addresses idx, + maddr_t phys, pgprot_t flags); ++extern void reserve_top_address(unsigned long reserve); + + #define set_fixmap(idx, phys) \ + __set_fixmap(idx, phys, PAGE_KERNEL) +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/floppy.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/floppy.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/floppy.h 2007-10-22 13:53:08.000000000 +0200 +@@ -43,14 +43,14 @@ static char *virtual_dma_addr; + static int virtual_dma_mode; + static int doing_pdma; + +-static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) ++static irqreturn_t floppy_hardint(int irq, void *dev_id) + { + register unsigned char st; + register int lcount; + register char *lptr; + + if (!doing_pdma) +- return floppy_interrupt(irq, dev_id, regs); ++ return floppy_interrupt(irq, dev_id); + + st = 1; + for(lcount=virtual_dma_count, lptr=virtual_dma_addr; +@@ -73,7 +73,7 @@ static irqreturn_t floppy_hardint(int ir + virtual_dma_residue += virtual_dma_count; + virtual_dma_count=0; + doing_pdma = 0; +- floppy_interrupt(irq, dev_id, regs); ++ floppy_interrupt(irq, dev_id); + return IRQ_HANDLED; + } + return IRQ_HANDLED; +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/hw_irq.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/hw_irq.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/hw_irq.h 2007-10-22 13:53:08.000000000 +0200 +@@ -17,8 +17,6 @@ + #include <asm/irq.h> + #include <asm/sections.h> + +-struct hw_interrupt_type; +- + #define NMI_VECTOR 0x02 + + /* +@@ -28,10 +26,6 @@ struct hw_interrupt_type; + * Interrupt entry/exit code at both C and assembly level + */ + +-extern u8 irq_vector[NR_IRQ_VECTORS]; +-#define IO_APIC_VECTOR(irq) (irq_vector[irq]) +-#define AUTO_ASSIGN -1 +- + extern void (*interrupt[NR_IRQS])(void); + + #ifdef CONFIG_SMP +@@ -44,7 +38,7 @@ fastcall void call_function_interrupt(vo + fastcall void apic_timer_interrupt(void); + fastcall void error_interrupt(void); + fastcall void spurious_interrupt(void); +-fastcall void thermal_interrupt(struct pt_regs *); ++fastcall void thermal_interrupt(void); + #define platform_legacy_irq(irq) ((irq) < 16) + #endif + +Index: 
10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/io.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/io.h 2007-10-22 13:53:08.000000000 +0200 +@@ -238,33 +238,6 @@ static inline void memcpy_toio(volatile + + #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) + +-/** +- * check_signature - find BIOS signatures +- * @io_addr: mmio address to check +- * @signature: signature block +- * @length: length of signature +- * +- * Perform a signature comparison with the mmio address io_addr. This +- * address should have been obtained by ioremap. +- * Returns 1 on a match. +- */ +- +-static inline int check_signature(volatile void __iomem * io_addr, +- const unsigned char *signature, int length) +-{ +- int retval = 0; +- do { +- if (readb(io_addr) != *signature) +- goto out; +- io_addr++; +- signature++; +- length--; +- } while (length); +- retval = 1; +-out: +- return retval; +-} +- + /* + * Cache management + * +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/page.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/page.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/page.h 2007-10-22 13:53:08.000000000 +0200 +@@ -196,7 +196,7 @@ extern int page_is_ram(unsigned long pag + + #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) +-#define MAXMEM (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE) ++#define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) + #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) + #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-2level.h 2007-10-22 13:53:08.000000000 +0200 +@@ -21,14 +21,6 @@ + set_pte((ptep), (pteval)); \ + } while (0) + +-#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \ +- if (((_mm) != current->mm && (_mm) != &init_mm) || \ +- HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \ +- set_pte((ptep), (pteval)); \ +- xen_invlpg((addr)); \ +- } \ +-} while (0) +- + #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) + + #define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) +@@ -38,6 +30,7 @@ + + #define pte_none(x) (!(x).pte_low) + ++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR + static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + pte_t pte = *ptep; +@@ -49,6 +42,7 @@ static inline pte_t ptep_get_and_clear(s + return pte; + } + ++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH + #define ptep_clear_flush(vma, addr, ptep) \ + ({ \ + pte_t *__ptep = (ptep); \ +@@ -64,8 +58,6 @@ static inline pte_t ptep_get_and_clear(s + __res; \ + }) + +-#define pte_same(a, b) ((a).pte_low == (b).pte_low) +- + #define __pte_mfn(_pte) ((_pte).pte_low >> PAGE_SHIFT) + #define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? 
\ + __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte))) +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable-3level.h 2007-10-22 13:53:08.000000000 +0200 +@@ -50,7 +50,6 @@ static inline int pte_exec_kernel(pte_t + * not possible, use pte_get_and_clear to obtain the old pte + * value and then use set_pte to update it. -ben + */ +-#define __HAVE_ARCH_SET_PTE_ATOMIC + + static inline void set_pte(pte_t *ptep, pte_t pte) + { +@@ -67,14 +66,6 @@ static inline void set_pte(pte_t *ptep, + set_pte((ptep), (pteval)); \ + } while (0) + +-#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \ +- if (((_mm) != current->mm && (_mm) != &init_mm) || \ +- HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \ +- set_pte((ptep), (pteval)); \ +- xen_invlpg((addr)); \ +- } \ +-} while (0) +- + #define set_pmd(pmdptr,pmdval) \ + xen_l2_entry_update((pmdptr), (pmdval)) + #define set_pud(pudptr,pudval) \ +@@ -91,7 +82,7 @@ static inline void pud_clear (pud_t * pu + #define pud_page(pud) \ + ((struct page *) __va(pud_val(pud) & PAGE_MASK)) + +-#define pud_page_kernel(pud) \ ++#define pud_page_vaddr(pud) \ + ((unsigned long) __va(pud_val(pud) & PAGE_MASK)) + + +@@ -121,6 +112,7 @@ static inline void pte_clear(struct mm_s + + #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) + ++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR + static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + pte_t pte = *ptep; +@@ -139,6 +131,7 @@ static inline pte_t ptep_get_and_clear(s + return pte; + } + ++#define __HAVE_ARCH_PTEP_CLEAR_FLUSH + #define ptep_clear_flush(vma, addr, ptep) \ + ({ \ + pte_t *__ptep = (ptep); \ +@@ -156,6 +149,7 @@ static inline pte_t ptep_get_and_clear(s + __res; \ + }) + ++#define __HAVE_ARCH_PTE_SAME + static inline int pte_same(pte_t a, pte_t b) + { + return a.pte_low == b.pte_low && a.pte_high == b.pte_high; +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/pgtable.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/pgtable.h 2007-10-22 13:53:08.000000000 +0200 +@@ -256,31 +256,89 @@ static inline pte_t pte_mkhuge(pte_t pte + # include <asm/pgtable-2level.h> + #endif + +-#define ptep_test_and_clear_dirty(vma, addr, ptep) \ ++/* ++ * Rules for using pte_update - it must be called after any PTE update which ++ * has not been done using the set_pte / clear_pte interfaces. It is used by ++ * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE ++ * updates should either be sets, clears, or set_pte_atomic for P->P ++ * transitions, which means this hook should only be called for user PTEs. ++ * This hook implies a P->P protection or access change has taken place, which ++ * requires a subsequent TLB flush. The notification can optionally be delayed ++ * until the TLB flush event by using the pte_update_defer form of the ++ * interface, but care must be taken to assure that the flush happens while ++ * still holding the same page table lock so that the shadow and primary pages ++ * do not become out of sync on SMP. 
++ */ ++#define pte_update(mm, addr, ptep) do { } while (0) ++#define pte_update_defer(mm, addr, ptep) do { } while (0) ++ ++ ++/* ++ * We only update the dirty/accessed state if we set ++ * the dirty bit by hand in the kernel, since the hardware ++ * will do the accessed bit for us, and we don't want to ++ * race with other CPU's that might be updating the dirty ++ * bit at the same time. ++ */ ++#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS ++#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ ++do { \ ++ if (dirty) \ ++ ptep_establish(vma, address, ptep, entry); \ ++} while (0) ++ ++/* ++ * We don't actually have these, but we want to advertise them so that ++ * we can encompass the flush here. ++ */ ++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY ++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG ++ ++/* ++ * Rules for using ptep_establish: the pte MUST be a user pte, and ++ * must be a present->present transition. ++ */ ++#define __HAVE_ARCH_PTEP_ESTABLISH ++#define ptep_establish(vma, address, ptep, pteval) \ ++do { \ ++ if ( likely((vma)->vm_mm == current->mm) ) { \ ++ BUG_ON(HYPERVISOR_update_va_mapping(address, \ ++ pteval, \ ++ (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ ++ UVMF_INVLPG|UVMF_MULTI)); \ ++ } else { \ ++ xen_l1_entry_update(ptep, pteval); \ ++ flush_tlb_page(vma, address); \ ++ } \ ++} while (0) ++ ++#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH ++#define ptep_clear_flush_dirty(vma, address, ptep) \ + ({ \ + pte_t __pte = *(ptep); \ +- int __ret = pte_dirty(__pte); \ +- if (__ret) { \ +- __pte = pte_mkclean(__pte); \ +- if ((vma)->vm_mm != current->mm || \ +- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \ +- (ptep)->pte_low = __pte.pte_low; \ +- } \ +- __ret; \ ++ int __dirty = pte_dirty(__pte); \ ++ __pte = pte_mkclean(__pte); \ ++ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ ++ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ ++ else if (__dirty) \ ++ (ptep)->pte_low = __pte.pte_low; \ ++ __dirty; \ + }) + +-#define ptep_test_and_clear_young(vma, addr, ptep) \ ++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH ++#define ptep_clear_flush_young(vma, address, ptep) \ + ({ \ + pte_t __pte = *(ptep); \ +- int __ret = pte_young(__pte); \ +- if (__ret) \ +- __pte = pte_mkold(__pte); \ +- if ((vma)->vm_mm != current->mm || \ +- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \ +- (ptep)->pte_low = __pte.pte_low; \ +- __ret; \ ++ int __young = pte_young(__pte); \ ++ __pte = pte_mkold(__pte); \ ++ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \ ++ ptep_set_access_flags(vma, address, ptep, __pte, __young); \ ++ else if (__young) \ ++ (ptep)->pte_low = __pte.pte_low; \ ++ __young; \ + }) + ++#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL + #define ptep_get_and_clear_full(mm, addr, ptep, full) \ + ((full) ? 
({ \ + pte_t __res = *(ptep); \ +@@ -292,6 +350,7 @@ static inline pte_t pte_mkhuge(pte_t pte + }) : \ + ptep_get_and_clear(mm, addr, ptep)) + ++#define __HAVE_ARCH_PTEP_SET_WRPROTECT + static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { + pte_t pte = *ptep; +@@ -387,11 +446,11 @@ static inline pte_t pte_modify(pte_t pte + #define pte_index(address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + #define pte_offset_kernel(dir, address) \ +- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) ++ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address)) + + #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) + +-#define pmd_page_kernel(pmd) \ ++#define pmd_page_vaddr(pmd) \ + ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + + /* +@@ -414,8 +473,6 @@ extern pte_t *lookup_address(unsigned lo + static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} + #endif + +-extern void noexec_setup(const char *str); +- + #if defined(CONFIG_HIGHPTE) + #define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ +@@ -433,37 +490,15 @@ extern void noexec_setup(const char *str + #define pte_unmap_nested(pte) do { } while (0) + #endif + +-#define __HAVE_ARCH_PTEP_ESTABLISH +-#define ptep_establish(vma, address, ptep, pteval) \ +- do { \ +- if ( likely((vma)->vm_mm == current->mm) ) { \ +- BUG_ON(HYPERVISOR_update_va_mapping(address, \ +- pteval, \ +- (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \ +- UVMF_INVLPG|UVMF_MULTI)); \ +- } else { \ +- xen_l1_entry_update(ptep, pteval); \ +- flush_tlb_page(vma, address); \ +- } \ +- } while (0) ++/* Clear a kernel PTE and flush it from the TLB */ ++#define kpte_clear_flush(ptep, vaddr) \ ++ HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG) + + /* + * The i386 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. +- * +- * Also, we only update the dirty/accessed state if we set +- * the dirty bit by hand in the kernel, since the hardware +- * will do the accessed bit for us, and we don't want to +- * race with other CPU's that might be updating the dirty +- * bit at the same time. 
+ */ + #define update_mmu_cache(vma,address,pte) do { } while (0) +-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +-#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \ +- do { \ +- if (dirty) \ +- ptep_establish(vma, address, ptep, entry); \ +- } while (0) + + #include <xen/features.h> + void make_lowmem_page_readonly(void *va, unsigned int feature); +@@ -518,13 +553,6 @@ direct_remap_pfn_range(vma,from,pfn,size + #define GET_IOSPACE(pfn) 0 + #define GET_PFN(pfn) (pfn) + +-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY +-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL +-#define __HAVE_ARCH_PTEP_CLEAR_FLUSH +-#define __HAVE_ARCH_PTEP_SET_WRPROTECT +-#define __HAVE_ARCH_PTE_SAME + #include <asm-generic/pgtable.h> + + #endif /* _I386_PGTABLE_H */ +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/processor.h 2007-09-03 09:52:56.000000000 +0200 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/processor.h 2007-10-22 13:53:08.000000000 +0200 +@@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui + #define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */ + #define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */ + ++static inline void __cpuid(unsigned int *eax, unsigned int *ebx, ++ unsigned int *ecx, unsigned int *edx) ++{ ++ /* ecx is often an input as well as an output. */ ++ __asm__(XEN_CPUID ++ : "=a" (*eax), ++ "=b" (*ebx), ++ "=c" (*ecx), ++ "=d" (*edx) ++ : "0" (*eax), "2" (*ecx)); ++} ++ + /* + * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx +@@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui + */ + static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) + { +- __asm__(XEN_CPUID +- : "=a" (*eax), +- "=b" (*ebx), +- "=c" (*ecx), +- "=d" (*edx) +- : "0" (op), "c"(0)); ++ *eax = op; ++ *ecx = 0; ++ __cpuid(eax, ebx, ecx, edx); + } + + /* Some CPUID calls want 'count' to be placed in ecx */ + static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, +- int *edx) ++ int *edx) + { +- __asm__(XEN_CPUID +- : "=a" (*eax), +- "=b" (*ebx), +- "=c" (*ecx), +- "=d" (*edx) +- : "0" (op), "c" (count)); ++ *eax = op; ++ *ecx = count; ++ __cpuid(eax, ebx, ecx, edx); + } + + /* +@@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i + */ + static inline unsigned int cpuid_eax(unsigned int op) + { +- unsigned int eax; ++ unsigned int eax, ebx, ecx, edx; + +- __asm__(XEN_CPUID +- : "=a" (eax) +- : "0" (op) +- : "bx", "cx", "dx"); ++ cpuid(op, &eax, &ebx, &ecx, &edx); + return eax; + } + static inline unsigned int cpuid_ebx(unsigned int op) + { +- unsigned int eax, ebx; ++ unsigned int eax, ebx, ecx, edx; + +- __asm__(XEN_CPUID +- : "=a" (eax), "=b" (ebx) +- : "0" (op) +- : "cx", "dx" ); ++ cpuid(op, &eax, &ebx, &ecx, &edx); + return ebx; + } + static inline unsigned int cpuid_ecx(unsigned int op) + { +- unsigned int eax, ecx; ++ unsigned int eax, ebx, ecx, edx; + +- __asm__(XEN_CPUID +- : "=a" (eax), "=c" (ecx) +- : "0" (op) +- : "bx", "dx" ); ++ cpuid(op, &eax, &ebx, &ecx, &edx); + return ecx; + } + static inline unsigned int cpuid_edx(unsigned int op) + { +- unsigned int eax, edx; ++ unsigned int eax, ebx, ecx, edx; + +- __asm__(XEN_CPUID +- : "=a" (eax), "=d" (edx) +- : "0" (op) +- : "bx", "cx"); ++ 
cpuid(op, &eax, &ebx, &ecx, &edx); + return edx; + } + +@@ -315,6 +309,8 @@ static inline void __mwait(unsigned long + : :"a" (eax), "c" (ecx)); + } + ++extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); ++ + /* from system description table in BIOS. Mostly for MCA use, but + others may find it useful. */ + extern unsigned int machine_id; +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/ptrace.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/ptrace.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/ptrace.h 2007-10-22 13:53:08.000000000 +0200 +@@ -1,24 +1,7 @@ + #ifndef _I386_PTRACE_H + #define _I386_PTRACE_H + +-#define EBX 0 +-#define ECX 1 +-#define EDX 2 +-#define ESI 3 +-#define EDI 4 +-#define EBP 5 +-#define EAX 6 +-#define DS 7 +-#define ES 8 +-#define FS 9 +-#define GS 10 +-#define ORIG_EAX 11 +-#define EIP 12 +-#define CS 13 +-#define EFL 14 +-#define UESP 15 +-#define SS 16 +-#define FRAME_SIZE 17 ++#include <asm/ptrace-abi.h> + + /* this struct defines the way the registers are stored on the + stack during a system call. */ +@@ -41,25 +24,10 @@ struct pt_regs { + int xss; + }; + +-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. */ +-#define PTRACE_GETREGS 12 +-#define PTRACE_SETREGS 13 +-#define PTRACE_GETFPREGS 14 +-#define PTRACE_SETFPREGS 15 +-#define PTRACE_GETFPXREGS 18 +-#define PTRACE_SETFPXREGS 19 +- +-#define PTRACE_OLDSETOPTIONS 21 +- +-#define PTRACE_GET_THREAD_AREA 25 +-#define PTRACE_SET_THREAD_AREA 26 +- +-#define PTRACE_SYSEMU 31 +-#define PTRACE_SYSEMU_SINGLESTEP 32 +- + #ifdef __KERNEL__ + + #include <asm/vm86.h> ++#include <asm/segment.h> + + struct task_struct; + extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); +@@ -73,18 +41,17 @@ extern void send_sigtrap(struct task_str + */ + static inline int user_mode(struct pt_regs *regs) + { +- return (regs->xcs & 2) != 0; ++ return (regs->xcs & SEGMENT_RPL_MASK) == USER_RPL; + } + static inline int user_mode_vm(struct pt_regs *regs) + { +- return ((regs->xcs & 2) | (regs->eflags & VM_MASK)) != 0; ++ return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; + } ++ + #define instruction_pointer(regs) ((regs)->eip) +-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) ++#define regs_return_value(regs) ((regs)->eax) ++ + extern unsigned long profile_pc(struct pt_regs *regs); +-#else +-#define profile_pc(regs) instruction_pointer(regs) +-#endif + #endif /* __KERNEL__ */ + + #endif +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/segment.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/segment.h 2007-10-22 13:53:08.000000000 +0200 +@@ -61,11 +61,9 @@ + + #define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE + 0) + #define __KERNEL_CS (GDT_ENTRY_KERNEL_CS * 8) +-#define GET_KERNEL_CS() (__KERNEL_CS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ) + + #define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE + 1) + #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS * 8) +-#define GET_KERNEL_DS() (__KERNEL_DS | (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) ) + + #define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE + 4) + #define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE + 5) +@@ -85,6 +83,11 @@ + + #define GDT_SIZE (GDT_ENTRIES * 8) + ++/* 
Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ ++#define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) ++/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ ++#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) ++ + /* Simple and small GDT entries for booting only */ + + #define GDT_ENTRY_BOOT_CS 2 +@@ -114,4 +117,16 @@ + */ + #define IDT_ENTRIES 256 + ++/* Bottom two bits of selector give the ring privilege level */ ++#define SEGMENT_RPL_MASK 0x3 ++/* Bit 2 is table indicator (LDT/GDT) */ ++#define SEGMENT_TI_MASK 0x4 ++ ++/* User mode is privilege level 3 */ ++#define USER_RPL 0x3 ++/* LDT segment has TI set, GDT has it cleared */ ++#define SEGMENT_LDT 0x4 ++#define SEGMENT_GDT 0x0 ++ ++#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1) + #endif +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/smp.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/smp.h 2007-10-22 13:53:08.000000000 +0200 +@@ -79,25 +79,36 @@ static inline int hard_smp_processor_id( + return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID)); + } + #endif +- +-static __inline int logical_smp_processor_id(void) +-{ +- /* we don't want to mark this access volatile - bad code generation */ +- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); +-} +- + #endif + ++extern int safe_smp_processor_id(void); + extern int __cpu_disable(void); + extern void __cpu_die(unsigned int cpu); + extern void prefill_possible_map(void); ++extern unsigned int num_processors; ++ + #endif /* !__ASSEMBLY__ */ + + #else /* CONFIG_SMP */ + ++#define safe_smp_processor_id() 0 + #define cpu_physical_id(cpu) boot_cpu_physical_apicid + + #define NO_PROC_ID 0xFF /* No processor magic marker */ + + #endif ++ ++#ifndef __ASSEMBLY__ ++ ++extern u8 apicid_2_node[]; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++static __inline int logical_smp_processor_id(void) ++{ ++ /* we don't want to mark this access volatile - bad code generation */ ++ return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); ++} ++#endif ++#endif ++ + #endif +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/spinlock.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/spinlock.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/spinlock.h 2007-10-22 13:53:08.000000000 +0200 +@@ -4,8 +4,12 @@ + #include <asm/atomic.h> + #include <asm/rwlock.h> + #include <asm/page.h> ++#include <asm/processor.h> + #include <linux/compiler.h> + ++#define CLI_STRING "#cli" ++#define STI_STRING "#sti" ++ + /* + * Your basic SMP spinlocks, allowing only a single CPU anywhere + * +@@ -17,67 +21,64 @@ + * (the type definitions are in asm/spinlock_types.h) + */ + +-#define __raw_spin_is_locked(x) \ +- (*(volatile signed char *)(&(x)->slock) <= 0) +- +-#define __raw_spin_lock_string \ +- "\n1:\t" \ +- LOCK_PREFIX " ; decb %0\n\t" \ +- "jns 3f\n" \ +- "2:\t" \ +- "rep;nop\n\t" \ +- "cmpb $0,%0\n\t" \ +- "jle 2b\n\t" \ +- "jmp 1b\n" \ +- "3:\n\t" +- +-/* +- * NOTE: there's an irqs-on section here, which normally would have to be +- * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use +- * __raw_spin_lock_string_flags(). 
+- */ +-#define __raw_spin_lock_string_flags \ +- "\n1:\t" \ +- LOCK_PREFIX " ; decb %0\n\t" \ +- "jns 5f\n" \ +- "2:\t" \ +- "testl $0x200, %1\n\t" \ +- "jz 4f\n\t" \ +- "#sti\n" \ +- "3:\t" \ +- "rep;nop\n\t" \ +- "cmpb $0, %0\n\t" \ +- "jle 3b\n\t" \ +- "#cli\n\t" \ +- "jmp 1b\n" \ +- "4:\t" \ +- "rep;nop\n\t" \ +- "cmpb $0, %0\n\t" \ +- "jg 1b\n\t" \ +- "jmp 4b\n" \ +- "5:\n\t" ++static inline int __raw_spin_is_locked(raw_spinlock_t *x) ++{ ++ return *(volatile signed char *)(&(x)->slock) <= 0; ++} + + static inline void __raw_spin_lock(raw_spinlock_t *lock) + { +- asm(__raw_spin_lock_string : "+m" (lock->slock) : : "memory"); ++ asm volatile("\n1:\n" \ ++ LOCK_PREFIX "decb %0\n\t" ++ "jns 3f\n" ++ "2:\t" ++ "rep;nop\n\t" ++ "cmpb $0,%0\n\t" ++ "jle 2b\n\t" ++ "jmp 1b\n" ++ "3:\n\t" ++ : "+m" (lock->slock) : : "memory"); + } + + /* + * It is easier for the lock validator if interrupts are not re-enabled + * in the middle of a lock-acquire. This is a performance feature anyway + * so we turn it off: ++ * ++ * NOTE: there's an irqs-on section here, which normally would have to be ++ * irq-traced, but on CONFIG_TRACE_IRQFLAGS we never use this variant. + */ + #ifndef CONFIG_PROVE_LOCKING + static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) + { +- asm(__raw_spin_lock_string_flags : "+m" (lock->slock) : "r" (flags) : "memory"); ++ asm volatile( ++ "\n1:\t" ++ LOCK_PREFIX "decb %0\n\t" ++ "jns 5f\n" ++ "2:\t" ++ "testl $0x200, %1\n\t" ++ "jz 4f\n\t" ++ STI_STRING "\n" ++ "3:\t" ++ "rep;nop\n\t" ++ "cmpb $0, %0\n\t" ++ "jle 3b\n\t" ++ CLI_STRING "\n\t" ++ "jmp 1b\n" ++ "4:\t" ++ "rep;nop\n\t" ++ "cmpb $0, %0\n\t" ++ "jg 1b\n\t" ++ "jmp 4b\n" ++ "5:\n\t" ++ : "+m" (lock->slock) : "r" (flags) : "memory"); + } + #endif + + static inline int __raw_spin_trylock(raw_spinlock_t *lock) + { + char oldval; +- __asm__ __volatile__( ++ asm volatile( + "xchgb %b0,%1" + :"=q" (oldval), "+m" (lock->slock) + :"0" (0) : "memory"); +@@ -93,38 +94,29 @@ static inline int __raw_spin_trylock(raw + + #if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE) + +-#define __raw_spin_unlock_string \ +- "movb $1,%0" \ +- :"+m" (lock->slock) : : "memory" +- +- + static inline void __raw_spin_unlock(raw_spinlock_t *lock) + { +- __asm__ __volatile__( +- __raw_spin_unlock_string +- ); ++ asm volatile("movb $1,%0" : "+m" (lock->slock) :: "memory"); + } + + #else + +-#define __raw_spin_unlock_string \ +- "xchgb %b0, %1" \ +- :"=q" (oldval), "+m" (lock->slock) \ +- :"0" (oldval) : "memory" +- + static inline void __raw_spin_unlock(raw_spinlock_t *lock) + { + char oldval = 1; + +- __asm__ __volatile__( +- __raw_spin_unlock_string +- ); ++ asm volatile("xchgb %b0, %1" ++ : "=q" (oldval), "+m" (lock->slock) ++ : "0" (oldval) : "memory"); + } + + #endif + +-#define __raw_spin_unlock_wait(lock) \ +- do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) ++static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) ++{ ++ while (__raw_spin_is_locked(lock)) ++ cpu_relax(); ++} + + /* + * Read-write spinlocks, allowing multiple readers +@@ -151,22 +143,36 @@ static inline void __raw_spin_unlock(raw + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +-#define __raw_read_can_lock(x) ((int)(x)->lock > 0) ++static inline int __raw_read_can_lock(raw_rwlock_t *x) ++{ ++ return (int)(x)->lock > 0; ++} + + /** + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. 
+ */ +-#define __raw_write_can_lock(x) ((x)->lock == RW_LOCK_BIAS) ++static inline int __raw_write_can_lock(raw_rwlock_t *x) ++{ ++ return (x)->lock == RW_LOCK_BIAS; ++} + + static inline void __raw_read_lock(raw_rwlock_t *rw) + { +- __build_read_lock(rw, "__read_lock_failed"); ++ asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" ++ "jns 1f\n" ++ "call __read_lock_failed\n\t" ++ "1:\n" ++ ::"a" (rw) : "memory"); + } + + static inline void __raw_write_lock(raw_rwlock_t *rw) + { +- __build_write_lock(rw, "__write_lock_failed"); ++ asm volatile(LOCK_PREFIX " subl $" RW_LOCK_BIAS_STR ",(%0)\n\t" ++ "jz 1f\n" ++ "call __write_lock_failed\n\t" ++ "1:\n" ++ ::"a" (rw) : "memory"); + } + + static inline int __raw_read_trylock(raw_rwlock_t *lock) +@@ -199,4 +205,8 @@ static inline void __raw_write_unlock(ra + : "+m" (rw->lock) : : "memory"); + } + ++#define _raw_spin_relax(lock) cpu_relax() ++#define _raw_read_relax(lock) cpu_relax() ++#define _raw_write_relax(lock) cpu_relax() ++ + #endif /* __ASM_SPINLOCK_H */ +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/system.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/system.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/system.h 2007-10-22 13:53:08.000000000 +0200 +@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig + #define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) ++#define sync_cmpxchg(ptr,o,n)\ ++ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\ ++ (unsigned long)(n),sizeof(*(ptr)))) + #endif + + static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, +@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo + return old; + } + ++/* ++ * Always use locked operations when touching memory shared with a ++ * hypervisor, since the system may be SMP even if the guest kernel ++ * isn't. ++ */ ++static inline unsigned long __sync_cmpxchg(volatile void *ptr, ++ unsigned long old, ++ unsigned long new, int size) ++{ ++ unsigned long prev; ++ switch (size) { ++ case 1: ++ __asm__ __volatile__("lock; cmpxchgb %b1,%2" ++ : "=a"(prev) ++ : "q"(new), "m"(*__xg(ptr)), "0"(old) ++ : "memory"); ++ return prev; ++ case 2: ++ __asm__ __volatile__("lock; cmpxchgw %w1,%2" ++ : "=a"(prev) ++ : "r"(new), "m"(*__xg(ptr)), "0"(old) ++ : "memory"); ++ return prev; ++ case 4: ++ __asm__ __volatile__("lock; cmpxchgl %1,%2" ++ : "=a"(prev) ++ : "r"(new), "m"(*__xg(ptr)), "0"(old) ++ : "memory"); ++ return prev; ++ } ++ return old; ++} ++ + #ifndef CONFIG_X86_CMPXCHG + /* + * Building a kernel capable running on 80386. 
It may be necessary to +Index: 10.3-2007-11-26/include/asm-i386/mach-xen/asm/tlbflush.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-i386/mach-xen/asm/tlbflush.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-i386/mach-xen/asm/tlbflush.h 2007-10-22 13:53:08.000000000 +0200 +@@ -8,8 +8,6 @@ + #define __flush_tlb_global() xen_tlb_flush() + #define __flush_tlb_all() xen_tlb_flush() + +-extern unsigned long pgkern_mask; +- + #define cpu_has_invlpg (boot_cpu_data.x86 > 3) + + #define __flush_tlb_single(addr) xen_invlpg(addr) +Index: 10.3-2007-11-26/include/asm-x86_64/acpi.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/acpi.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/acpi.h 2007-10-22 13:53:08.000000000 +0200 +@@ -136,7 +136,9 @@ extern void acpi_reserve_bootmem(void); + extern int acpi_disabled; + extern int acpi_pci_disabled; + ++#ifndef CONFIG_XEN + #define ARCH_HAS_POWER_INIT 1 ++#endif + + extern int acpi_skip_timer_override; + extern int acpi_use_timer_override; +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/arch_hooks.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/arch_hooks.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/arch_hooks.h 2007-10-22 13:53:08.000000000 +0200 +@@ -14,7 +14,7 @@ + extern void init_ISA_irqs(void); + extern void apic_intr_init(void); + extern void smp_intr_init(void); +-extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs); ++extern irqreturn_t timer_interrupt(int irq, void *dev_id); + + /* these are the defined hooks */ + extern void intr_init_hook(void); +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/dma-mapping.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/dma-mapping.h 2007-10-22 13:53:08.000000000 +0200 +@@ -55,13 +55,6 @@ extern dma_addr_t bad_dma_address; + extern struct dma_mapping_ops* dma_ops; + extern int iommu_merge; + +-static inline int valid_dma_direction(int dma_direction) +-{ +- return ((dma_direction == DMA_BIDIRECTIONAL) || +- (dma_direction == DMA_TO_DEVICE) || +- (dma_direction == DMA_FROM_DEVICE)); +-} +- + #if 0 + static inline int dma_mapping_error(dma_addr_t dma_addr) + { +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/e820.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/e820.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/e820.h 2007-10-22 13:53:08.000000000 +0200 +@@ -19,13 +19,9 @@ + + #define E820_RAM 1 + #define E820_RESERVED 2 +-#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ ++#define E820_ACPI 3 + #define E820_NVS 4 + +-#define HIGH_MEMORY (1024*1024) +- +-#define LOWMEMSIZE() (0x9f000) +- + #ifndef __ASSEMBLY__ + struct e820entry { + u64 addr; /* start of memory segment */ +@@ -46,17 +42,16 @@ extern void setup_memory_region(void); + extern void contig_e820_setup(void); + extern unsigned long e820_end_of_ram(void); + extern void e820_reserve_resources(struct e820entry *e820, int nr_map); ++extern void e820_mark_nosave_regions(void); + extern void 
e820_print_map(char *who); + extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); + extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); + +-extern void e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end); + extern void e820_setup_gap(struct e820entry *e820, int nr_map); +-extern unsigned long e820_hole_size(unsigned long start_pfn, +- unsigned long end_pfn); ++extern void e820_register_active_regions(int nid, ++ unsigned long start_pfn, unsigned long end_pfn); + +-extern void __init parse_memopt(char *p, char **end); +-extern void __init parse_memmapopt(char *p, char **end); ++extern void finish_e820_parsing(void); + + extern struct e820map e820; + +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/fixmap.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/fixmap.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/fixmap.h 2007-10-22 13:53:08.000000000 +0200 +@@ -41,7 +41,7 @@ enum fixed_addresses { + #ifdef CONFIG_X86_LOCAL_APIC + FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ + #endif +-#ifdef CONFIG_X86_IO_APIC ++#ifndef CONFIG_XEN + FIX_IO_APIC_BASE_0, + FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, + #endif +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/floppy.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/floppy.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/floppy.h 2007-10-22 13:53:08.000000000 +0200 +@@ -54,7 +54,7 @@ static char *virtual_dma_addr; + static int virtual_dma_mode; + static int doing_pdma; + +-static irqreturn_t floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) ++static irqreturn_t floppy_hardint(int irq, void *dev_id) + { + register unsigned char st; + +@@ -66,7 +66,7 @@ static irqreturn_t floppy_hardint(int ir + static int dma_wait=0; + #endif + if (!doing_pdma) +- return floppy_interrupt(irq, dev_id, regs); ++ return floppy_interrupt(irq, dev_id); + + #ifdef TRACE_FLPY_INT + if(!calls) +@@ -109,7 +109,7 @@ static irqreturn_t floppy_hardint(int ir + dma_wait=0; + #endif + doing_pdma = 0; +- floppy_interrupt(irq, dev_id, regs); ++ floppy_interrupt(irq, dev_id); + return IRQ_HANDLED; + } + #ifdef TRACE_FLPY_INT +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/hw_irq.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/hw_irq.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/hw_irq.h 2007-10-22 13:53:08.000000000 +0200 +@@ -19,8 +19,7 @@ + #include <asm/irq.h> + #include <linux/profile.h> + #include <linux/smp.h> +- +-struct hw_interrupt_type; ++#include <linux/percpu.h> + #endif + + #define NMI_VECTOR 0x02 +@@ -77,9 +76,10 @@ struct hw_interrupt_type; + + + #ifndef __ASSEMBLY__ +-extern u8 irq_vector[NR_IRQ_VECTORS]; +-#define IO_APIC_VECTOR(irq) (irq_vector[irq]) +-#define AUTO_ASSIGN -1 ++typedef int vector_irq_t[NR_VECTORS]; ++DECLARE_PER_CPU(vector_irq_t, vector_irq); ++extern void __setup_vector_irq(int cpu); ++extern spinlock_t vector_lock; + + /* + * Various low-level irq details needed by irq.c, process.c, +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/io.h +=================================================================== +--- 
10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/io.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/io.h 2007-10-22 13:53:08.000000000 +0200 +@@ -273,33 +273,6 @@ void memset_io(volatile void __iomem *a, + + #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) + +-/** +- * check_signature - find BIOS signatures +- * @io_addr: mmio address to check +- * @signature: signature block +- * @length: length of signature +- * +- * Perform a signature comparison with the mmio address io_addr. This +- * address should have been obtained by ioremap. +- * Returns 1 on a match. +- */ +- +-static inline int check_signature(void __iomem *io_addr, +- const unsigned char *signature, int length) +-{ +- int retval = 0; +- do { +- if (readb(io_addr) != *signature) +- goto out; +- io_addr++; +- signature++; +- length--; +- } while (length); +- retval = 1; +-out: +- return retval; +-} +- + /* Nothing to do */ + + #define dma_cache_inv(_start,_size) do { } while (0) +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/msr.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/msr.h 2007-10-22 13:53:08.000000000 +0200 +@@ -66,14 +66,25 @@ + #define rdtscl(low) \ + __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx") + ++#define rdtscp(low,high,aux) \ ++ asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (low), "=d" (high), "=c" (aux)) ++ + #define rdtscll(val) do { \ + unsigned int __a,__d; \ + asm volatile("rdtsc" : "=a" (__a), "=d" (__d)); \ + (val) = ((unsigned long)__a) | (((unsigned long)__d)<<32); \ + } while(0) + ++#define rdtscpll(val, aux) do { \ ++ unsigned long __a, __d; \ ++ asm volatile (".byte 0x0f,0x01,0xf9" : "=a" (__a), "=d" (__d), "=c" (aux)); \ ++ (val) = (__d << 32) | __a; \ ++} while (0) ++ + #define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + ++#define write_rdtscp_aux(val) wrmsr(0xc0000103, val, 0) ++ + #define rdpmc(counter,low,high) \ + __asm__ __volatile__("rdpmc" \ + : "=a" (low), "=d" (high) \ +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/nmi.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/nmi.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/nmi.h 2007-10-22 13:53:08.000000000 +0200 +@@ -9,24 +9,13 @@ + + #include <xen/interface/nmi.h> + +-struct pt_regs; +- +-typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu); +- +-/** +- * set_nmi_callback +- * +- * Set a handler for an NMI. Only one handler may be +- * set. Return 1 if the NMI was handled. +- */ +-void set_nmi_callback(nmi_callback_t callback); +- + /** +- * unset_nmi_callback ++ * do_nmi_callback + * +- * Remove the handler previously set. ++ * Check to see if a callback exists and execute it. Return 1 ++ * if the handler exists and was handled successfully. 
+ */ +-void unset_nmi_callback(void); ++int do_nmi_callback(struct pt_regs *regs, int cpu); + + #ifdef CONFIG_PM + +@@ -50,7 +39,7 @@ static inline void unset_nmi_pm_callback + #endif /* CONFIG_PM */ + + extern void default_do_nmi(struct pt_regs *); +-extern void die_nmi(char *str, struct pt_regs *regs); ++extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); + + static inline unsigned char get_nmi_reason(void) + { +@@ -70,19 +59,26 @@ static inline unsigned char get_nmi_reas + + extern int panic_on_timeout; + extern int unknown_nmi_panic; ++extern int nmi_watchdog_enabled; + + extern int check_nmi_watchdog(void); +- +-extern void setup_apic_nmi_watchdog (void); +-extern int reserve_lapic_nmi(void); +-extern void release_lapic_nmi(void); ++extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); ++extern int avail_to_resrv_perfctr_nmi(unsigned int); ++extern int reserve_perfctr_nmi(unsigned int); ++extern void release_perfctr_nmi(unsigned int); ++extern int reserve_evntsel_nmi(unsigned int); ++extern void release_evntsel_nmi(unsigned int); ++ ++extern void setup_apic_nmi_watchdog (void *); ++extern void stop_apic_nmi_watchdog (void *); + extern void disable_timer_nmi_watchdog(void); + extern void enable_timer_nmi_watchdog(void); +-extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); ++extern int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason); + + extern void nmi_watchdog_default(void); + extern int setup_nmi_watchdog(char *); + ++extern atomic_t nmi_active; + extern unsigned int nmi_watchdog; + #define NMI_DEFAULT -1 + #define NMI_NONE 0 +@@ -90,4 +86,11 @@ extern unsigned int nmi_watchdog; + #define NMI_LOCAL_APIC 2 + #define NMI_INVALID 3 + ++struct ctl_table; ++struct file; ++extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, ++ void __user *, size_t *, loff_t *); ++ ++extern int unknown_nmi_panic; ++ + #endif /* ASM_NMI_H */ +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/pgtable.h 2007-10-22 13:53:08.000000000 +0200 +@@ -44,12 +44,9 @@ extern unsigned long __supported_pte_mas + + #define swapper_pg_dir init_level4_pgt + +-extern int nonx_setup(char *str); + extern void paging_init(void); + extern void clear_kernel_mapping(unsigned long addr, unsigned long size); + +-extern unsigned long pgkern_mask; +- + /* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. 
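[Editor's note] The nmi.h hunk above replaces the single reserve_lapic_nmi()/release_lapic_nmi() pair with per-MSR reservation helpers, so the NMI watchdog and profilers can share the performance counters instead of locking each other out. A minimal sketch of how a consumer would pair them, using only the declarations from that hunk; the function names and the K7 MSR choice are illustrative, not part of the patch:

#include <asm/nmi.h>
#include <asm/msr.h>

/* Claim a counter/event-select MSR pair before programming it; the
 * reserve helpers return 0 when another user (e.g. the watchdog)
 * already owns the register. */
static int demo_claim_counter(unsigned int ctr_msr, unsigned int sel_msr)
{
	if (!reserve_perfctr_nmi(ctr_msr))
		return -EBUSY;
	if (!reserve_evntsel_nmi(sel_msr)) {
		release_perfctr_nmi(ctr_msr);
		return -EBUSY;
	}
	return 0;
}

/* Release in reverse order; a caller might use, say,
 * demo_claim_counter(MSR_K7_PERFCTR0, MSR_K7_EVNTSEL0). */
static void demo_release_counter(unsigned int ctr_msr, unsigned int sel_msr)
{
	release_evntsel_nmi(sel_msr);
	release_perfctr_nmi(ctr_msr);
}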
+@@ -115,9 +112,6 @@ static inline void pgd_clear (pgd_t * pg + set_pgd(__user_pgd(pgd), __pgd(0)); + } + +-#define pud_page(pud) \ +- ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) +- + #define pte_same(a, b) ((a).pte == (b).pte) + + #define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK)) +@@ -326,7 +320,7 @@ static inline pte_t ptep_get_and_clear_f + #define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT) + static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } + static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } +-static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; } ++static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); } + static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; } + static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; } + static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; } +@@ -339,29 +333,12 @@ static inline pte_t pte_mkclean(pte_t pt + static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } + static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; } + static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; } +-static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; } ++static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; } + static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; } + static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; } + static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; } + static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; } +- +-#define ptep_test_and_clear_dirty(vma, addr, ptep) \ +-({ \ +- pte_t __pte = *(ptep); \ +- int __ret = pte_dirty(__pte); \ +- if (__ret) \ +- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \ +- __ret; \ +-}) +- +-#define ptep_test_and_clear_young(vma, addr, ptep) \ +-({ \ +- pte_t __pte = *(ptep); \ +- int __ret = pte_young(__pte); \ +- if (__ret) \ +- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \ +- __ret; \ +-}) ++static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; } + + static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) + { +@@ -389,7 +366,8 @@ static inline int pmd_large(pmd_t pte) { + * Level 4 access. + * Never use these in the common code. + */ +-#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK)) ++#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK)) ++#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)) + #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) + #define pgd_offset_k(address) (pgd_t *)(init_level4_pgt + pgd_index(address)) +@@ -398,16 +376,18 @@ static inline int pmd_large(pmd_t pte) { + + /* PUD - Level3 access */ + /* to find an entry in a page-table-directory. 
*/ ++#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK)) ++#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT)) + #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +-#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) ++#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address)) + #define pud_present(pud) (pud_val(pud) & _PAGE_PRESENT) + + /* PMD - Level 2 access */ +-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) ++#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK)) + #define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) + + #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) +-#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \ ++#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \ + pmd_index(address)) + #define pmd_none(x) (!pmd_val(x)) + #if CONFIG_XEN_COMPAT <= 0x030002 +@@ -438,6 +418,7 @@ static inline pte_t mk_pte_phys(unsigned + { + unsigned long pteval; + pteval = physpage | pgprot_val(pgprot); ++ pteval &= __supported_pte_mask; + return __pte(pteval); + } + +@@ -459,7 +440,7 @@ static inline pte_t pte_modify(pte_t pte + + #define pte_index(address) \ + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \ ++#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ + pte_index(address)) + + /* x86-64 always has all page tables mapped. */ +@@ -500,6 +481,40 @@ static inline pte_t pte_modify(pte_t pte + ptep_establish(vma, address, ptep, entry); \ + } while (0) + ++ ++/* ++ * i386 says: We don't actually have these, but we want to advertise ++ * them so that we can encompass the flush here. ++ */ ++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY ++#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG ++ ++#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH ++#define ptep_clear_flush_dirty(vma, address, ptep) \ ++({ \ ++ pte_t __pte = *(ptep); \ ++ int __dirty = pte_dirty(__pte); \ ++ __pte = pte_mkclean(__pte); \ ++ if ((vma)->vm_mm->context.pinned) \ ++ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \ ++ else if (__dirty) \ ++ set_pte(ptep, __pte); \ ++ __dirty; \ ++}) ++ ++#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH ++#define ptep_clear_flush_young(vma, address, ptep) \ ++({ \ ++ pte_t __pte = *(ptep); \ ++ int __young = pte_young(__pte); \ ++ __pte = pte_mkold(__pte); \ ++ if ((vma)->vm_mm->context.pinned) \ ++ ptep_set_access_flags(vma, address, ptep, __pte, __young); \ ++ else if (__young) \ ++ set_pte(ptep, __pte); \ ++ __young; \ ++}) ++ + /* Encode and de-code a swap entry */ + #define __swp_type(x) (((x).val >> 1) & 0x3f) + #define __swp_offset(x) ((x).val >> 8) +@@ -560,8 +575,6 @@ int touch_pte_range(struct mm_struct *mm + #define kc_offset_to_vaddr(o) \ + (((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? 
((o) | (~__VIRTUAL_MASK)) : (o)) + +-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR + #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL + #define __HAVE_ARCH_PTEP_CLEAR_FLUSH +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/processor.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/processor.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/processor.h 2007-10-22 13:53:08.000000000 +0200 +@@ -488,6 +488,8 @@ static inline void __mwait(unsigned long + : :"a" (eax), "c" (ecx)); + } + ++extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); ++ + #define stack_current() \ + ({ \ + struct thread_info *ti; \ +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/ptrace.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/ptrace.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/ptrace.h 2007-10-22 13:53:08.000000000 +0200 +@@ -1,40 +1,9 @@ + #ifndef _X86_64_PTRACE_H + #define _X86_64_PTRACE_H + +-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS) +-#define R15 0 +-#define R14 8 +-#define R13 16 +-#define R12 24 +-#define RBP 32 +-#define RBX 40 +-/* arguments: interrupts/non tracing syscalls only save upto here*/ +-#define R11 48 +-#define R10 56 +-#define R9 64 +-#define R8 72 +-#define RAX 80 +-#define RCX 88 +-#define RDX 96 +-#define RSI 104 +-#define RDI 112 +-#define ORIG_RAX 120 /* = ERROR */ +-/* end of arguments */ +-/* cpu exception frame or undefined in case of fast syscall. */ +-#define RIP 128 +-#define CS 136 +-#define EFLAGS 144 +-#define RSP 152 +-#define SS 160 +-#define ARGOFFSET R11 +-#endif /* __ASSEMBLY__ */ ++#include <asm/ptrace-abi.h> + +-/* top of stack page */ +-#define FRAME_SIZE 168 +- +-#define PTRACE_OLDSETOPTIONS 21 +- +-#ifndef __ASSEMBLY__ ++#ifndef __ASSEMBLY__ + + struct pt_regs { + unsigned long r15; +@@ -45,7 +14,7 @@ struct pt_regs { + unsigned long rbx; + /* arguments: non interrupts/non tracing syscalls only save upto here*/ + unsigned long r11; +- unsigned long r10; ++ unsigned long r10; + unsigned long r9; + unsigned long r8; + unsigned long rax; +@@ -54,41 +23,25 @@ struct pt_regs { + unsigned long rsi; + unsigned long rdi; + unsigned long orig_rax; +-/* end of arguments */ ++/* end of arguments */ + /* cpu exception frame or undefined */ + unsigned long rip; + unsigned long cs; +- unsigned long eflags; +- unsigned long rsp; ++ unsigned long eflags; ++ unsigned long rsp; + unsigned long ss; +-/* top of stack page */ ++/* top of stack page */ + }; + + #endif + +-/* Arbitrarily choose the same ptrace numbers as used by the Sparc code. 
*/ +-#define PTRACE_GETREGS 12 +-#define PTRACE_SETREGS 13 +-#define PTRACE_GETFPREGS 14 +-#define PTRACE_SETFPREGS 15 +-#define PTRACE_GETFPXREGS 18 +-#define PTRACE_SETFPXREGS 19 +- +-/* only useful for access 32bit programs */ +-#define PTRACE_GET_THREAD_AREA 25 +-#define PTRACE_SET_THREAD_AREA 26 +- +-#define PTRACE_ARCH_PRCTL 30 /* arch_prctl for child */ +- + #if defined(__KERNEL__) && !defined(__ASSEMBLY__) + #define user_mode(regs) (!!((regs)->cs & 3)) + #define user_mode_vm(regs) user_mode(regs) + #define instruction_pointer(regs) ((regs)->rip) +-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) ++#define regs_return_value(regs) ((regs)->rax) ++ + extern unsigned long profile_pc(struct pt_regs *regs); +-#else +-#define profile_pc(regs) instruction_pointer(regs) +-#endif + + #include <linux/compiler.h> + +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/smp.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/smp.h 2007-10-22 13:53:08.000000000 +0200 +@@ -4,15 +4,12 @@ + /* + * We need the APIC definitions automatically as part of 'smp.h' + */ +-#ifndef __ASSEMBLY__ + #include <linux/threads.h> + #include <linux/cpumask.h> + #include <linux/bitops.h> + extern int disable_apic; +-#endif + + #ifdef CONFIG_X86_LOCAL_APIC +-#ifndef __ASSEMBLY__ + #include <asm/fixmap.h> + #include <asm/mpspec.h> + #ifdef CONFIG_X86_IO_APIC +@@ -21,10 +18,8 @@ extern int disable_apic; + #include <asm/apic.h> + #include <asm/thread_info.h> + #endif +-#endif + + #ifdef CONFIG_SMP +-#ifndef ASSEMBLY + + #include <asm/pda.h> + +@@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized; + + extern void smp_alloc_memory(void); + extern volatile unsigned long smp_invalidate_needed; +-extern int pic_mode; + extern void lock_ipi_call_lock(void); + extern void unlock_ipi_call_lock(void); + extern int smp_num_siblings; + extern void smp_send_reschedule(int cpu); + void smp_stop_cpu(void); +-extern int smp_call_function_single(int cpuid, void (*func) (void *info), +- void *info, int retry, int wait); + + extern cpumask_t cpu_sibling_map[NR_CPUS]; + extern cpumask_t cpu_core_map[NR_CPUS]; +@@ -77,20 +69,16 @@ static inline int hard_smp_processor_id( + } + #endif + +-extern int safe_smp_processor_id(void); + extern int __cpu_disable(void); + extern void __cpu_die(unsigned int cpu); + extern void prefill_possible_map(void); + extern unsigned num_processors; + extern unsigned disabled_cpus; + +-#endif /* !ASSEMBLY */ +- + #define NO_PROC_ID 0xFF /* No processor magic marker */ + + #endif + +-#ifndef ASSEMBLY + /* + * Some lowlevel functions might want to know about + * the real APIC ID <-> CPU # mapping. 
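[Editor's note] A recurring theme in this patch is the 2.6.19 interrupt-handler prototype change: arch_hooks.h, floppy.h, and the evtchn.h bindings further down all drop the struct pt_regs * argument. Handlers that still need the register frame, for example to feed profile_pc() (whose declaration the ptrace.h hunk above makes unconditional), now fetch it from the per-CPU slot filled in by the low-level entry code. A hedged sketch of the new convention; the handler itself is made up:

#include <linux/interrupt.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

/* 2.6.19 style: two arguments only; the frame is retrieved on demand. */
static irqreturn_t demo_interrupt(int irq, void *dev_id)
{
	struct pt_regs *regs = get_irq_regs();	/* was the third argument */
	unsigned long pc = profile_pc(regs);	/* e.g. for profiling hooks */

	(void)pc;	/* a real handler would account the sample here */
	return IRQ_HANDLED;
}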
+@@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid( + } + #endif + +-#endif /* !ASSEMBLY */ +- + #ifndef CONFIG_SMP + #define stack_smp_processor_id() 0 +-#define safe_smp_processor_id() 0 + #define cpu_logical_map(x) (x) + #else + #include <asm/thread_info.h> +@@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid( + }) + #endif + +-#ifndef __ASSEMBLY__ + #ifdef CONFIG_X86_LOCAL_APIC + static __inline int logical_smp_processor_id(void) + { +@@ -138,13 +122,18 @@ static __inline int logical_smp_processo + return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR)); + } + #endif +-#endif + + #ifdef CONFIG_SMP + #define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] + #else + #define cpu_physical_id(cpu) boot_cpu_id +-#endif +- ++static inline int smp_call_function_single(int cpuid, void (*func) (void *info), ++ void *info, int retry, int wait) ++{ ++ /* Disable interrupts here? */ ++ func(info); ++ return 0; ++} ++#endif /* !CONFIG_SMP */ + #endif + +Index: 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/system.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/mach-xen/asm/system.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/mach-xen/asm/system.h 2007-10-22 13:53:08.000000000 +0200 +@@ -24,12 +24,13 @@ + #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" + + /* frame pointer must be last for get_wchan */ +-#define SAVE_CONTEXT "pushq %%rbp ; movq %%rsi,%%rbp\n\t" +-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\n\t" ++#define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" ++#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp ; popf\t" + + #define __EXTRA_CLOBBER \ + ,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15" + ++/* Save restore flags to clear handle leaking NT */ + #define switch_to(prev,next,last) \ + asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ +Index: 10.3-2007-11-26/include/asm-x86_64/thread_info.h +=================================================================== +--- 10.3-2007-11-26.orig/include/asm-x86_64/thread_info.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/asm-x86_64/thread_info.h 2007-10-22 13:53:08.000000000 +0200 +@@ -147,7 +147,11 @@ static inline struct thread_info *stack_ + #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) + + /* flags to check in __switch_to() */ ++#ifndef CONFIG_XEN + #define _TIF_WORK_CTXSW (_TIF_DEBUG|_TIF_IO_BITMAP) ++#else ++#define _TIF_WORK_CTXSW _TIF_DEBUG ++#endif + + #define PREEMPT_ACTIVE 0x10000000 + +Index: 10.3-2007-11-26/include/linux/skbuff.h +=================================================================== +--- 10.3-2007-11-26.orig/include/linux/skbuff.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/linux/skbuff.h 2007-10-22 13:53:08.000000000 +0200 +@@ -1729,5 +1729,11 @@ static inline void skb_forward_csum(stru + skb->ip_summed = CHECKSUM_NONE; + } + ++#ifdef CONFIG_XEN ++int skb_checksum_setup(struct sk_buff *skb); ++#else ++static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; } ++#endif ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_SKBUFF_H */ +Index: 10.3-2007-11-26/include/xen/evtchn.h +=================================================================== +--- 10.3-2007-11-26.orig/include/xen/evtchn.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/xen/evtchn.h 2007-10-22 13:53:08.000000000 +0200 +@@ -54,34 +54,34 @@ + */ 
+ int bind_caller_port_to_irqhandler( + unsigned int caller_port, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); + int bind_listening_port_to_irqhandler( + unsigned int remote_domain, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); + int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); + int bind_virq_to_irqhandler( + unsigned int virq, + unsigned int cpu, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); + int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, +- irqreturn_t (*handler)(int, void *, struct pt_regs *), ++ irq_handler_t handler, + unsigned long irqflags, + const char *devname, + void *dev_id); +Index: 10.3-2007-11-26/include/xen/xencons.h +=================================================================== +--- 10.3-2007-11-26.orig/include/xen/xencons.h 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/include/xen/xencons.h 2007-10-22 13:53:08.000000000 +0200 +@@ -8,7 +8,7 @@ void xencons_force_flush(void); + void xencons_resume(void); + + /* Interrupt work hooks. Receive data, or kick data out. */ +-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs); ++void xencons_rx(char *buf, unsigned len); + void xencons_tx(void); + + int xencons_ring_init(void); +Index: 10.3-2007-11-26/net/core/dev.c +=================================================================== +--- 10.3-2007-11-26.orig/net/core/dev.c 2007-12-06 17:27:35.000000000 +0100 ++++ 10.3-2007-11-26/net/core/dev.c 2007-10-22 13:53:08.000000000 +0200 +@@ -1487,15 +1487,13 @@ inline int skb_checksum_setup(struct sk_ + } + if ((skb->h.raw + skb->csum + 2) > skb->tail) + goto out; +- skb->ip_summed = CHECKSUM_HW; ++ skb->ip_summed = CHECKSUM_PARTIAL; + skb->proto_csum_blank = 0; + } + return 0; + out: + return -EPROTO; + } +-#else +-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; } + #endif + + +@@ -1928,7 +1926,7 @@ int netif_receive_skb(struct sk_buff *sk + case CHECKSUM_UNNECESSARY: + skb->proto_data_valid = 1; + break; +- case CHECKSUM_HW: ++ case CHECKSUM_PARTIAL: + /* XXX Implement me. */ + default: + skb->proto_data_valid = 0; |
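[Editor's note] The final hunks tie the checksum-offload plumbing together: skbuff.h grows a skb_checksum_setup() declaration with a no-op stub for non-Xen builds, and net/core/dev.c switches the Xen-specific helper from the old CHECKSUM_HW name to 2.6.19's CHECKSUM_PARTIAL. A sketch of the intended caller, a backend delivering a guest-originated packet whose protocol checksum was left blank; the surrounding function is illustrative, not taken from the patch:

#include <linux/skbuff.h>
#include <linux/netdevice.h>

/* skb_checksum_setup() validates the protocol headers and converts a
 * "checksum blank" packet into an ordinary CHECKSUM_PARTIAL one that
 * the stack (or the NIC) can finish. */
static int demo_deliver_to_stack(struct sk_buff *skb)
{
	if (skb_checksum_setup(skb)) {	/* -EPROTO on malformed headers */
		kfree_skb(skb);
		return -EPROTO;
	}
	netif_rx(skb);
	return 0;
}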