diff -urNp linux-2.6.21.5/arch/alpha/kernel/module.c linux-2.6.21.5/arch/alpha/kernel/module.c --- linux-2.6.21.5/arch/alpha/kernel/module.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/alpha/kernel/module.c 2007-05-24 22:04:52.000000000 -0400 @@ -177,7 +177,7 @@ apply_relocate_add(Elf64_Shdr *sechdrs, /* The small sections were sorted to the end of the segment. The following should definitely cover them. */ - gp = (u64)me->module_core + me->core_size - 0x8000; + gp = (u64)me->module_core_rw + me->core_size_rw - 0x8000; got = sechdrs[me->arch.gotsecindex].sh_addr; for (i = 0; i < n; i++) { diff -urNp linux-2.6.21.5/arch/alpha/kernel/osf_sys.c linux-2.6.21.5/arch/alpha/kernel/osf_sys.c --- linux-2.6.21.5/arch/alpha/kernel/osf_sys.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/alpha/kernel/osf_sys.c 2007-05-24 22:04:52.000000000 -0400 @@ -1277,6 +1277,10 @@ arch_get_unmapped_area(struct file *filp merely specific addresses, but regions of memory -- perhaps this feature should be incorporated into all ports? */ +#ifdef CONFIG_PAX_RANDMMAP + if (!(current->mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { addr = arch_get_unmapped_area_1 (PAGE_ALIGN(addr), len, limit); if (addr != (unsigned long) -ENOMEM) @@ -1284,8 +1288,8 @@ arch_get_unmapped_area(struct file *filp } /* Next, try allocating at TASK_UNMAPPED_BASE. 
*/ - addr = arch_get_unmapped_area_1 (PAGE_ALIGN(TASK_UNMAPPED_BASE), - len, limit); + addr = arch_get_unmapped_area_1 (PAGE_ALIGN(current->mm->mmap_base), len, limit); + if (addr != (unsigned long) -ENOMEM) return addr; diff -urNp linux-2.6.21.5/arch/alpha/kernel/ptrace.c linux-2.6.21.5/arch/alpha/kernel/ptrace.c --- linux-2.6.21.5/arch/alpha/kernel/ptrace.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/alpha/kernel/ptrace.c 2007-05-24 22:04:52.000000000 -0400 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -283,6 +284,9 @@ do_sys_ptrace(long request, long pid, lo goto out_notsk; } + if (gr_handle_ptrace(child, request)) + goto out; + if (request == PTRACE_ATTACH) { ret = ptrace_attach(child); goto out; diff -urNp linux-2.6.21.5/arch/alpha/mm/fault.c linux-2.6.21.5/arch/alpha/mm/fault.c --- linux-2.6.21.5/arch/alpha/mm/fault.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/alpha/mm/fault.c 2007-05-24 22:04:52.000000000 -0400 @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -55,6 +56,124 @@ __load_new_mm_context(struct mm_struct * __reload_thread(pcb); } +#ifdef CONFIG_PAX_PAGEEXEC +/* + * PaX: decide what to do with offenders (regs->pc = fault address) + * + * returns 1 when task should be killed + * 2 when patched PLT trampoline was detected + * 3 when unpatched PLT trampoline was detected + */ +static int pax_handle_fetch_fault(struct pt_regs *regs) +{ + +#ifdef CONFIG_PAX_EMUPLT + int err; + + do { /* PaX: patched PLT emulation #1 */ + unsigned int ldah, ldq, jmp; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(ldq, (unsigned int *)(regs->pc+4)); + err |= get_user(jmp, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (ldq & 0xFFFF0000U) == 0xA77B0000U && + jmp == 0x6BFB0000U) + { + unsigned long r27, addr; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = ldq | 
0xFFFFFFFFFFFF0000UL; + + addr = regs->r27 + ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + err = get_user(r27, (unsigned long*)addr); + if (err) + break; + + regs->r27 = r27; + regs->pc = r27; + return 2; + } + } while (0); + + do { /* PaX: patched PLT emulation #2 */ + unsigned int ldah, lda, br; + + err = get_user(ldah, (unsigned int *)regs->pc); + err |= get_user(lda, (unsigned int *)(regs->pc+4)); + err |= get_user(br, (unsigned int *)(regs->pc+8)); + + if (err) + break; + + if ((ldah & 0xFFFF0000U) == 0x277B0000U && + (lda & 0xFFFF0000U) == 0xA77B0000U && + (br & 0xFFE00000U) == 0xC3E00000U) + { + unsigned long addr = br | 0xFFFFFFFFFFE00000UL; + unsigned long addrh = (ldah | 0xFFFFFFFFFFFF0000UL) << 16; + unsigned long addrl = lda | 0xFFFFFFFFFFFF0000UL; + + regs->r27 += ((addrh ^ 0x80000000UL) + 0x80000000UL) + ((addrl ^ 0x8000UL) + 0x8000UL); + regs->pc += 12 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + return 2; + } + } while (0); + + do { /* PaX: unpatched PLT emulation */ + unsigned int br; + + err = get_user(br, (unsigned int *)regs->pc); + + if (!err && (br & 0xFFE00000U) == 0xC3800000U) { + unsigned int br2, ldq, nop, jmp; + unsigned long addr = br | 0xFFFFFFFFFFE00000UL, resolver; + + addr = regs->pc + 4 + (((addr ^ 0x00100000UL) + 0x00100000UL) << 2); + err = get_user(br2, (unsigned int *)addr); + err |= get_user(ldq, (unsigned int *)(addr+4)); + err |= get_user(nop, (unsigned int *)(addr+8)); + err |= get_user(jmp, (unsigned int *)(addr+12)); + err |= get_user(resolver, (unsigned long *)(addr+16)); + + if (err) + break; + + if (br2 == 0xC3600000U && + ldq == 0xA77B000CU && + nop == 0x47FF041FU && + jmp == 0x6B7B0000U) + { + regs->r28 = regs->pc+4; + regs->r27 = addr+16; + regs->pc = resolver; + return 3; + } + } + } while (0); +#endif + + return 1; +} + +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 5; i++) { + unsigned int c; + if 
(get_user(c, (unsigned int*)pc+i)) + printk("???????? "); + else + printk("%08x ", c); + } + printk("\n"); +} +#endif /* * This routine handles page faults. It determines the address, @@ -132,8 +251,29 @@ do_page_fault(unsigned long address, uns good_area: si_code = SEGV_ACCERR; if (cause < 0) { - if (!(vma->vm_flags & VM_EXEC)) + if (!(vma->vm_flags & VM_EXEC)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (!(mm->pax_flags & MF_PAX_PAGEEXEC) || address != regs->pc) + goto bad_area; + + up_read(&mm->mmap_sem); + switch(pax_handle_fetch_fault(regs)) { + +#ifdef CONFIG_PAX_EMUPLT + case 2: + case 3: + return; +#endif + + } + pax_report_fault(regs, (void*)regs->pc, (void*)rdusp()); + do_exit(SIGKILL); +#else goto bad_area; +#endif + + } } else if (!cause) { /* Allow reads even for write-only mappings */ if (!(vma->vm_flags & (VM_READ | VM_WRITE))) diff -urNp linux-2.6.21.5/arch/arm/mm/mmap.c linux-2.6.21.5/arch/arm/mm/mmap.c --- linux-2.6.21.5/arch/arm/mm/mmap.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/arm/mm/mmap.c 2007-05-24 22:04:52.000000000 -0400 @@ -61,6 +61,10 @@ arch_get_unmapped_area(struct file *filp if (len > TASK_SIZE) return -ENOMEM; +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + if (addr) { if (do_align) addr = COLOUR_ALIGN(addr, pgoff); @@ -75,7 +79,7 @@ arch_get_unmapped_area(struct file *filp if (len > mm->cached_hole_size) { start_addr = addr = mm->free_area_cache; } else { - start_addr = addr = TASK_UNMAPPED_BASE; + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; } @@ -92,8 +96,8 @@ full_search: * Start a new search - just in case we missed * some holes. 
*/ - if (start_addr != TASK_UNMAPPED_BASE) { - start_addr = addr = TASK_UNMAPPED_BASE; + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; mm->cached_hole_size = 0; goto full_search; } diff -urNp linux-2.6.21.5/arch/avr32/mm/fault.c linux-2.6.21.5/arch/avr32/mm/fault.c --- linux-2.6.21.5/arch/avr32/mm/fault.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/avr32/mm/fault.c 2007-05-24 22:04:52.000000000 -0400 @@ -68,6 +68,23 @@ static inline int notify_page_fault(enum } #endif +#ifdef CONFIG_PAX_PAGEEXEC +void pax_report_insns(void *pc, void *sp) +{ + unsigned long i; + + printk(KERN_ERR "PAX: bytes at PC: "); + for (i = 0; i < 20; i++) { + unsigned char c; + if (get_user(c, (unsigned char*)pc+i)) + printk("???????? "); + else + printk("%02x ", c); + } + printk("\n"); +} +#endif + /* * This routine handles page faults. It determines the address and the * problem, and then passes it off to one of the appropriate routines. @@ -182,6 +199,16 @@ bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { + +#ifdef CONFIG_PAX_PAGEEXEC + if (mm->pax_flags & MF_PAX_PAGEEXEC) { + if (ecr == ECR_PROTECTION_X || ecr == ECR_TLB_MISS_X) { + pax_report_fault(regs, (void*)regs->pc, (void*)regs->sp); + do_exit(SIGKILL); + } + } +#endif + /* Hmm...we have to pass address and ecr somehow... 
*/ /* tsk->thread.address = address; tsk->thread.error_code = ecr; */ diff -urNp linux-2.6.21.5/arch/i386/boot/setup.S linux-2.6.21.5/arch/i386/boot/setup.S --- linux-2.6.21.5/arch/i386/boot/setup.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/boot/setup.S 2007-05-24 22:04:52.000000000 -0400 @@ -869,11 +869,13 @@ startup_32: movl %eax, %gs movl %eax, %ss + movl 0x00000000, %ecx xorl %eax, %eax 1: incl %eax # check that A20 really IS enabled movl %eax, 0x00000000 # loop forever if it isn't cmpl %eax, 0x00100000 je 1b + movl %ecx, 0x00000000 # Jump to the 32bit entry point jmpl *(code32_start - start + (DELTA_INITSEG << 4))(%esi) diff -urNp linux-2.6.21.5/arch/i386/Kconfig linux-2.6.21.5/arch/i386/Kconfig --- linux-2.6.21.5/arch/i386/Kconfig 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/Kconfig 2007-06-01 22:13:13.000000000 -0400 @@ -578,7 +578,7 @@ endchoice config PAGE_OFFSET hex default 0xB0000000 if VMSPLIT_3G_OPT - default 0x78000000 if VMSPLIT_2G + default 0x70000000 if VMSPLIT_2G default 0x40000000 if VMSPLIT_1G default 0xC0000000 @@ -892,7 +892,7 @@ config HOTPLUG_CPU config COMPAT_VDSO bool "Compat VDSO support" - default y + default n help Map the VDSO to the predictable old-style address too. ---help--- @@ -1087,7 +1087,7 @@ config PCI choice prompt "PCI access mode" depends on PCI && !X86_VISWS - default PCI_GOANY + default PCI_GODIRECT ---help--- On PCI systems, the BIOS can be used to detect the PCI devices and determine their configuration. 
However, some old PCI motherboards @@ -1119,7 +1119,7 @@ endchoice config PCI_BIOS bool - depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY) + depends on !X86_VISWS && PCI && PCI_GOBIOS default y config PCI_DIRECT diff -urNp linux-2.6.21.5/arch/i386/Kconfig.cpu linux-2.6.21.5/arch/i386/Kconfig.cpu --- linux-2.6.21.5/arch/i386/Kconfig.cpu 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/Kconfig.cpu 2007-05-24 22:04:52.000000000 -0400 @@ -262,7 +262,7 @@ config X86_PPRO_FENCE config X86_F00F_BUG bool - depends on M586MMX || M586TSC || M586 || M486 || M386 + depends on (M586MMX || M586TSC || M586 || M486 || M386) && !PAX_KERNEXEC default y config X86_WP_WORKS_OK @@ -292,7 +292,7 @@ config X86_CMPXCHG64 config X86_ALIGNMENT_16 bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK8 || MK7 || MK6 || MPENTIUM4 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 default y config X86_GOOD_APIC diff -urNp linux-2.6.21.5/arch/i386/Kconfig.debug linux-2.6.21.5/arch/i386/Kconfig.debug --- linux-2.6.21.5/arch/i386/Kconfig.debug 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/Kconfig.debug 2007-05-24 22:04:52.000000000 -0400 @@ -48,7 +48,7 @@ config DEBUG_PAGEALLOC config DEBUG_RODATA bool "Write protect kernel read-only data structures" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !PAX_KERNEXEC help Mark the kernel read-only data as write-protected in the pagetables, in order to catch accidental (and incorrect) writes to such const diff -urNp linux-2.6.21.5/arch/i386/kernel/acpi/boot.c linux-2.6.21.5/arch/i386/kernel/acpi/boot.c --- linux-2.6.21.5/arch/i386/kernel/acpi/boot.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/acpi/boot.c 2007-05-24 22:04:52.000000000 -0400 @@ -1116,7 
+1116,7 @@ static struct dmi_system_id __initdata a DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, - {} + { NULL, NULL, {{0, NULL}}, NULL} }; #endif /* __i386__ */ diff -urNp linux-2.6.21.5/arch/i386/kernel/acpi/sleep.c linux-2.6.21.5/arch/i386/kernel/acpi/sleep.c --- linux-2.6.21.5/arch/i386/kernel/acpi/sleep.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/acpi/sleep.c 2007-05-24 22:04:52.000000000 -0400 @@ -94,7 +94,7 @@ static __initdata struct dmi_system_id a DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"), }, }, - {} + { NULL, NULL, {{0, NULL}}, NULL} }; static int __init acpisleep_dmi_init(void) diff -urNp linux-2.6.21.5/arch/i386/kernel/acpi/wakeup.S linux-2.6.21.5/arch/i386/kernel/acpi/wakeup.S --- linux-2.6.21.5/arch/i386/kernel/acpi/wakeup.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/acpi/wakeup.S 2007-05-24 22:04:52.000000000 -0400 @@ -205,13 +205,11 @@ wakeup_pmode_return: # and restore the stack ... but you need gdt for this to work movl saved_context_esp, %esp - movl %cs:saved_magic, %eax - cmpl $0x12345678, %eax + cmpl $0x12345678, saved_magic jne bogus_magic # jump to place where we left off - movl saved_eip,%eax - jmp *%eax + jmp *(saved_eip) bogus_magic: movw $0x0e00 + 'B', 0xb8018 diff -urNp linux-2.6.21.5/arch/i386/kernel/alternative.c linux-2.6.21.5/arch/i386/kernel/alternative.c --- linux-2.6.21.5/arch/i386/kernel/alternative.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/alternative.c 2007-05-24 22:04:52.000000000 -0400 @@ -4,6 +4,7 @@ #include #include #include +#include static int smp_alt_once = 0; static int debug_alternative = 0; @@ -149,12 +150,18 @@ void apply_alternatives(struct alt_instr u8 *instr; int diff; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { BUG_ON(a->replacementlen > a->instrlen); if 
(!boot_cpu_has(a->cpuid)) continue; - instr = a->instr; + instr = a->instr + __KERNEL_TEXT_OFFSET; #ifdef CONFIG_X86_64 /* vsyscall code is not mapped yet. resolve it manually. */ if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { @@ -167,6 +174,11 @@ void apply_alternatives(struct alt_instr diff = a->instrlen - a->replacementlen; nop_out(instr + a->replacementlen, diff); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } #ifdef CONFIG_SMP @@ -175,49 +187,95 @@ static void alternatives_smp_save(struct { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); for (a = start; a < end; a++) { memcpy(a->replacement + a->replacementlen, - a->instr, + a->instr + __KERNEL_TEXT_OFFSET, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) { struct alt_instr *a; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + for (a = start; a < end; a++) { - memcpy(a->instr, + memcpy(a->instr + __KERNEL_TEXT_OFFSET, a->replacement + a->replacementlen, a->instrlen); } + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - u8 **ptr; + u8 *ptr; - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - **ptr = 0xf0; /* lock prefix */ + *ptr = 0xf0; /* lock prefix */ }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { - u8 **ptr; + u8 *ptr; + +#ifdef CONFIG_PAX_KERNEXEC + 
unsigned long cr0; + + pax_open_kernel(cr0); +#endif - for (ptr = start; ptr < end; ptr++) { - if (*ptr < text) + for (; start < end; start++) { + ptr = *start + __KERNEL_TEXT_OFFSET; + if (ptr < text) continue; - if (*ptr > text_end) + if (ptr > text_end) continue; - nop_out(*ptr, 1); + nop_out(ptr, 1); }; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } struct smp_alt_module { @@ -344,10 +402,17 @@ void apply_paravirt(struct paravirt_patc { struct paravirt_patch *p; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + for (p = start; p < end; p++) { unsigned int used; + u8 *instr = p->instr + __KERNEL_TEXT_OFFSET; - used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, + used = paravirt_ops.patch(p->instrtype, p->clobbers, instr, p->len); #ifdef CONFIG_DEBUG_PARAVIRT { @@ -355,17 +420,20 @@ void apply_paravirt(struct paravirt_patc /* Deliberately clobber regs using "not %reg" to find bugs. */ for (i = 0; i < 3; i++) { if (p->len - used >= 2 && (p->clobbers & (1 << i))) { - memcpy(p->instr + used, "\xf7\xd0", 2); - p->instr[used+1] |= i; - used += 2; + instr[used++] = 0xf7; + instr[used++] = 0xd0 | i; } } } #endif /* Pad the rest with nops */ - nop_out(p->instr + used, p->len - used); + nop_out(instr + used, p->len - used); } +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + /* Sync to be conservative, in case we patched following instructions */ sync_core(); } diff -urNp linux-2.6.21.5/arch/i386/kernel/apm.c linux-2.6.21.5/arch/i386/kernel/apm.c --- linux-2.6.21.5/arch/i386/kernel/apm.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/apm.c 2007-05-24 22:04:52.000000000 -0400 @@ -236,7 +236,7 @@ #include "io_ports.h" -extern void machine_real_restart(unsigned char *, int); +extern void machine_real_restart(const unsigned char *, unsigned int); #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -609,9 +609,18 @@ static 
u8 apm_bios_call(u32 func, u32 eb struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -622,6 +631,11 @@ static u8 apm_bios_call(u32 func, u32 eb APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); @@ -652,9 +666,18 @@ static u8 apm_bios_call_simple(u32 func, struct desc_struct save_desc_40; struct desc_struct *gdt; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + cpus = apm_save_cpus(); cpu = get_cpu(); + +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + gdt = get_cpu_gdt_table(cpu); save_desc_40 = gdt[0x40 / 8]; gdt[0x40 / 8] = bad_bios_desc; @@ -665,6 +688,11 @@ static u8 apm_bios_call_simple(u32 func, APM_DO_RESTORE_SEGS; apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + put_cpu(); apm_restore_cpus(cpus); return error; @@ -932,7 +960,7 @@ recalc: static void apm_power_off(void) { - unsigned char po_bios_call[] = { + const unsigned char po_bios_call[] = { 0xb8, 0x00, 0x10, /* movw $0x1000,ax */ 0x8e, 0xd0, /* movw ax,ss */ 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */ @@ -1872,7 +1900,10 @@ static const struct file_operations apm_ static struct miscdevice apm_device = { APM_MINOR_DEV, "apm_bios", - &apm_bios_fops + &apm_bios_fops, + {NULL, NULL}, + NULL, + NULL }; @@ -1982,210 +2013,210 @@ static struct dmi_system_id __initdata a print_if_true, KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.", { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), - DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), }, + DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), }, NULL }, { /* Handle 
problems with APM on the C600 */ broken_ps2_resume, "Dell Latitude C600", { DMI_MATCH(DMI_SYS_VENDOR, "Dell"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), }, + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), }, NULL }, { /* Allow interrupts during suspend on Dell Latitude laptops*/ set_apm_ints, "Dell Latitude", { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), } + DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }, NULL }, { /* APM crashes */ apm_is_horked, "Dell Inspiron 2500", { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, + DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, NULL }, { /* Allow interrupts during suspend on Dell Inspiron laptops*/ set_apm_ints, "Dell Inspiron", { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), }, + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), }, NULL }, { /* Handle problems with APM on Inspiron 5000e */ broken_apm_power, "Dell Inspiron 5000e", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "A04"), - DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), }, + DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), }, NULL }, { /* Handle problems with APM on Inspiron 2500 */ broken_apm_power, "Dell Inspiron 2500", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "A12"), - DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), }, + DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), }, NULL }, { /* APM crashes */ apm_is_horked, "Dell Dimension 4100", { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, + DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, NULL }, { /* Allow interrupts during suspend on Compaq Laptops*/ set_apm_ints, "Compaq 12XL125", { 
DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"), DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"4.06"), }, + DMI_MATCH(DMI_BIOS_VERSION,"4.06"), }, NULL }, { /* Allow interrupts during APM or the clock goes slow */ set_apm_ints, "ASUSTeK", { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), }, + DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), }, NULL }, { /* APM blows on shutdown */ apm_is_horked, "ABIT KX7-333[R]", { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"), - DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), }, + DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), }, NULL }, { /* APM crashes */ apm_is_horked, "Trigem Delhi3", { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"), - DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), }, + DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), }, NULL }, { /* APM crashes */ apm_is_horked, "Fujitsu-Siemens", { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"), - DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), }, + DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), }, NULL }, { /* APM crashes */ apm_is_horked_d850md, "Intel D850MD", { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), }, + DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), }, NULL }, { /* APM crashes */ apm_is_horked, "Intel D810EMO", { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), - DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), }, + DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), }, NULL }, { /* APM crashes */ apm_is_horked, "Dell XPS-Z", { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."), DMI_MATCH(DMI_BIOS_VERSION, "A11"), - DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), }, + DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), }, NULL }, { /* APM crashes */ apm_is_horked, "Sharp PC-PJ/AX", { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"), DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"), 
DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"), - DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), }, + DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), }, NULL }, { /* APM crashes */ apm_is_horked, "Dell Inspiron 2500", { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"), DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"), DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"), - DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, + DMI_MATCH(DMI_BIOS_VERSION,"A11"), }, NULL }, { /* APM idle hangs */ apm_likes_to_melt, "Jabil AMD", { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), }, + DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), }, NULL }, { /* APM idle hangs */ apm_likes_to_melt, "AMI Bios", { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."), - DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), }, + DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0206H"), - DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), }, + DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-N505VX */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"), - DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-XG29 */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"), - DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"), - DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), }, + 
DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-Z600NE */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"), - DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"), - DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-Z505LS */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"), - DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"), - DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), }, + DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-F104K */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"), - DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, 
"R0208P1"), - DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-C1VE */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"), - DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), }, + DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), }, NULL }, { /* Handle problems with APM on Sony Vaio PCG-C1VE */ swab_apm_power_in_minutes, "Sony VAIO", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"), - DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), }, + DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), }, NULL }, { /* broken PM poweroff bios */ set_realmode_power_off, "Award Software v4.60 PGMA", { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."), DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"), - DMI_MATCH(DMI_BIOS_DATE, "134526184"), }, + DMI_MATCH(DMI_BIOS_DATE, "134526184"), }, NULL }, /* Generic per vendor APM settings */ { /* Allow interrupts during suspend on IBM laptops */ set_apm_ints, "IBM", - { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, + { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), }, NULL }, - { } + { NULL, NULL, {DMI_MATCH(DMI_NONE, NULL)}, NULL} }; /* diff -urNp linux-2.6.21.5/arch/i386/kernel/asm-offsets.c linux-2.6.21.5/arch/i386/kernel/asm-offsets.c --- linux-2.6.21.5/arch/i386/kernel/asm-offsets.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/asm-offsets.c 2007-05-24 22:04:52.000000000 -0400 @@ -16,6 +16,7 @@ #include #include #include +#include #define DEFINE(sym, val) \ asm volatile("\n->" #sym " %0 " #val : : "i" (val)) @@ -52,6 +53,7 @@ void foo(void) OFFSET(TI_exec_domain, thread_info, exec_domain); OFFSET(TI_flags, thread_info, flags); OFFSET(TI_status, thread_info, status); + OFFSET(TI_cpu, thread_info, cpu); OFFSET(TI_preempt_count, thread_info, preempt_count); OFFSET(TI_addr_limit, thread_info, addr_limit); OFFSET(TI_restart_block, thread_info, restart_block); 
@@ -94,12 +96,14 @@ void foo(void) sizeof(struct tss_struct)); DEFINE(PAGE_SIZE_asm, PAGE_SIZE); + DEFINE(PTRS_PER_PTE_asm, PTRS_PER_PTE); DEFINE(VDSO_PRELINK, VDSO_PRELINK); OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); BLANK(); - OFFSET(PDA_cpu, i386_pda, cpu_number); + DEFINE(PDA_size, sizeof __cpu_pda[0]); + OFFSET(PDA_cpu, i386_pda, cpu_number); OFFSET(PDA_pcurrent, i386_pda, pcurrent); #ifdef CONFIG_PARAVIRT @@ -110,5 +114,6 @@ void foo(void) OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit); OFFSET(PARAVIRT_iret, paravirt_ops, iret); OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0); + OFFSET(PARAVIRT_write_cr0, paravirt_ops, write_cr0); #endif } diff -urNp linux-2.6.21.5/arch/i386/kernel/cpu/common.c linux-2.6.21.5/arch/i386/kernel/cpu/common.c --- linux-2.6.21.5/arch/i386/kernel/cpu/common.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/cpu/common.c 2007-06-18 18:29:52.000000000 -0400 @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -22,16 +21,17 @@ #include "cpu.h" -DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); -EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); - -struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; EXPORT_SYMBOL(_cpu_pda); static int cachesize_override __cpuinitdata = -1; static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_KERNEXEC) +static int disable_x86_sep __cpuinitdata = 1; +#else static int disable_x86_sep __cpuinitdata; +#endif struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -609,52 +609,6 @@ struct pt_regs * __devinit idle_regs(str return regs; } -static __cpuinit int alloc_gdt(int cpu) -{ - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); - struct desc_struct *gdt; - struct i386_pda *pda; - - gdt = (struct desc_struct *)cpu_gdt_descr->address; - pda = cpu_pda(cpu); - - /* - * This is a horrible 
hack to allocate the GDT. The problem - * is that cpu_init() is called really early for the boot CPU - * (and hence needs bootmem) but much later for the secondary - * CPUs, when bootmem will have gone away - */ - if (NODE_DATA(0)->bdata->node_bootmem_map) { - BUG_ON(gdt != NULL || pda != NULL); - - gdt = alloc_bootmem_pages(PAGE_SIZE); - pda = alloc_bootmem(sizeof(*pda)); - /* alloc_bootmem(_pages) panics on failure, so no check */ - - memset(gdt, 0, PAGE_SIZE); - memset(pda, 0, sizeof(*pda)); - } else { - /* GDT and PDA might already have been allocated if - this is a CPU hotplug re-insertion. */ - if (gdt == NULL) - gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); - - if (pda == NULL) - pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); - - if (unlikely(!gdt || !pda)) { - free_pages((unsigned long)gdt, 0); - kfree(pda); - return 0; - } - } - - cpu_gdt_descr->address = (unsigned long)gdt; - cpu_pda(cpu) = pda; - - return 1; -} - /* Initial PDA used by boot CPU */ struct i386_pda boot_pda = { ._pda = &boot_pda, @@ -672,59 +626,43 @@ static inline void set_kernel_fs(void) /* Initialize the CPU's GDT and PDA. The boot CPU does this for itself, but secondaries find this done for them. */ -__cpuinit int init_gdt(int cpu, struct task_struct *idle) +__cpuinit void init_gdt(int cpu, struct task_struct *idle) { - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); - struct desc_struct *gdt; - struct i386_pda *pda; - - /* For non-boot CPUs, the GDT and PDA should already have been - allocated. 
*/ - if (!alloc_gdt(cpu)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); - return 0; - } - - gdt = (struct desc_struct *)cpu_gdt_descr->address; - pda = cpu_pda(cpu); - - BUG_ON(gdt == NULL || pda == NULL); + struct desc_struct *gdt = get_cpu_gdt_table(cpu); + struct i386_pda *pda = __cpu_pda + cpu; + + cpu_gdt_descr[cpu].address = gdt; /* * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(gdt, cpu_gdt_table, GDT_SIZE); - cpu_gdt_descr->size = GDT_SIZE - 1; + if (cpu) + memcpy(gdt, cpu_gdt_table, GDT_SIZE); + cpu_gdt_descr[cpu].size = GDT_SIZE - 1; pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, (u32 *)&gdt[GDT_ENTRY_PDA].b, (unsigned long)pda, sizeof(*pda) - 1, - 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ + 0x80 | DESCTYPE_S | 0x3, 0x4); /* present read-write accessed data segment */ - memset(pda, 0, sizeof(*pda)); - pda->_pda = pda; - pda->cpu_number = cpu; pda->pcurrent = idle; - - return 1; + pda->irq_regs = NULL; } void __cpuinit cpu_set_gdt(int cpu) { - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); - /* Reinit these anyway, even if they've already been done (on the boot CPU, this will transition from the boot gdt+pda to the real ones). */ - load_gdt(cpu_gdt_descr); + load_gdt(&cpu_gdt_descr[cpu]); set_kernel_fs(); } /* Common CPU init for both boot and secondary CPUs */ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) { - struct tss_struct * t = &per_cpu(init_tss, cpu); + struct tss_struct * t = init_tss + cpu; struct thread_struct *thread = &curr->thread; if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -805,12 +743,7 @@ void __cpuinit cpu_init(void) /* Set up the real GDT and PDA, so we can transition from the boot versions. */ - if (!init_gdt(cpu, curr)) { - /* failed to allocate something; not much we can do... 
*/ - for (;;) - local_irq_enable(); - } - + init_gdt(cpu, curr); cpu_set_gdt(cpu); _cpu_init(cpu, curr); } diff -urNp linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c --- linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c 2007-05-24 22:04:52.000000000 -0400 @@ -563,7 +563,7 @@ static struct dmi_system_id sw_any_bug_d DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), }, }, - { } + { NULL, NULL, {DMI_MATCH(DMI_NONE, NULL)}, NULL } }; #endif diff -urNp linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c --- linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c 2007-05-24 22:04:52.000000000 -0400 @@ -229,7 +229,7 @@ static struct cpu_model models[] = { &cpu_ids[CPU_MP4HT_D0], NULL, 0, NULL }, { &cpu_ids[CPU_MP4HT_E0], NULL, 0, NULL }, - { NULL, } + { NULL, NULL, 0, NULL} }; #undef _BANIAS #undef BANIAS @@ -404,7 +404,7 @@ static struct dmi_system_id sw_any_bug_d DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"), }, }, - { } + { NULL, NULL, {DMI_MATCH(DMI_NONE, NULL)}, NULL } }; #endif diff -urNp linux-2.6.21.5/arch/i386/kernel/cpu/mcheck/therm_throt.c linux-2.6.21.5/arch/i386/kernel/cpu/mcheck/therm_throt.c --- linux-2.6.21.5/arch/i386/kernel/cpu/mcheck/therm_throt.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/cpu/mcheck/therm_throt.c 2007-05-24 22:04:52.000000000 -0400 @@ -148,7 +148,7 @@ static __cpuinit int thermal_throttle_cp return NOTIFY_OK; } -static struct notifier_block thermal_throttle_cpu_notifier = +static __cpuinitdata struct notifier_block thermal_throttle_cpu_notifier = { .notifier_call = thermal_throttle_cpu_callback, }; diff -urNp linux-2.6.21.5/arch/i386/kernel/cpu/mtrr/generic.c 
linux-2.6.21.5/arch/i386/kernel/cpu/mtrr/generic.c --- linux-2.6.21.5/arch/i386/kernel/cpu/mtrr/generic.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/cpu/mtrr/generic.c 2007-05-24 22:04:52.000000000 -0400 @@ -21,7 +21,7 @@ struct mtrr_state { }; static unsigned long smp_changes_mask; -static struct mtrr_state mtrr_state = {}; +static struct mtrr_state mtrr_state; #undef MODULE_PARAM_PREFIX #define MODULE_PARAM_PREFIX "mtrr." diff -urNp linux-2.6.21.5/arch/i386/kernel/crash.c linux-2.6.21.5/arch/i386/kernel/crash.c --- linux-2.6.21.5/arch/i386/kernel/crash.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/crash.c 2007-05-24 22:04:52.000000000 -0400 @@ -55,7 +55,7 @@ static int crash_nmi_callback(struct not return NOTIFY_STOP; local_irq_disable(); - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff -urNp linux-2.6.21.5/arch/i386/kernel/doublefault.c linux-2.6.21.5/arch/i386/kernel/doublefault.c --- linux-2.6.21.5/arch/i386/kernel/doublefault.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/doublefault.c 2007-05-24 22:04:52.000000000 -0400 @@ -11,17 +11,17 @@ #define DOUBLEFAULT_STACKSIZE (1024) static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE]; -#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE) +#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE-2) #define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000) static void doublefault_fn(void) { - struct Xgt_desc_struct gdt_desc = {0, 0}; + struct Xgt_desc_struct gdt_desc = {0, NULL, 0}; unsigned long gdt, tss; store_gdt(&gdt_desc); - gdt = gdt_desc.address; + gdt = (unsigned long)gdt_desc.address; printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size); @@ -57,10 +57,10 @@ struct tss_struct doublefault_tss __cach .eip = (unsigned long) doublefault_fn, .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 
bit is always set */ .esp = STACK_START, - .es = __USER_DS, + .es = __KERNEL_DS, .cs = __KERNEL_CS, .ss = __KERNEL_DS, - .ds = __USER_DS, + .ds = __KERNEL_DS, .__cr3 = __pa(swapper_pg_dir) }; diff -urNp linux-2.6.21.5/arch/i386/kernel/efi.c linux-2.6.21.5/arch/i386/kernel/efi.c --- linux-2.6.21.5/arch/i386/kernel/efi.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/efi.c 2007-05-24 22:04:52.000000000 -0400 @@ -63,82 +63,43 @@ extern void * boot_ioremap(unsigned long static unsigned long efi_rt_eflags; static DEFINE_SPINLOCK(efi_rt_lock); -static pgd_t efi_bak_pg_dir_pointer[2]; +static pgd_t __initdata efi_bak_pg_dir_pointer[KERNEL_PGD_PTRS] __attribute__ ((aligned (4096))); static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) { - unsigned long cr4; - unsigned long temp; - struct Xgt_desc_struct *cpu_gdt_descr; - spin_lock(&efi_rt_lock); local_irq_save(efi_rt_eflags); - cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in - * page directory. - */ - cr4 = read_cr4(); - - if (cr4 & X86_CR4_PSE) { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - swapper_pg_dir[0].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - } else { - efi_bak_pg_dir_pointer[0].pgd = - swapper_pg_dir[pgd_index(0)].pgd; - efi_bak_pg_dir_pointer[1].pgd = - swapper_pg_dir[pgd_index(0x400000)].pgd; - swapper_pg_dir[pgd_index(0)].pgd = - swapper_pg_dir[pgd_index(PAGE_OFFSET)].pgd; - temp = PAGE_OFFSET + 0x400000; - swapper_pg_dir[pgd_index(0x400000)].pgd = - swapper_pg_dir[pgd_index(temp)].pgd; - } + clone_pgd_range(efi_bak_pg_dir_pointer, swapper_pg_dir, KERNEL_PGD_PTRS); + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); /* * After the lock is released, the original page table is restored. 
*/ - local_flush_tlb(); + __flush_tlb_all(); - cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); + cpu_gdt_descr[0].address = __pa(cpu_gdt_descr[0].address); + load_gdt((struct Xgt_desc_struct *) __pa(&cpu_gdt_descr[0])); } static void efi_call_phys_epilog(void) __releases(efi_rt_lock) { - unsigned long cr4; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - - cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); - load_gdt(cpu_gdt_descr); - - cr4 = read_cr4(); + cpu_gdt_descr[0].address = (unsigned long) __va(cpu_gdt_descr[0].address); + load_gdt(&cpu_gdt_descr[0]); - if (cr4 & X86_CR4_PSE) { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - } else { - swapper_pg_dir[pgd_index(0)].pgd = - efi_bak_pg_dir_pointer[0].pgd; - swapper_pg_dir[pgd_index(0x400000)].pgd = - efi_bak_pg_dir_pointer[1].pgd; - } + clone_pgd_range(swapper_pg_dir, efi_bak_pg_dir_pointer, KERNEL_PGD_PTRS); /* * After the lock is released, the original page table is restored. */ - local_flush_tlb(); + __flush_tlb_all(); local_irq_restore(efi_rt_eflags); spin_unlock(&efi_rt_lock); } -static efi_status_t +static efi_status_t __init phys_efi_set_virtual_address_map(unsigned long memory_map_size, unsigned long descriptor_size, u32 descriptor_version, @@ -154,7 +115,7 @@ phys_efi_set_virtual_address_map(unsigne return status; } -static efi_status_t +static efi_status_t __init phys_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) { efi_status_t status; diff -urNp linux-2.6.21.5/arch/i386/kernel/efi_stub.S linux-2.6.21.5/arch/i386/kernel/efi_stub.S --- linux-2.6.21.5/arch/i386/kernel/efi_stub.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/efi_stub.S 2007-05-24 22:04:52.000000000 -0400 @@ -6,6 +6,7 @@ */ #include +#include #include /* @@ -20,7 +21,7 @@ * service functions will comply with gcc calling convention, too. */ -.text +__INIT ENTRY(efi_call_phys) /* * 0. 
The function can only be called in Linux kernel. So CS has been @@ -36,9 +37,7 @@ ENTRY(efi_call_phys) * The mapping of lower virtual memory has been created in prelog and * epilog. */ - movl $1f, %edx - subl $__PAGE_OFFSET, %edx - jmp *%edx + jmp 1f-__PAGE_OFFSET 1: /* @@ -47,14 +46,8 @@ ENTRY(efi_call_phys) * parameter 2, ..., param n. To make things easy, we save the return * address of efi_call_phys in a global variable. */ - popl %edx - movl %edx, saved_return_addr - /* get the function pointer into ECX*/ - popl %ecx - movl %ecx, efi_rt_function_ptr - movl $2f, %edx - subl $__PAGE_OFFSET, %edx - pushl %edx + popl (saved_return_addr) + popl (efi_rt_function_ptr) /* * 3. Clear PG bit in %CR0. @@ -73,9 +66,8 @@ ENTRY(efi_call_phys) /* * 5. Call the physical function. */ - jmp *%ecx + call *(efi_rt_function_ptr-__PAGE_OFFSET) -2: /* * 6. After EFI runtime service returns, control will return to * following instruction. We'd better readjust stack pointer first. @@ -85,37 +77,29 @@ ENTRY(efi_call_phys) /* * 7. Restore PG bit */ - movl %cr0, %edx - orl $0x80000000, %edx - movl %edx, %cr0 - jmp 1f -1: /* * 8. Now restore the virtual mode from flat mode by * adding EIP with PAGE_OFFSET. */ - movl $1f, %edx - jmp *%edx + movl %cr0, %edx + orl $0x80000000, %edx + movl %edx, %cr0 + jmp 1f+__PAGE_OFFSET 1: /* * 9. Balance the stack. And because EAX contain the return value, * we'd better not clobber it. */ - leal efi_rt_function_ptr, %edx - movl (%edx), %ecx - pushl %ecx + pushl (efi_rt_function_ptr) /* - * 10. Push the saved return address onto the stack and return. + * 10. Return to the saved return address. 
*/ - leal saved_return_addr, %edx - movl (%edx), %ecx - pushl %ecx - ret + jmpl *(saved_return_addr) .previous -.data +__INITDATA saved_return_addr: .long 0 efi_rt_function_ptr: diff -urNp linux-2.6.21.5/arch/i386/kernel/entry.S linux-2.6.21.5/arch/i386/kernel/entry.S --- linux-2.6.21.5/arch/i386/kernel/entry.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/entry.S 2007-05-24 22:04:52.000000000 -0400 @@ -49,7 +49,6 @@ #include #include #include -#include #include #include "irq_vectors.h" @@ -97,7 +96,7 @@ VM_MASK = 0x00020000 #define resume_userspace_sig resume_userspace #endif -#define SAVE_ALL \ +#define __SAVE_ALL(_DS) \ cld; \ pushl %fs; \ CFI_ADJUST_CFA_OFFSET 4;\ @@ -129,12 +128,26 @@ VM_MASK = 0x00020000 pushl %ebx; \ CFI_ADJUST_CFA_OFFSET 4;\ CFI_REL_OFFSET ebx, 0;\ - movl $(__USER_DS), %edx; \ + movl $(_DS), %edx; \ movl %edx, %ds; \ movl %edx, %es; \ movl $(__KERNEL_PDA), %edx; \ movl %edx, %fs +#ifdef CONFIG_PAX_KERNEXEC +#define SAVE_ALL \ + __SAVE_ALL(__KERNEL_DS); \ + GET_CR0_INTO_EDX; \ + movl %edx, %esi; \ + orl $0x10000, %edx; \ + xorl %edx, %esi; \ + SET_CR0_FROM_EDX +#elif defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) || defined(CONFIG_PAX_MEMORY_UDEREF) +#define SAVE_ALL __SAVE_ALL(__KERNEL_DS) +#else +#define SAVE_ALL __SAVE_ALL(__USER_DS) +#endif + #define RESTORE_INT_REGS \ popl %ebx; \ CFI_ADJUST_CFA_OFFSET -4;\ @@ -248,7 +261,17 @@ check_userspace: movb PT_CS(%esp), %al andl $(VM_MASK | SEGMENT_RPL_MASK), %eax cmpl $USER_RPL, %eax + +#ifdef CONFIG_PAX_KERNEXEC + jae resume_userspace + + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX + jmp resume_kernel +#else jb resume_kernel # not returning to v8086 or userspace +#endif ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt @@ -308,10 +331,9 @@ sysenter_past_esp: #ifndef CONFIG_COMPAT_VDSO /* * Push current_thread_info()->sysenter_return to the stack. 
- * A tiny bit of offset fixup is necessary - 4*4 means the 4 words - * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) + GET_THREAD_INFO(%ebp) + pushl TI_sysenter_return(%ebp) #else pushl $SYSENTER_RETURN #endif @@ -322,9 +344,17 @@ sysenter_past_esp: * Load the potential sixth argument from user stack. * Careful about security. */ + movl 12(%esp),%ebp + +#ifdef CONFIG_PAX_MEMORY_UDEREF + mov 16(%esp),%ds +1: movl %ds:(%ebp),%ebp +#else cmpl $__PAGE_OFFSET-3,%ebp jae syscall_fault 1: movl (%ebp),%ebp +#endif + .section __ex_table,"a" .align 4 .long 1b,syscall_fault @@ -347,20 +377,37 @@ sysenter_past_esp: movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work + +#ifdef CONFIG_PAX_RANDKSTACK + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + call pax_randomize_kstack + popl %eax + CFI_ADJUST_CFA_OFFSET -4 +#endif + /* if something modifies registers it must also disable sysexit */ movl PT_EIP(%esp), %edx movl PT_OLDESP(%esp), %ecx xorl %ebp,%ebp TRACE_IRQS_ON 1: mov PT_FS(%esp), %fs +2: mov PT_DS(%esp), %ds +3: mov PT_ES(%esp), %es ENABLE_INTERRUPTS_SYSEXIT CFI_ENDPROC .pushsection .fixup,"ax" -2: movl $0,PT_FS(%esp) +4: movl $0,PT_FS(%esp) jmp 1b +5: movl $0,PT_DS(%esp) + jmp 2b +6: movl $0,PT_ES(%esp) + jmp 3b .section __ex_table,"a" .align 4 - .long 1b,2b + .long 1b,4b + .long 2b,5b + .long 3b,6b .popsection ENDPROC(sysenter_entry) @@ -393,6 +440,10 @@ syscall_exit: testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work +#ifdef CONFIG_PAX_RANDKSTACK + call pax_randomize_kstack +#endif + restore_all: movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS # Warning: PT_OLDSS(%esp) contains the wrong/random values if we @@ -561,8 +612,7 @@ END(syscall_badsys) #define FIXUP_ESPFIX_STACK \ /* since we are on a wrong stack, we cant make it a C code :( */ \ movl %fs:PDA_cpu, %ebx; \ - PER_CPU(cpu_gdt_descr, %ebx); \ - movl GDS_address(%ebx), %ebx; \ + movl 
GDS_address+cpu_gdt_descr(,%ebx,8), %ebx; \ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ addl %esp, %eax; \ pushl $__KERNEL_DS; \ @@ -587,7 +637,7 @@ END(syscall_badsys) * Build the entry stubs and pointer table with * some assembler magic. */ -.data +.section .rodata,"a",@progbits ENTRY(interrupt) .text @@ -692,12 +742,21 @@ error_code: popl %ecx CFI_ADJUST_CFA_OFFSET -4 /*CFI_REGISTER es, ecx*/ + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + movl %edx, %esi + orl $0x10000, %edx + xorl %edx, %esi + SET_CR0_FROM_EDX +#endif + movl PT_FS(%esp), %edi # get the function address movl PT_ORIG_EAX(%esp), %edx # get the error code movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart mov %ecx, PT_FS(%esp) /*CFI_REL_OFFSET fs, ES*/ - movl $(__USER_DS), %ecx + movl $(__KERNEL_DS), %ecx movl %ecx, %ds movl %ecx, %es movl %esp,%eax # pt_regs pointer @@ -831,6 +890,13 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX +#endif + jmp restore_nocheck_notrace CFI_ENDPROC @@ -871,6 +937,13 @@ nmi_espfix_stack: FIXUP_ESPFIX_STACK # %eax == %esp xorl %edx,%edx # zero error code call do_nmi + +#ifdef CONFIG_PAX_KERNEXEC + GET_CR0_INTO_EDX + xorl %esi, %edx + SET_CR0_FROM_EDX +#endif + RESTORE_REGS lss 12+4(%esp), %esp # back to espfix stack CFI_ADJUST_CFA_OFFSET -24 @@ -1034,7 +1107,6 @@ ENTRY(kernel_thread_helper) CFI_ENDPROC ENDPROC(kernel_thread_helper) -.section .rodata,"a" #include "syscall_table.S" syscall_table_size=(.-sys_call_table) diff -urNp linux-2.6.21.5/arch/i386/kernel/head.S linux-2.6.21.5/arch/i386/kernel/head.S --- linux-2.6.21.5/arch/i386/kernel/head.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/head.S 2007-06-18 18:29:52.000000000 -0400 @@ -45,6 +45,16 @@ */ #define INIT_MAP_BEYOND_END (128*1024) +#ifdef CONFIG_PAX_KERNEXEC +/* PaX: fill first page in .text with int3 to catch NULL 
derefs in kernel mode */ +.fill 4096,1,0xcc +#endif + +/* + * Real beginning of normal "text" segment + */ +ENTRY(stext) +ENTRY(_stext) /* * 32-bit kernel entrypoint; only used by the boot CPU. On entry, @@ -73,6 +83,45 @@ ENTRY(startup_32) movl %eax,%fs movl %eax,%gs + /* get the PDA pointer */ + movl $boot_pda, %eax + + /* slot the PDA address into the GDT */ + mov %ax, (cpu_gdt_table - __PAGE_OFFSET + __KERNEL_PDA+0+2) /* base & 0x0000ffff */ + shr $16, %eax + mov %al, (cpu_gdt_table - __PAGE_OFFSET + __KERNEL_PDA+4+0) /* base & 0x00ff0000 */ + mov %ah, (cpu_gdt_table - __PAGE_OFFSET + __KERNEL_PDA+4+3) /* base & 0xff000000 */ + +#ifdef CONFIG_PAX_MEMORY_UDEREF + /* check for VMware */ + movl $0x564d5868,%eax + xorl %ebx,%ebx + movl $0xa,%ecx + movl $0x5658,%edx + in (%dx),%eax + cmpl $0x564d5868,%ebx + jz 1f + + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c09700),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_KERNEL_DS * 8 + 4) +1: + movl $((((__PAGE_OFFSET-1) & 0xf0000000) >> 12) | 0x00c0f300),%eax + movl %eax,(cpu_gdt_table - __PAGE_OFFSET + GDT_ENTRY_DEFAULT_USER_DS * 8 + 4) +#endif + +#ifdef CONFIG_PAX_KERNEXEC + movl $__KERNEL_TEXT_OFFSET,%eax + movw %ax,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 2) + rorl $16,%eax + movb %al,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 4) + movb %ah,(cpu_gdt_table - __PAGE_OFFSET + __KERNEL_CS + 7) + + movb %al,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 4) + movb %ah,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 7) + rorl $16,%eax + movw %ax,(boot_gdt_table - __PAGE_OFFSET + __BOOT_CS + 2) +#endif + /* * Clear BSS first so that there are no surprises... * No need to cld as DF is already clear from cld above... @@ -120,24 +169,42 @@ ENTRY(startup_32) * Warning: don't use %esi or the stack in this code. However, %esp * can be used as a GPR if you really need it... 
*/ -page_pde_offset = (__PAGE_OFFSET >> 20); - +#ifdef CONFIG_X86_PAE +page_pde_offset = ((__PAGE_OFFSET >> 21) * (PAGE_SIZE_asm / PTRS_PER_PTE_asm)); +#else +page_pde_offset = ((__PAGE_OFFSET >> 22) * (PAGE_SIZE_asm / PTRS_PER_PTE_asm)); +#endif movl $(pg0 - __PAGE_OFFSET), %edi +#ifdef CONFIG_X86_PAE + movl $(swapper_pm_dir - __PAGE_OFFSET), %edx +#else movl $(swapper_pg_dir - __PAGE_OFFSET), %edx - movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */ +#endif + movl $0x063, %eax /* 0x063 = DIRTY+ACCESSED+PRESENT+RW */ 10: - leal 0x007(%edi),%ecx /* Create PDE entry */ + leal 0x063(%edi),%ecx /* Create PDE entry */ movl %ecx,(%edx) /* Store identity PDE entry */ movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ +#ifdef CONFIG_X86_PAE + movl $0,4(%edx) + movl $0,page_pde_offset+4(%edx) + addl $8,%edx + movl $512, %ecx +#else addl $4,%edx movl $1024, %ecx +#endif 11: stosl +#ifdef CONFIG_X86_PAE + movl $0,(%edi) + addl $4,%edi +#endif addl $0x1000,%eax loop 11b /* End condition: we must map up to and including INIT_MAP_BEYOND_END */ - /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */ - leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp + /* bytes beyond the end of our own page tables; the +0x063 is the attribute bits */ + leal (INIT_MAP_BEYOND_END+0x063)(%edi),%ebp cmpl %ebp,%eax jb 10b movl %edi,(init_pg_tables_end - __PAGE_OFFSET) @@ -169,6 +236,11 @@ ENTRY(startup_32_smp) movl %eax,%fs movl %eax,%gs + /* This is a secondary processor (AP) */ + xorl %ebx,%ebx + incl %ebx +#endif /* CONFIG_SMP */ + /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. @@ -184,26 +256,27 @@ ENTRY(startup_32_smp) * not yet offset PAGE_OFFSET.. */ #define cr4_bits mmu_cr4_features-__PAGE_OFFSET +3: movl cr4_bits,%edx andl %edx,%edx - jz 6f + jz 5f movl %cr4,%eax # Turn on paging options (PSE,PAE,..) 
orl %edx,%eax movl %eax,%cr4 - btl $5, %eax # check if PAE is enabled - jnc 6f +#ifdef CONFIG_X86_PAE + movl %ebx,%edi /* Check if extended functions are implemented */ movl $0x80000000, %eax cpuid cmpl $0x80000000, %eax - jbe 6f + jbe 4f mov $0x80000001, %eax cpuid /* Execute Disable bit supported? */ btl $20, %edx - jnc 6f + jnc 4f /* Setup EFER (Extended Feature Enable Register) */ movl $0xc0000080, %ecx @@ -213,13 +286,13 @@ ENTRY(startup_32_smp) /* Make changes effective */ wrmsr -6: - /* This is a secondary processor (AP) */ - xorl %ebx,%ebx - incl %ebx + btsl $63,__supported_pte_mask-__PAGE_OFFSET + movl $1,nx_enabled-__PAGE_OFFSET -#endif /* CONFIG_SMP */ -3: +4: + movl %edi,%ebx +#endif +5: /* * Enable paging @@ -244,9 +317,7 @@ ENTRY(startup_32_smp) #ifdef CONFIG_SMP andl %ebx,%ebx - jz 1f /* Initial CPU cleans BSS */ - jmp checkCPUtype -1: + jnz checkCPUtype /* Initial CPU cleans BSS */ #endif /* CONFIG_SMP */ /* @@ -318,14 +389,14 @@ is386: movl $2,%ecx # set MP movl %eax,%cr0 call check_x87 - call setup_pda - lgdt early_gdt_descr + GET_THREAD_INFO(%ecx) + movl TI_cpu(%ecx),%ecx + lgdt cpu_gdt_descr(,%ecx,8) lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers movl %eax,%ss # after changing gdt. - movl $(__USER_DS),%eax # DS/ES contains default USER segment movl %eax,%ds movl %eax,%es @@ -366,23 +437,6 @@ check_x87: ret /* - * Point the GDT at this CPU's PDA. On boot this will be - * cpu_gdt_table and boot_pda; for secondary CPUs, these will be - * that CPU's GDT and PDA. 
- */ -ENTRY(setup_pda) - /* get the PDA pointer */ - movl start_pda, %eax - - /* slot the PDA address into the GDT */ - mov early_gdt_descr+2, %ecx - mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ - shr $16, %eax - mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ - mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ - ret - -/* * setup_idt * * sets up a idt with 256 entries pointing to @@ -470,8 +524,8 @@ hlt_loop: /* This is the default interrupt "handler" :-) */ ALIGN ignore_int: - cld #ifdef CONFIG_PRINTK + cld pushl %eax pushl %ecx pushl %edx @@ -506,7 +560,7 @@ ignore_int: #ifdef CONFIG_PARAVIRT startup_paravirt: cld - movl $(init_thread_union+THREAD_SIZE),%esp + movl $(init_thread_union+THREAD_SIZE-8),%esp /* We take pains to preserve all the regs. */ pushl %edx @@ -535,30 +589,63 @@ unhandled_paravirt: ud2 #endif -/* - * Real beginning of normal "text" segment - */ -ENTRY(stext) -ENTRY(_stext) - -/* - * BSS section - */ -.section ".bss.page_aligned","w" +.section .swapper_pg_dir,"a",@progbits ENTRY(swapper_pg_dir) +#ifdef CONFIG_X86_PAE + .long swapper_pm_dir-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*8-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*16-__PAGE_OFFSET+1 + .long 0 + .long swapper_pm_dir+512*24-__PAGE_OFFSET+1 + .long 0 +#else .fill 1024,4,0 +#endif + +#ifdef CONFIG_X86_PAE +.section .swapper_pm_dir,"a",@progbits +ENTRY(swapper_pm_dir) + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 + .fill 512,8,0 +#endif + +.section .empty_zero_page,"a",@progbits ENTRY(empty_zero_page) .fill 4096,1,0 /* - * This starts the data section. - */ -.data -ENTRY(start_pda) - .long boot_pda + * The IDT has to be page-aligned to simplify the Pentium + * F0 0F bug workaround.. We have a special link segment + * for this. 
+ */ +.section .idt,"a",@progbits +ENTRY(idt_table) + .fill 256,8,0 + +.section .rodata,"a",@progbits +cpu=0 +ENTRY(_cpu_pda) +.rept NR_CPUS + .long __cpu_pda + cpu*PDA_size +cpu=cpu+1 +.endr + +cpu=0 +ENTRY(__cpu_pda) +.rept NR_CPUS +1: .long 1b + .long cpu + .long 0 + .long 0 +cpu=cpu+1 +.endr ENTRY(stack_start) - .long init_thread_union+THREAD_SIZE + .long init_thread_union+THREAD_SIZE-8 .long __BOOT_DS ready: .byte 0 @@ -597,10 +684,13 @@ idt_descr: # boot GDT descriptor (later on used by CPU#0): .word 0 # 32 bit align gdt_desc.address -ENTRY(early_gdt_descr) +ENTRY(cpu_gdt_descr) .word GDT_ENTRIES*8-1 .long cpu_gdt_table + .word 0 # 32 bit align gdt_desc.address + .fill NR_CPUS*8-6,1,0 # space for the other GDT descriptors + /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -608,13 +698,13 @@ ENTRY(early_gdt_descr) .align L1_CACHE_BYTES ENTRY(boot_gdt_table) .fill GDT_ENTRY_BOOT_CS,8,0 - .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* kernel 4GB data at 0x00000000 */ /* * The Global Descriptor Table contains 28 quadwords, per-CPU. 
*/ - .align L1_CACHE_BYTES + .align PAGE_SIZE_asm ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ @@ -629,10 +719,10 @@ ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* 0x53 reserved */ .quad 0x0000000000000000 /* 0x5b reserved */ - .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ - .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ - .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ - .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ + .quad 0x00cf9b000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ + .quad 0x00cf93000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ + .quad 0x00cffb000000ffff /* 0x73 user 4GB code at 0x00000000 */ + .quad 0x00cff3000000ffff /* 0x7b user 4GB data at 0x00000000 */ .quad 0x0000000000000000 /* 0x80 TSS descriptor */ .quad 0x0000000000000000 /* 0x88 LDT descriptor */ @@ -642,24 +732,30 @@ ENTRY(cpu_gdt_table) * They code segments and data segments have fixed 64k limits, * the transfer segment sizes are set at run time. */ - .quad 0x00409a000000ffff /* 0x90 32-bit code */ - .quad 0x00009a000000ffff /* 0x98 16-bit code */ - .quad 0x000092000000ffff /* 0xa0 16-bit data */ - .quad 0x0000920000000000 /* 0xa8 16-bit data */ - .quad 0x0000920000000000 /* 0xb0 16-bit data */ + .quad 0x00409b000000ffff /* 0x90 32-bit code */ + .quad 0x00009b000000ffff /* 0x98 16-bit code */ + .quad 0x000093000000ffff /* 0xa0 16-bit data */ + .quad 0x0000930000000000 /* 0xa8 16-bit data */ + .quad 0x0000930000000000 /* 0xb0 16-bit data */ /* * The APM segments have byte granularity and their bases * are set at run time. All have 64k limits. 
*/ - .quad 0x00409a000000ffff /* 0xb8 APM CS code */ - .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ - .quad 0x004092000000ffff /* 0xc8 APM DS data */ + .quad 0x00409b000000ffff /* 0xb8 APM CS code */ + .quad 0x00009b000000ffff /* 0xc0 APM CS 16 code (16 bit) */ + .quad 0x004093000000ffff /* 0xc8 APM DS data */ - .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ - .quad 0x00cf92000000ffff /* 0xd8 - PDA */ + .quad 0x00c0930000000000 /* 0xd0 - ESPFIX SS */ + .quad 0x00c093000000ffff /* 0xd8 - PDA */ .quad 0x0000000000000000 /* 0xe0 - unused */ .quad 0x0000000000000000 /* 0xe8 - unused */ .quad 0x0000000000000000 /* 0xf0 - unused */ .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ + /* Be sure this is zeroed to avoid false validations in Xen */ + .fill PAGE_SIZE_asm / 8 - GDT_ENTRIES,8,0 + +#ifdef CONFIG_SMP + .fill (NR_CPUS-1) * (PAGE_SIZE_asm / 8),8,0 /* other CPU's GDT */ +#endif diff -urNp linux-2.6.21.5/arch/i386/kernel/i386_ksyms.c linux-2.6.21.5/arch/i386/kernel/i386_ksyms.c --- linux-2.6.21.5/arch/i386/kernel/i386_ksyms.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/i386_ksyms.c 2007-05-24 22:04:52.000000000 -0400 @@ -2,12 +2,16 @@ #include #include +EXPORT_SYMBOL_GPL(cpu_gdt_table); + EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. 
*/ EXPORT_SYMBOL(csum_partial_copy_generic); +EXPORT_SYMBOL(csum_partial_copy_generic_to_user); +EXPORT_SYMBOL(csum_partial_copy_generic_from_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff -urNp linux-2.6.21.5/arch/i386/kernel/i8259.c linux-2.6.21.5/arch/i386/kernel/i8259.c --- linux-2.6.21.5/arch/i386/kernel/i8259.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/i8259.c 2007-05-24 22:04:52.000000000 -0400 @@ -351,7 +351,7 @@ static irqreturn_t math_error_irq(int cp * New motherboards sometimes make IRQ 13 be a PCI interrupt, * so allow interrupt sharing. */ -static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL }; +static struct irqaction fpu_irq = { math_error_irq, 0, CPU_MASK_NONE, "fpu", NULL, NULL, 0, NULL }; void __init init_ISA_irqs (void) { diff -urNp linux-2.6.21.5/arch/i386/kernel/init_task.c linux-2.6.21.5/arch/i386/kernel/init_task.c --- linux-2.6.21.5/arch/i386/kernel/init_task.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/init_task.c 2007-05-24 22:04:52.000000000 -0400 @@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task); * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. */ -DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; +struct tss_struct init_tss[NR_CPUS] ____cacheline_internodealigned_in_smp = { [0 ... NR_CPUS-1] = INIT_TSS }; diff -urNp linux-2.6.21.5/arch/i386/kernel/io_apic.c linux-2.6.21.5/arch/i386/kernel/io_apic.c --- linux-2.6.21.5/arch/i386/kernel/io_apic.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/io_apic.c 2007-05-24 22:04:52.000000000 -0400 @@ -357,8 +357,8 @@ static void set_ioapic_affinity_irq(unsi # define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) # define Dprintk(x...) do { TDprintk(x); } while (0) # else -# define TDprintk(x...) -# define Dprintk(x...) 
+# define TDprintk(x...) do {} while (0) +# define Dprintk(x...) do {} while (0) # endif #define IRQBALANCE_CHECK_ARCH -999 diff -urNp linux-2.6.21.5/arch/i386/kernel/ioport.c linux-2.6.21.5/arch/i386/kernel/ioport.c --- linux-2.6.21.5/arch/i386/kernel/ioport.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/ioport.c 2007-05-24 22:04:52.000000000 -0400 @@ -16,6 +16,7 @@ #include #include #include +#include /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) @@ -64,9 +65,16 @@ asmlinkage long sys_ioperm(unsigned long if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; +#ifdef CONFIG_GRKERNSEC_IO + if (turn_on) { + gr_handle_ioperm(); +#else if (turn_on && !capable(CAP_SYS_RAWIO)) +#endif return -EPERM; - +#ifdef CONFIG_GRKERNSEC_IO + } +#endif /* * If it's the first ioperm() call in this thread's lifetime, set the * IO bitmap up. ioperm() is much less timing critical than clone(), @@ -89,7 +97,7 @@ asmlinkage long sys_ioperm(unsigned long * because the ->io_bitmap_max value must match the bitmap * contents: */ - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); @@ -143,8 +151,13 @@ asmlinkage long sys_iopl(unsigned long u return -EINVAL; /* Trying to gain more privileges? 
*/ if (level > old) { +#ifdef CONFIG_GRKERNSEC_IO + gr_handle_iopl(); + return -EPERM; +#else if (!capable(CAP_SYS_RAWIO)) return -EPERM; +#endif } t->iopl = level << 12; regs->eflags = (regs->eflags & ~X86_EFLAGS_IOPL) | t->iopl; diff -urNp linux-2.6.21.5/arch/i386/kernel/irq.c linux-2.6.21.5/arch/i386/kernel/irq.c --- linux-2.6.21.5/arch/i386/kernel/irq.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/irq.c 2007-05-24 22:04:52.000000000 -0400 @@ -114,7 +114,7 @@ fastcall unsigned int do_IRQ(struct pt_r int arg1, arg2, ebx; /* build the stack frame on the IRQ stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; irqctx->tinfo.task = curctx->tinfo.task; irqctx->tinfo.previous_esp = current_stack_pointer; @@ -151,10 +151,10 @@ fastcall unsigned int do_IRQ(struct pt_r * gcc's 3.0 and earlier don't handle that correctly. */ static char softirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); static char hardirq_stack[NR_CPUS * THREAD_SIZE] - __attribute__((__aligned__(THREAD_SIZE))); + __attribute__((__aligned__(THREAD_SIZE), __section__(".bss.page_aligned"))); /* * allocate per-cpu stacks for hardirq and for softirq processing @@ -214,7 +214,7 @@ asmlinkage void do_softirq(void) irqctx->tinfo.previous_esp = current_stack_pointer; /* build the stack frame on the softirq stack */ - isp = (u32*) ((char*)irqctx + sizeof(*irqctx)); + isp = (u32*) ((char*)irqctx + sizeof(*irqctx)) - 2; asm volatile( " xchgl %%ebx,%%esp \n" diff -urNp linux-2.6.21.5/arch/i386/kernel/kprobes.c linux-2.6.21.5/arch/i386/kernel/kprobes.c --- linux-2.6.21.5/arch/i386/kernel/kprobes.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/kprobes.c 2007-05-24 22:04:52.000000000 -0400 @@ -661,7 +661,7 @@ int __kprobes kprobe_exceptions_notify(s struct die_args *args = (struct die_args *)data; int 
ret = NOTIFY_DONE; - if (args->regs && user_mode_vm(args->regs)) + if (args->regs && user_mode(args->regs)) return ret; switch (val) { diff -urNp linux-2.6.21.5/arch/i386/kernel/ldt.c linux-2.6.21.5/arch/i386/kernel/ldt.c --- linux-2.6.21.5/arch/i386/kernel/ldt.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/ldt.c 2007-05-24 22:04:52.000000000 -0400 @@ -103,6 +103,22 @@ int init_new_context(struct task_struct retval = copy_ldt(&mm->context, &old_mm->context); up(&old_mm->context.sem); } + + if (tsk == current) { + mm->context.vdso = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) || defined(CONFIG_PAX_SEGMEXEC) + mm->context.user_cs_base = 0UL; + mm->context.user_cs_limit = ~0UL; + +#if defined(CONFIG_PAX_PAGEEXEC) && defined(CONFIG_SMP) + cpus_clear(mm->context.cpu_user_cs_mask); +#endif + +#endif + + } + return retval; } @@ -213,6 +229,13 @@ static int write_ldt(void __user * ptr, } } +#ifdef CONFIG_PAX_SEGMEXEC + if ((mm->pax_flags & MF_PAX_SEGMEXEC) && (ldt_info.contents & MODIFY_LDT_CONTENTS_CODE)) { + error = -EINVAL; + goto out_unlock; + } +#endif + entry_1 = LDT_entry_a(&ldt_info); entry_2 = LDT_entry_b(&ldt_info); if (oldmode) diff -urNp linux-2.6.21.5/arch/i386/kernel/machine_kexec.c linux-2.6.21.5/arch/i386/kernel/machine_kexec.c --- linux-2.6.21.5/arch/i386/kernel/machine_kexec.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/machine_kexec.c 2007-05-24 22:04:52.000000000 -0400 @@ -29,25 +29,25 @@ static u32 kexec_pmd1[1024] PAGE_ALIGNED static u32 kexec_pte0[1024] PAGE_ALIGNED; static u32 kexec_pte1[1024] PAGE_ALIGNED; -static void set_idt(void *newidt, __u16 limit) +static void set_idt(struct desc_struct *newidt, __u16 limit) { struct Xgt_desc_struct curidt; /* ia32 supports unaliged loads & stores */ curidt.size = limit; - curidt.address = (unsigned long)newidt; + curidt.address = newidt; load_idt(&curidt); }; -static void set_gdt(void *newgdt, __u16 limit) +static void set_gdt(struct desc_struct *newgdt, 
__u16 limit) { struct Xgt_desc_struct curgdt; /* ia32 supports unaligned loads & stores */ curgdt.size = limit; - curgdt.address = (unsigned long)newgdt; + curgdt.address = newgdt; load_gdt(&curgdt); }; diff -urNp linux-2.6.21.5/arch/i386/kernel/module.c linux-2.6.21.5/arch/i386/kernel/module.c --- linux-2.6.21.5/arch/i386/kernel/module.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/module.c 2007-05-24 22:04:52.000000000 -0400 @@ -23,6 +23,8 @@ #include #include +#include + #if 0 #define DEBUGP printk #else @@ -33,9 +35,30 @@ void *module_alloc(unsigned long size) { if (size == 0) return NULL; + +#ifdef CONFIG_PAX_KERNEXEC + return vmalloc(size); +#else return vmalloc_exec(size); +#endif + } +#ifdef CONFIG_PAX_KERNEXEC +void *module_alloc_exec(unsigned long size) +{ + struct vm_struct *area; + + if (size == 0) + return NULL; + + area = __get_vm_area(size, 0, (unsigned long)&MODULES_VADDR, (unsigned long)&MODULES_END); + if (area) + return area->addr; + + return NULL; +} +#endif /* Free memory returned from module_alloc */ void module_free(struct module *mod, void *module_region) @@ -45,6 +68,45 @@ void module_free(struct module *mod, voi table entries. 
*/ } +#ifdef CONFIG_PAX_KERNEXEC +void module_free_exec(struct module *mod, void *module_region) +{ + struct vm_struct **p, *tmp; + + if (!module_region) + return; + + if ((PAGE_SIZE-1) & (unsigned long)module_region) { + printk(KERN_ERR "Trying to module_free_exec() bad address (%p)\n", module_region); + WARN_ON(1); + return; + } + + write_lock(&vmlist_lock); + for (p = &vmlist ; (tmp = *p) != NULL ;p = &tmp->next) + if (tmp->addr == module_region) + break; + + if (tmp) { + unsigned long cr0; + + pax_open_kernel(cr0); + memset(tmp->addr, 0xCC, tmp->size); + pax_close_kernel(cr0); + + *p = tmp->next; + kfree(tmp); + } + write_unlock(&vmlist_lock); + + if (!tmp) { + printk(KERN_ERR "Trying to module_free_exec() nonexistent vm area (%p)\n", + module_region); + WARN_ON(1); + } +} +#endif + /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, @@ -63,14 +125,16 @@ int apply_relocate(Elf32_Shdr *sechdrs, unsigned int i; Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym; - uint32_t *location; + uint32_t *plocation, location; DEBUGP("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; + plocation = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + rel[i].r_offset; + location = (uint32_t)plocation; + if (sechdrs[sechdrs[relsec].sh_info].sh_flags & SHF_EXECINSTR) + plocation = (void *)plocation + __KERNEL_TEXT_OFFSET; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. 
*/ sym = (Elf32_Sym *)sechdrs[symindex].sh_addr @@ -79,11 +143,11 @@ int apply_relocate(Elf32_Shdr *sechdrs, switch (ELF32_R_TYPE(rel[i].r_info)) { case R_386_32: /* We add the value into the location given */ - *location += sym->st_value; + *plocation += sym->st_value; break; case R_386_PC32: /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; + *plocation += sym->st_value - location; break; default: printk(KERN_ERR "module %s: Unknown relocation: %u\n", diff -urNp linux-2.6.21.5/arch/i386/kernel/paravirt.c linux-2.6.21.5/arch/i386/kernel/paravirt.c --- linux-2.6.21.5/arch/i386/kernel/paravirt.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/paravirt.c 2007-05-24 22:04:52.000000000 -0400 @@ -89,7 +89,7 @@ static unsigned native_patch(u8 type, u1 if (len < insn_len) return len; - memcpy(insns, native_insns[type].start, insn_len); + memcpy(insns, native_insns[type].start + __KERNEL_TEXT_OFFSET, insn_len); return insn_len; } @@ -337,16 +337,40 @@ static unsigned long native_store_tr(voi static void native_load_tls(struct thread_struct *t, unsigned int cpu) { + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] C(0); C(1); C(2); #undef C + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) { u32 *lp = (u32 *)((char *)dt + entry*8); + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + lp[0] = entry_low; lp[1] = entry_high; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) @@ -483,7 +507,7 @@ static int __init print_banner(void) } core_initcall(print_banner); -struct paravirt_ops paravirt_ops = { +struct paravirt_ops __attribute__((__section__(".rodata"))) 
paravirt_ops = { .name = "bare hardware", .paravirt_enabled = 0, .kernel_rpl = 0, diff -urNp linux-2.6.21.5/arch/i386/kernel/process.c linux-2.6.21.5/arch/i386/kernel/process.c --- linux-2.6.21.5/arch/i386/kernel/process.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/process.c 2007-05-25 05:00:26.000000000 -0400 @@ -71,7 +71,7 @@ EXPORT_SYMBOL(boot_option_idle_override) */ unsigned long thread_saved_pc(struct task_struct *tsk) { - return ((unsigned long *)tsk->thread.esp)[3]; + return tsk->thread.eip; } /* @@ -301,7 +301,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), init_utsname()->release, @@ -341,8 +341,8 @@ int kernel_thread(int (*fn)(void *), voi regs.ebx = (unsigned long) fn; regs.edx = (unsigned long) arg; - regs.xds = __USER_DS; - regs.xes = __USER_DS; + regs.xds = __KERNEL_DS; + regs.xes = __KERNEL_DS; regs.xfs = __KERNEL_PDA; regs.orig_eax = -1; regs.eip = (unsigned long) kernel_thread_helper; @@ -364,7 +364,7 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; @@ -385,6 +385,7 @@ void flush_thread(void) { struct task_struct *tsk = current; + __asm__("mov %0,%%gs\n" : : "r" (0) : "memory"); memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); clear_tsk_thread_flag(tsk, TIF_DEBUG); @@ -418,7 +419,7 @@ int copy_thread(int nr, unsigned long cl struct task_struct *tsk; int err; - childregs = task_pt_regs(p); + childregs = task_stack_page(p) + 
THREAD_SIZE - sizeof(struct pt_regs) - 8; *childregs = *regs; childregs->eax = 0; childregs->esp = esp; @@ -460,6 +461,11 @@ int copy_thread(int nr, unsigned long cl if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) goto out; +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + goto out; +#endif + desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN; desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); @@ -639,7 +645,7 @@ struct task_struct fastcall * __switch_t struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -667,6 +673,11 @@ struct task_struct fastcall * __switch_t */ savesegment(gs, prev->gs); +#ifdef CONFIG_PAX_MEMORY_UDEREF + if (!segment_eq(task_thread_info(prev_p)->addr_limit, task_thread_info(next_p)->addr_limit)) + __set_fs(task_thread_info(next_p)->addr_limit, cpu); +#endif + /* * Load the per-thread Thread-Local Storage descriptor. 
*/ @@ -833,6 +844,12 @@ asmlinkage int sys_set_thread_area(struc if (copy_from_user(&info, u_info, sizeof(info))) return -EFAULT; + +#ifdef CONFIG_PAX_SEGMEXEC + if ((current->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + idx = info.entry_number; /* @@ -921,9 +938,28 @@ asmlinkage int sys_get_thread_area(struc return 0; } -unsigned long arch_align_stack(unsigned long sp) +#ifdef CONFIG_PAX_RANDKSTACK +asmlinkage void pax_randomize_kstack(void) { - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) - sp -= get_random_int() % 8192; - return sp & ~0xf; + struct tss_struct *tss; + unsigned long time; + + if (!randomize_va_space) + return; + + tss = init_tss + smp_processor_id(); + rdtscl(time); + + /* P4 seems to return a 0 LSB, ignore it */ +#ifdef CONFIG_MPENTIUM4 + time &= 0x1EUL; + time <<= 2; +#else + time &= 0xFUL; + time <<= 3; +#endif + + tss->esp0 ^= time; + current->thread.esp0 = tss->esp0; } +#endif diff -urNp linux-2.6.21.5/arch/i386/kernel/ptrace.c linux-2.6.21.5/arch/i386/kernel/ptrace.c --- linux-2.6.21.5/arch/i386/kernel/ptrace.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/ptrace.c 2007-05-24 22:04:52.000000000 -0400 @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -162,15 +163,15 @@ static unsigned long convert_eip_to_line * and APM bios ones we just ignore here. */ if (seg & LDT_SEGMENT) { - u32 *desc; + struct desc_struct *desc; unsigned long base; down(&child->mm->context.sem); - desc = child->mm->context.ldt + (seg & ~7); - base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000); + desc = &child->mm->context.ldt[seg >> 3]; + base = (desc->a >> 16) | ((desc->b & 0xff) << 16) | (desc->b & 0xff000000); /* 16-bit code segment? 
*/ - if (!((desc[1] >> 22) & 1)) + if (!((desc->b >> 22) & 1)) addr &= 0xffff; addr += base; up(&child->mm->context.sem); @@ -335,6 +336,11 @@ ptrace_set_thread_area(struct task_struc if (copy_from_user(&info, user_desc, sizeof(info))) return -EFAULT; +#ifdef CONFIG_PAX_SEGMEXEC + if ((child->mm->pax_flags & MF_PAX_SEGMEXEC) && (info.contents & MODIFY_LDT_CONTENTS_CODE)) + return -EINVAL; +#endif + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) return -EINVAL; @@ -425,6 +431,17 @@ long arch_ptrace(struct task_struct *chi if(addr == (long) &dummy->u_debugreg[5]) break; if(addr < (long) &dummy->u_debugreg[4] && ((unsigned long) data) >= TASK_SIZE-3) break; + +#ifdef CONFIG_GRKERNSEC + if(addr >= (long) &dummy->u_debugreg[0] && + addr <= (long) &dummy->u_debugreg[3]){ + long reg = (addr - (long) &dummy->u_debugreg[0]) >> 2; + long type = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 4*reg)) & 3; + long align = (child->thread.debugreg[7] >> (DR_CONTROL_SHIFT + 2 + 4*reg)) & 3; + if((type & 1) && (data & align)) + break; + } +#endif /* Sanity-check data. Take one half-byte at once with * check = (val >> (16 + 4*i)) & 0xf. It contains the @@ -641,7 +658,7 @@ void send_sigtrap(struct task_struct *ts info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode(regs) ? 
(void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff -urNp linux-2.6.21.5/arch/i386/kernel/reboot.c linux-2.6.21.5/arch/i386/kernel/reboot.c --- linux-2.6.21.5/arch/i386/kernel/reboot.c 2007-06-18 18:28:31.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/reboot.c 2007-06-18 18:32:27.000000000 -0400 @@ -25,7 +25,7 @@ void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -static int reboot_mode; +static unsigned short reboot_mode; static int reboot_thru_bios; #ifdef CONFIG_SMP @@ -128,7 +128,7 @@ static struct dmi_system_id __initdata r DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, - { } + { NULL, NULL, {{0, NULL}}, NULL} }; static int __init reboot_init(void) @@ -146,18 +146,18 @@ core_initcall(reboot_init); doesn't work with at least one type of 486 motherboard. It is easy to stop this code working; hence the copious comments. */ -static unsigned long long +static const struct desc_struct real_mode_gdt_entries [3] = { - 0x0000000000000000ULL, /* Null descriptor */ - 0x00009a000000ffffULL, /* 16-bit real-mode 64k code at 0x00000000 */ - 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ + {0x00000000, 0x00000000}, /* Null descriptor */ + {0x0000ffff, 0x00009b00}, /* 16-bit real-mode 64k code at 0x00000000 */ + {0x0100ffff, 0x00009300} /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct Xgt_desc_struct -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, 0 }, -no_idt = { 0, 0 }; +static const struct Xgt_desc_struct +real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries, 0 }, +real_mode_idt = { 0x3ff, NULL, 0 }, +no_idt = { 0, NULL, 0 }; /* This is 16-bit protected mode code to disable paging and the cache, @@ -179,7 +179,7 @@ no_idt = { 0, 0 }; More could be done here to set up the registers as if a CPU reset had occurred; hopefully real BIOSs don't assume much. 
*/ -static unsigned char real_mode_switch [] = +static const unsigned char real_mode_switch [] = { 0x66, 0x0f, 0x20, 0xc0, /* movl %cr0,%eax */ 0x66, 0x83, 0xe0, 0x11, /* andl $0x00000011,%eax */ @@ -193,7 +193,7 @@ static unsigned char real_mode_switch [] 0x24, 0x10, /* f: andb $0x10,al */ 0x66, 0x0f, 0x22, 0xc0 /* movl %eax,%cr0 */ }; -static unsigned char jump_to_bios [] = +static const unsigned char jump_to_bios [] = { 0xea, 0x00, 0x00, 0xff, 0xff /* ljmp $0xffff,$0x0000 */ }; @@ -203,10 +203,14 @@ static unsigned char jump_to_bios [] = * specified by the code and length parameters. * We assume that length will aways be less that 100! */ -void machine_real_restart(unsigned char *code, int length) +void machine_real_restart(const unsigned char *code, unsigned int length) { unsigned long flags; +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; +#endif + local_irq_disable(); /* Write zero to CMOS register number 0x0f, which the BIOS POST @@ -227,8 +231,16 @@ void machine_real_restart(unsigned char from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy (swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, - sizeof (swapper_pg_dir [0]) * KERNEL_PGD_PTRS); +#ifdef CONFIG_PAX_KERNEXEC + pax_open_kernel(cr0); +#endif + + clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS)); + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif /* * Use `swapper_pg_dir' as our page directory. @@ -241,7 +253,7 @@ void machine_real_restart(unsigned char REBOOT.COM programs, and the previous reset routine did this too. */ - *((unsigned short *)0x472) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); /* For the switch to real mode, copy some code to low memory. It has to be in the first 64k because it is running in 16-bit mode, and it @@ -249,9 +261,9 @@ void machine_real_restart(unsigned char off paging. 
Copy it near the end of the first page, out of the way of BIOS variables. */ - memcpy ((void *) (0x1000 - sizeof (real_mode_switch) - 100), + flags = __copy_to_user_inatomic((void __user *) (0x1000 - sizeof (real_mode_switch) - 100), real_mode_switch, sizeof (real_mode_switch)); - memcpy ((void *) (0x1000 - 100), code, length); + flags = __copy_to_user_inatomic((void __user *) (0x1000 - 100), code, length); /* Set up the IDT for real mode. */ @@ -333,7 +345,7 @@ void machine_emergency_restart(void) __asm__ __volatile__("int3"); } /* rebooting needs to touch the page at absolute addr 0 */ - *((unsigned short *)__va(0x472)) = reboot_mode; + __put_user(reboot_mode, (unsigned short __user *)0x472); for (;;) { mach_reboot_fixups(); /* for board specific fixups */ mach_reboot(); diff -urNp linux-2.6.21.5/arch/i386/kernel/setup.c linux-2.6.21.5/arch/i386/kernel/setup.c --- linux-2.6.21.5/arch/i386/kernel/setup.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/setup.c 2007-05-24 22:04:52.000000000 -0400 @@ -82,7 +82,11 @@ struct cpuinfo_x86 new_cpu_data __cpuini struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); +#ifdef CONFIG_X86_PAE +unsigned long mmu_cr4_features = X86_CR4_PAE; +#else unsigned long mmu_cr4_features; +#endif /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; @@ -404,8 +408,8 @@ void __init setup_bootmem_allocator(void * the (very unlikely) case of us accidentally initializing the * bootmem allocator with an invalid RAM area. 
*/ - reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) + - bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text)); + reserve_bootmem(LOAD_PHYSICAL_ADDR, (PFN_PHYS(min_low_pfn) + + bootmap_size + PAGE_SIZE-1) - LOAD_PHYSICAL_ADDR); /* * reserve physical page 0 - it's a special BIOS page on many boxes, @@ -559,14 +563,14 @@ void __init setup_arch(char **cmdline_p) if (!MOUNT_ROOT_RDONLY) root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) _text; - init_mm.end_code = (unsigned long) _etext; + init_mm.start_code = (unsigned long) _text + __KERNEL_TEXT_OFFSET; + init_mm.end_code = (unsigned long) _etext + __KERNEL_TEXT_OFFSET; init_mm.end_data = (unsigned long) _edata; init_mm.brk = init_pg_tables_end + PAGE_OFFSET; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); + code_resource.start = virt_to_phys(_text + __KERNEL_TEXT_OFFSET); + code_resource.end = virt_to_phys(_etext + __KERNEL_TEXT_OFFSET)-1; + data_resource.start = virt_to_phys(_data); data_resource.end = virt_to_phys(_edata)-1; parse_early_param(); diff -urNp linux-2.6.21.5/arch/i386/kernel/signal.c linux-2.6.21.5/arch/i386/kernel/signal.c --- linux-2.6.21.5/arch/i386/kernel/signal.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/signal.c 2007-05-24 22:04:52.000000000 -0400 @@ -351,9 +351,9 @@ static int setup_frame(int sig, struct k } if (current->binfmt->hasvdso) - restorer = (void *)VDSO_SYM(&__kernel_sigreturn); + restorer = (void __user *)VDSO_SYM(&__kernel_sigreturn); else - restorer = (void *)&frame->retcode; + restorer = (void __user *)&frame->retcode; if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; @@ -449,7 +449,8 @@ static int setup_rt_frame(int sig, struc goto give_sigsegv; /* Set up to return from userspace. 
*/ - restorer = (void *)VDSO_SYM(&__kernel_rt_sigreturn); + + restorer = (void __user *)VDSO_SYM(&__kernel_rt_sigreturn); if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; err |= __put_user(restorer, &frame->pretcode); @@ -582,7 +583,7 @@ static void fastcall do_signal(struct pt * before reaching here, so testing against kernel * CS suffices. */ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) return; if (test_thread_flag(TIF_RESTORE_SIGMASK)) diff -urNp linux-2.6.21.5/arch/i386/kernel/smpboot.c linux-2.6.21.5/arch/i386/kernel/smpboot.c --- linux-2.6.21.5/arch/i386/kernel/smpboot.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/smpboot.c 2007-05-24 22:04:52.000000000 -0400 @@ -53,7 +53,6 @@ #include #include #include -#include #include #include @@ -463,7 +462,6 @@ extern struct { void * esp; unsigned short ss; } stack_start; -extern struct i386_pda *start_pda; #ifdef CONFIG_NUMA @@ -812,10 +810,7 @@ static int __cpuinit do_boot_cpu(int api /* Pre-allocate and initialize the CPU's GDT and PDA so it doesn't have to do any memory allocation during the delicate CPU-bringup phase. */ - if (!init_gdt(cpu, idle)) { - printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); - return -1; /* ? */ - } + init_gdt(cpu, idle); idle->thread.eip = (unsigned long) start_secondary; /* start_eip had better be page-aligned! */ @@ -941,7 +936,6 @@ static int __cpuinit __smp_prepare_cpu(i DECLARE_COMPLETION_ONSTACK(done); struct warm_boot_cpu_info info; int apicid, ret; - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { @@ -949,18 +943,6 @@ static int __cpuinit __smp_prepare_cpu(i goto exit; } - /* - * the CPU isn't initialized at boot time, allocate gdt table here. 
- * cpu_init will initialize it - */ - if (!cpu_gdt_descr->address) { - cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); - if (!cpu_gdt_descr->address) - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); - ret = -ENOMEM; - goto exit; - } - info.complete = &done; info.apicid = apicid; info.cpu = cpu; diff -urNp linux-2.6.21.5/arch/i386/kernel/smp.c linux-2.6.21.5/arch/i386/kernel/smp.c --- linux-2.6.21.5/arch/i386/kernel/smp.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/smp.c 2007-05-24 22:04:52.000000000 -0400 @@ -104,7 +104,7 @@ * about nothing of note with C stepping upwards. */ -DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, }; +DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, {0} }; /* * the following functions deal with sending IPIs between CPUs. diff -urNp linux-2.6.21.5/arch/i386/kernel/syscall_table.S linux-2.6.21.5/arch/i386/kernel/syscall_table.S --- linux-2.6.21.5/arch/i386/kernel/syscall_table.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/syscall_table.S 2007-05-24 22:04:52.000000000 -0400 @@ -1,3 +1,4 @@ +.section .rodata,"a",@progbits ENTRY(sys_call_table) .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ .long sys_exit diff -urNp linux-2.6.21.5/arch/i386/kernel/sysenter.c linux-2.6.21.5/arch/i386/kernel/sysenter.c --- linux-2.6.21.5/arch/i386/kernel/sysenter.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/sysenter.c 2007-05-24 22:04:52.000000000 -0400 @@ -49,7 +49,7 @@ extern asmlinkage void sysenter_entry(vo void enable_sep_cpu(void) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = init_tss + cpu; if (!boot_cpu_has(X86_FEATURE_SEP)) { put_cpu(); @@ -106,9 +106,10 @@ int arch_setup_additional_pages(struct l struct mm_struct *mm = current->mm; unsigned long addr; int ret; + unsigned long vm_flags = 
VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|VM_ALWAYSDUMP; down_write(&mm->mmap_sem); - addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, MAP_EXECUTABLE); if (IS_ERR_VALUE(addr)) { ret = addr; goto up_fail; @@ -122,15 +123,17 @@ int arch_setup_additional_pages(struct l * without matching up the same kernel and hardware config to see * what PC values meant. */ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| - VM_ALWAYSDUMP, - syscall_pages); + +#ifdef CONFIG_PAX_MPROTECT + if (mm->pax_flags & MF_PAX_MPROTECT) + vm_flags &= ~VM_MAYWRITE; +#endif + + ret = install_special_mapping(mm, addr, PAGE_SIZE, vm_flags, syscall_pages); if (ret) goto up_fail; - current->mm->context.vdso = (void *)addr; + current->mm->context.vdso = addr; current_thread_info()->sysenter_return = (void *)VDSO_SYM(&SYSENTER_RETURN); up_fail: @@ -140,8 +143,17 @@ up_fail: const char *arch_vma_name(struct vm_area_struct *vma) { - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + if (vma->vm_start == vma->vm_mm->context.vdso) + return "[vdso]"; + +#ifdef CONFIG_PAX_SEGMEXEC + if (!(vma->vm_mm->pax_flags & MF_PAX_SEGMEXEC) || !(vma->vm_flags & VM_MIRROR)) + return NULL; + + if (vma->vm_start + vma->vm_mirror == vma->vm_mm->context.vdso) return "[vdso]"; +#endif + return NULL; } diff -urNp linux-2.6.21.5/arch/i386/kernel/sys_i386.c linux-2.6.21.5/arch/i386/kernel/sys_i386.c --- linux-2.6.21.5/arch/i386/kernel/sys_i386.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/sys_i386.c 2007-05-24 22:04:52.000000000 -0400 @@ -100,6 +100,191 @@ out: return err; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + 
if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (len > mm->cached_hole_size) { + start_addr = addr = mm->free_area_cache; + } else { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + } + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE) && start_addr >= mm->mmap_base) { + start_addr = 0x00110000UL; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + start_addr += mm->delta_mmap & 0x03FFF000UL; +#endif + + if (mm->start_brk <= start_addr && start_addr < mm->mmap_base) + start_addr = addr = mm->mmap_base; + else + addr = start_addr; + } +#endif + +full_search: + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (task_size - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. 
+ */ + if (start_addr != mm->mmap_base) { + start_addr = addr = mm->mmap_base; + mm->cached_hole_size = 0; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + /* + * Remember the place where we stopped the search: + */ + mm->free_area_cache = addr + len; + return addr; + } + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + addr = vma->vm_end; + if (mm->start_brk <= addr && addr < mm->mmap_base) { + start_addr = addr = mm->mmap_base; + goto full_search; + } + } +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long base = mm->mmap_base, addr = addr0, task_size = TASK_SIZE; + +#ifdef CONFIG_PAX_SEGMEXEC + if (mm->pax_flags & MF_PAX_SEGMEXEC) + task_size = SEGMEXEC_TASK_SIZE; +#endif + + /* requested length too big for entire address space */ + if (len > task_size) + return -ENOMEM; + +#ifdef CONFIG_PAX_PAGEEXEC + if ((mm->pax_flags & MF_PAX_PAGEEXEC) && (flags & MAP_EXECUTABLE)) + goto bottomup; +#endif + +#ifdef CONFIG_PAX_RANDMMAP + if (!(mm->pax_flags & MF_PAX_RANDMMAP) || !filp) +#endif + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (task_size - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + /* check if free_area_cache is useful for us */ + if (len <= mm->cached_hole_size) { + mm->cached_hole_size = 0; + mm->free_area_cache = mm->mmap_base; + } + + /* either no address requested or can't fit in requested address hole */ + addr = mm->free_area_cache; + + /* make sure it can fit in the remaining address space */ + if (addr > len) { + vma = find_vma(mm, addr-len); + if (!vma || addr <= vma->vm_start) + /* remember the address as a hint for next time */ + return 
(mm->free_area_cache = addr-len); + } + + if (mm->mmap_base < len) + goto bottomup; + + addr = mm->mmap_base-len; + + do { + /* + * Lookup failure means no vma is above this address, + * else if new region fits below vma->vm_start, + * return with success: + */ + vma = find_vma(mm, addr); + if (!vma || addr+len <= vma->vm_start) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + + /* remember the largest hole we saw so far */ + if (addr + mm->cached_hole_size < vma->vm_start) + mm->cached_hole_size = vma->vm_start - addr; + + /* try just below the current vma->vm_start */ + addr = vma->vm_start-len; + } while (len < vma->vm_start); + +bottomup: + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->mmap_base = TASK_UNMAPPED_BASE; + +#ifdef CONFIG_PAX_RANDMMAP + if (mm->pax_flags & MF_PAX_RANDMMAP) + mm->mmap_base += mm->delta_mmap; +#endif + + mm->free_area_cache = mm->mmap_base; + mm->cached_hole_size = ~0UL; + addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); + /* + * Restore the topdown base: + */ + mm->mmap_base = base; + mm->free_area_cache = base; + mm->cached_hole_size = ~0UL; + + return addr; +} struct sel_arg_struct { unsigned long n; diff -urNp linux-2.6.21.5/arch/i386/kernel/traps.c linux-2.6.21.5/arch/i386/kernel/traps.c --- linux-2.6.21.5/arch/i386/kernel/traps.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/traps.c 2007-06-18 18:29:52.000000000 -0400 @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -66,12 +67,7 @@ asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ char ignore_fpu_irq = 0; -/* - * The IDT has to be page-aligned to simplify the Pentium - * F0 0F bug workaround.. We have a special link segment - * for this. 
- */ -struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern struct desc_struct idt_table[256]; asmlinkage void divide_error(void); asmlinkage void debug(void); @@ -297,7 +293,7 @@ void show_registers(struct pt_regs *regs esp = (unsigned long) (&regs->esp); savesegment(ss, ss); savesegment(gs, gs); - if (user_mode_vm(regs)) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -335,11 +331,11 @@ void show_registers(struct pt_regs *regs printk(KERN_EMERG "Code: "); - eip = (u8 *)regs->eip - code_prologue; + eip = (u8 *)regs->eip - code_prologue + __KERNEL_TEXT_OFFSET; if (eip < (u8 *)PAGE_OFFSET || probe_kernel_address(eip, c)) { /* try starting at EIP */ - eip = (u8 *)regs->eip; + eip = (u8 *)regs->eip + __KERNEL_TEXT_OFFSET; code_len = code_len - code_prologue + 1; } for (i = 0; i < code_len; i++, eip++) { @@ -348,7 +344,7 @@ void show_registers(struct pt_regs *regs printk(" Bad EIP value."); break; } - if (eip == (u8 *)regs->eip) + if (eip == (u8 *)regs->eip + __KERNEL_TEXT_OFFSET) printk("<%02x> ", c); else printk("%02x ", c); @@ -361,6 +357,7 @@ int is_valid_bugaddr(unsigned long eip) { unsigned short ud2; + eip += __KERNEL_TEXT_OFFSET; if (eip < PAGE_OFFSET) return 0; if (probe_kernel_address((unsigned short *)eip, ud2)) @@ -467,7 +464,7 @@ void die(const char * str, struct pt_reg static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!user_mode_vm(regs)) + if (!user_mode(regs)) die(str, regs, err); } @@ -485,7 +482,7 @@ static void __kprobes do_trap(int trapnr goto trap_signal; } - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto kernel_trap; trap_signal: { @@ -573,7 +570,7 @@ fastcall void __kprobes do_general_prote long error_code) { int cpu = get_cpu(); - struct tss_struct *tss = &per_cpu(init_tss, cpu); + struct tss_struct *tss = &init_tss[cpu]; struct thread_struct *thread = &current->thread; /* @@ -609,9 +606,25 @@ fastcall void __kprobes
do_general_prote if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto gp_in_kernel; +#ifdef CONFIG_PAX_PAGEEXEC + if (current->mm && (current->mm->pax_flags & MF_PAX_PAGEEXEC)) { + struct mm_struct *mm = current->mm; + unsigned long limit; + + down_write(&mm->mmap_sem); + limit = mm->context.user_cs_limit; + if (limit < TASK_SIZE) { + track_exec_limit(mm, limit, TASK_SIZE, PROT_EXEC); + up_write(&mm->mmap_sem); + return; + } + up_write(&mm->mmap_sem); + } +#endif + current->thread.error_code = error_code; current->thread.trap_no = 13; force_sig(SIGSEGV, current); @@ -627,6 +640,13 @@ gp_in_kernel: if (notify_die(DIE_GPF, "general protection fault", regs, error_code, 13, SIGSEGV) == NOTIFY_STOP) return; + +#ifdef CONFIG_PAX_KERNEXEC + if ((regs->xcs & 0xFFFF) == __KERNEL_CS) + die("PAX: suspicious general protection fault", regs, error_code); + else +#endif + die("general protection fault", regs, error_code); } } @@ -708,7 +728,7 @@ void __kprobes die_nmi(struct pt_regs *r /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode_vm(regs)) { + if (!user_mode(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -840,7 +860,7 @@ fastcall void __kprobes do_debug(struct * check for kernel mode by just checking the CPL * of CS. 
*/ - if (!user_mode(regs)) + if (!user_mode_novm(regs)) goto clear_TF_reenable; } @@ -1019,8 +1039,7 @@ fastcall unsigned long patch_espfix_desc unsigned long kesp) { int cpu = smp_processor_id(); - struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); - struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; + struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr[cpu].address; unsigned long base = (kesp - uesp) & -THREAD_SIZE; unsigned long new_kesp = kesp - base; unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; @@ -1079,7 +1098,7 @@ void __init trap_init_f00f_bug(void) * Update the IDT descriptor and reload the IDT so that * it uses the read-only mapped virtual address. */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); + idt_descr.address = (struct desc_struct *)fix_to_virt(FIX_F00F_IDT); load_idt(&idt_descr); } #endif diff -urNp linux-2.6.21.5/arch/i386/kernel/tsc.c linux-2.6.21.5/arch/i386/kernel/tsc.c --- linux-2.6.21.5/arch/i386/kernel/tsc.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/tsc.c 2007-05-24 22:04:52.000000000 -0400 @@ -313,7 +313,7 @@ static struct dmi_system_id __initdata b DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), }, }, - {} + { NULL, NULL, {{0, NULL}}, NULL} }; /* diff -urNp linux-2.6.21.5/arch/i386/kernel/vm86.c linux-2.6.21.5/arch/i386/kernel/vm86.c --- linux-2.6.21.5/arch/i386/kernel/vm86.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/vm86.c 2007-05-24 22:04:52.000000000 -0400 @@ -149,7 +149,7 @@ struct pt_regs * fastcall save_v86_state do_exit(SIGSEGV); } - tss = &per_cpu(init_tss, get_cpu()); + tss = init_tss + get_cpu(); current->thread.esp0 = current->thread.saved_esp0; current->thread.sysenter_cs = __KERNEL_CS; load_esp0(tss, &current->thread); @@ -325,7 +325,7 @@ static void do_sys_vm86(struct kernel_vm tsk->thread.saved_fs = info->regs32->xfs; savesegment(gs, tsk->thread.saved_gs); - tss = &per_cpu(init_tss, get_cpu()); + tss =
init_tss + get_cpu(); tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; diff -urNp linux-2.6.21.5/arch/i386/kernel/vmi.c linux-2.6.21.5/arch/i386/kernel/vmi.c --- linux-2.6.21.5/arch/i386/kernel/vmi.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/vmi.c 2007-05-24 22:04:52.000000000 -0400 @@ -93,7 +93,19 @@ static char irq_save_disable_callout[] = static inline void patch_offset(unsigned char *eip, unsigned char *dest) { - *(unsigned long *)(eip+1) = dest-eip-5; + +#ifdef CONFIG_PAX_KERNEXEC + unsigned long cr0; + + pax_open_kernel(cr0); +#endif + + *(unsigned long *)(eip+1) = dest-eip-5; + +#ifdef CONFIG_PAX_KERNEXEC + pax_close_kernel(cr0); +#endif + } static unsigned patch_internal(int call, unsigned len, void *insns) @@ -511,14 +523,14 @@ static void vmi_set_pud(pud_t *pudp, pud static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - const pte_t pte = { 0 }; + const pte_t pte = __pte(0ULL); vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); } void vmi_pmd_clear(pmd_t *pmd) { - const pte_t pte = { 0 }; + const pte_t pte = __pte(0ULL); vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); } @@ -549,15 +561,13 @@ vmi_startup_ipi_hook(int phys_apicid, un ap.ss = __KERNEL_DS; ap.esp = (unsigned long) start_esp; - ap.ds = __USER_DS; - ap.es = __USER_DS; + ap.ds = __KERNEL_DS; + ap.es = __KERNEL_DS; ap.fs = __KERNEL_PDA; ap.gs = 0; ap.eflags = 0; - setup_pda(); - #ifdef CONFIG_X86_PAE /* efer should match BSP efer. 
*/ if (cpu_has_nx) { diff -urNp linux-2.6.21.5/arch/i386/kernel/vmlinux.lds.S linux-2.6.21.5/arch/i386/kernel/vmlinux.lds.S --- linux-2.6.21.5/arch/i386/kernel/vmlinux.lds.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/kernel/vmlinux.lds.S 2007-05-24 22:04:52.000000000 -0400 @@ -21,6 +21,13 @@ #include #include #include +#include + +#ifdef CONFIG_X86_PAE +#define PMD_SHIFT 21 +#else +#define PMD_SHIFT 22 +#endif OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -30,90 +37,19 @@ _proxy_pda = 1; PHDRS { text PT_LOAD FLAGS(5); /* R_E */ - data PT_LOAD FLAGS(7); /* RWE */ + data PT_LOAD FLAGS(6); /* RW_ */ note PT_NOTE FLAGS(4); /* R__ */ } SECTIONS { . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; - phys_startup_32 = startup_32 - LOAD_OFFSET; - - .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { - _text = .; /* Text and read-only data */ - *(.text.head) - } :text = 0x9090 - - /* read-only */ - .text : AT(ADDR(.text) - LOAD_OFFSET) { - *(.text) - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) - _etext = .; /* End of text section */ - } :text = 0x9090 - - . = ALIGN(16); /* Exception table */ - __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } - - RODATA - - BUG_TABLE - - . = ALIGN(4); - .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { - __tracedata_start = .; - *(.tracedata) - __tracedata_end = .; - } - - /* writeable */ - . = ALIGN(4096); - .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ - *(.data) - CONSTRUCTORS - } :data - - .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) { - __start_paravirtprobe = .; - *(.paravirtprobe) - __stop_paravirtprobe = .; - } - - . = ALIGN(4096); - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(4096); - __nosave_end = .; - } - - . 
= ALIGN(4096); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.idt) - } + phys_startup_32 = startup_32 - LOAD_OFFSET + __KERNEL_TEXT_OFFSET; - . = ALIGN(32); - .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } - - /* rarely changed data like cpu maps */ - . = ALIGN(32); - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) - _edata = .; /* End of data section */ - } - - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) - } + .text.startup : AT(ADDR(.text.startup) - LOAD_OFFSET) { + BYTE(0xEA) /* jmp far */ + LONG(phys_startup_32) + SHORT(__BOOT_CS) + } :text = 0x9090 /* might get freed after init */ . = ALIGN(4096); @@ -142,14 +78,10 @@ SECTIONS . = ALIGN(4096); /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ - .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - __init_begin = .; - _sinittext = .; - *(.init.text) - _einittext = .; - } - .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { *(.init.data) } + .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { + __init_begin = .; + *(.init.data) + } . = ALIGN(16); .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { __setup_start = .; @@ -182,9 +114,6 @@ SECTIONS *(.parainstructions) __stop_parainstructions = .; } - /* .exit.text is discard at runtime, not link time, to deal with references - from .altinstructions and .eh_frame */ - .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) } .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) } #if defined(CONFIG_BLK_DEV_INITRD) . = ALIGN(4096); @@ -200,11 +129,135 @@ SECTIONS *(.data.percpu) __per_cpu_end = .; } + + /* read-only */ + + . = ALIGN(4096); /* Init code and data */ + .init.text (. 
- __KERNEL_TEXT_OFFSET) : AT(ADDR(.init.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + _sinittext = .; + *(.init.text) + _einittext = .; + } + + /* .exit.text is discard at runtime, not link time, to deal with references + from .altinstructions and .eh_frame */ + .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { *(.exit.text) } + +#ifdef CONFIG_PAX_KERNEXEC + .text.align : AT(ADDR(.text.align) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + . = ALIGN(__KERNEL_TEXT_OFFSET - LOAD_OFFSET) - 1; + BYTE(0) + } +#else . = ALIGN(4096); +#endif + /* freed after init ends here */ - + + .text.head : AT(ADDR(.text.head) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + __init_end = . + __KERNEL_TEXT_OFFSET; + _text = .; /* Text and read-only data */ + *(.text.head) + } :text = 0x9090 + + .text : AT(ADDR(.text) - LOAD_OFFSET + __KERNEL_TEXT_OFFSET) { + *(.text) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) + _etext = .; /* End of text section */ + } :text = 0x9090 + + . += __KERNEL_TEXT_OFFSET; + . = ALIGN(16); /* Exception table */ + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + . = ALIGN(4096); + .rodata.page_aligned : AT(ADDR(.rodata.page_aligned) - LOAD_OFFSET) { + *(.empty_zero_page) + +#ifdef CONFIG_X86_PAE + *(.swapper_pm_dir) +#endif + + *(.swapper_pg_dir) + *(.idt) + } + + RODATA + + BUG_TABLE + + . = ALIGN(4); + .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) { + __tracedata_start = .; + *(.tracedata) + __tracedata_end = .; + } + +#ifdef CONFIG_PAX_KERNEXEC + . = ALIGN(4096); + + .module.text : AT(ADDR(.module.text) - LOAD_OFFSET) { + MODULES_VADDR = .; + . += (4 * 1024 * 1024); + . = ALIGN(1 << PMD_SHIFT) - 1; + BYTE(0) + MODULES_END = .; + } + +#else + . = ALIGN(32); +#endif + + /* writeable */ + . 
= ALIGN(4096); + .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ + _data = .; + *(.data) + CONSTRUCTORS + } :data + + .paravirtprobe : AT(ADDR(.paravirtprobe) - LOAD_OFFSET) { + __start_paravirtprobe = .; + *(.paravirtprobe) + __stop_paravirtprobe = .; + } + + . = ALIGN(4096); + .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { + __nosave_begin = .; + *(.data.nosave) + . = ALIGN(4096); + __nosave_end = .; + } + + . = ALIGN(32); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { + *(.data.cacheline_aligned) + } + + /* rarely changed data like cpu maps */ + . = ALIGN(32); + .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { + *(.data.read_mostly) + _edata = .; /* End of data section */ + } + + . = ALIGN(THREAD_SIZE); /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { + *(.data.init_task) + } + + . = ALIGN(4096); + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { - __init_end = .; __bss_start = .; /* BSS */ *(.bss.page_aligned) *(.bss) diff -urNp linux-2.6.21.5/arch/i386/lib/checksum.S linux-2.6.21.5/arch/i386/lib/checksum.S --- linux-2.6.21.5/arch/i386/lib/checksum.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/lib/checksum.S 2007-05-24 22:04:52.000000000 -0400 @@ -26,7 +26,8 @@ */ #include - +#include + /* * computes a partial checksum, e.g. 
for TCP/UDP fragments */ @@ -280,12 +281,23 @@ unsigned int csum_partial_copy_generic ( .align 4 .globl csum_partial_copy_generic - +.globl csum_partial_copy_generic_to_user +.globl csum_partial_copy_generic_from_user + #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 #define FP 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: subl $4,%esp pushl %edi @@ -304,7 +316,7 @@ csum_partial_copy_generic: jmp 4f SRC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +DST( movw %bx, %es:(%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -316,30 +328,30 @@ DST( movw %bx, (%edi) ) SRC(1: movl (%esi), %ebx ) SRC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +DST( movl %edx, %es:4(%edi) ) SRC( movl 8(%esi), %ebx ) SRC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +DST( movl %ebx, %es:8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +DST( movl %edx, %es:12(%edi) ) SRC( movl 16(%esi), %ebx ) SRC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +DST( movl %ebx, %es:16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +DST( movl %edx, %es:20(%edi) ) SRC( movl 24(%esi), %ebx ) SRC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +DST( movl %ebx, %es:24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +DST( movl %edx, %es:28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -353,7 +365,7 @@ DST( movl %edx, 28(%edi) ) shrl $2, %edx # This clears CF SRC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +DST( movl %ebx, %es:(%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -365,12 +377,12 @@ DST( movl %ebx, (%edi) ) jb 5f SRC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +DST( movw %cx, %es:(%edi) ) leal 2(%edi), %edi 
je 6f shll $16,%ecx SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +DST( movb %cl, %es:(%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: @@ -381,7 +393,7 @@ DST( movb %cl, (%edi) ) 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination - computing the rest # is too much work @@ -394,11 +406,15 @@ DST( movb %cl, (%edi) ) 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT,(%ebx) + movl $-EFAULT,%ss:(%ebx) jmp 5000b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %ebx popl %esi popl %edi @@ -410,17 +426,28 @@ DST( movb %cl, (%edi) ) /* Version for PentiumII/PPro */ #define ROUND1(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ROUND(x) \ + nop; nop; nop; \ SRC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + DST(movl %ebx, %es:x(%edi)); #define ARGBASE 12 - + +csum_partial_copy_generic_to_user: + pushl $(__USER_DS) + popl %es + jmp csum_partial_copy_generic + +csum_partial_copy_generic_from_user: + pushl $(__USER_DS) + popl %ds + csum_partial_copy_generic: pushl %ebx pushl %edi @@ -439,7 +466,7 @@ csum_partial_copy_generic: subl %ebx, %edi lea -1(%esi),%edx andl $-32,%edx - lea 3f(%ebx,%ebx), %ebx + lea 3f(%ebx,%ebx,2), %ebx testl %esi, %esi jmp *%ebx 1: addl $64,%esi @@ -460,19 +487,19 @@ csum_partial_copy_generic: jb 5f SRC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +DST( movw %dx, %es:(%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +DST( movb %dl, %es:(%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) # zero the complete destination (computing the rest is too much work) movl ARGBASE+8(%esp),%edi # dst movl ARGBASE+12(%esp),%ecx # len @@ -480,10 +507,14 @@ DST( 
movb %dl, (%edi) ) rep; stosb jmp 7b 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) + movl $-EFAULT, %ss:(%ebx) jmp 7b .previous + pushl %ss + popl %ds + pushl %ss + popl %es popl %esi popl %edi popl %ebx diff -urNp linux-2.6.21.5/arch/i386/lib/getuser.S linux-2.6.21.5/arch/i386/lib/getuser.S --- linux-2.6.21.5/arch/i386/lib/getuser.S 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/lib/getuser.S 2007-05-24 22:04:52.000000000 -0400 @@ -9,6 +9,7 @@ * return value. */ #include +#include /* @@ -30,8 +31,12 @@ __get_user_1: GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 1: movzbl (%eax),%edx xorl %eax,%eax + pushl %ss + pop %ds ret .align 4 @@ -42,7 +47,11 @@ __get_user_2: GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 2: movzwl -1(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret @@ -54,11 +63,17 @@ __get_user_4: GET_THREAD_INFO(%edx) cmpl TI_addr_limit(%edx),%eax jae bad_get_user + pushl $(__USER_DS) + popl %ds 3: movl -3(%eax),%edx + pushl %ss + pop %ds xorl %eax,%eax ret bad_get_user: + pushl %ss + pop %ds xorl %edx,%edx movl $-14,%eax ret diff -urNp linux-2.6.21.5/arch/i386/lib/mmx.c linux-2.6.21.5/arch/i386/lib/mmx.c --- linux-2.6.21.5/arch/i386/lib/mmx.c 2007-04-25 23:08:32.000000000 -0400 +++ linux-2.6.21.5/arch/i386/lib/mmx.c 2007-05-24 22:04:52.000000000 -0400 @@ -30,6 +30,7 @@ void *_mmx_memcpy(void *to, const void * { void *p; int i; + unsigned long cr0; if (unlikely(in_interrupt())) return __memcpy(to, from, len); @@ -40,52 +41,80 @@ void *_mmx_memcpy(void *to, const void * kernel_fpu_begin(); __asm__ __volatile__ ( - "1: prefetch (%0)\n" /* This set is 28 bytes */ - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" /* This set is 28 bytes */ + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" 
".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : "=&r" (cr0) : "r" (from) : "ax"); for(; i>5; i--) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movq 8(%0), %%mm1\n" - " movq 16(%0), %%mm2\n" - " movq 24(%0), %%mm3\n" - " movq %%mm0, (%1)\n" - " movq %%mm1, 8(%1)\n" - " movq %%mm2, 16(%1)\n" - " movq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm0\n" - " movq 40(%0), %%mm1\n" - " movq 48(%0), %%mm2\n" - " movq 56(%0), %%mm3\n" - " movq %%mm0, 32(%1)\n" - " movq %%mm1, 40(%1)\n" - " movq %%mm2, 48(%1)\n" - " movq %%mm3, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movq 8(%1), %%mm1\n" + " movq 16(%1), %%mm2\n" + " movq 24(%1), %%mm3\n" + " movq %%mm0, (%2)\n" + " movq %%mm1, 8(%2)\n" + " movq %%mm2, 16(%2)\n" + " movq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm0\n" + " movq 40(%1), %%mm1\n" + " movq 48(%1), %%mm2\n" + " movq 56(%1), %%mm3\n" + " movq %%mm0, 32(%2)\n" + " movq %%mm1, 40(%2)\n" + " movq %%mm2, 48(%2)\n" + " movq %%mm3, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -164,6 +193,7 @@ static void 
fast_clear_page(void *page) static void fast_copy_page(void *to, void *from) { int i; + unsigned long cr0; kernel_fpu_begin(); @@ -171,51 +201,79 @@ static void fast_copy_page(void *to, voi * but that is for later. -AV */ __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(%0)\n" + "1: prefetch (%1)\n" + " prefetch 64(%1)\n" + " prefetch 128(%1)\n" + " prefetch 192(%1)\n" + " prefetch 256(%1)\n" "2: \n" ".section .fixup, \"ax\"\n" - "3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + "3: \n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x1AEB, 1b\n" /* jmp on 26 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from) ); + : "=&r" (cr0) : "r" (from) : "ax"); for(i=0; i<(4096-320)/64; i++) { __asm__ __volatile__ ( - "1: prefetch 320(%0)\n" - "2: movq (%0), %%mm0\n" - " movntq %%mm0, (%1)\n" - " movq 8(%0), %%mm1\n" - " movntq %%mm1, 8(%1)\n" - " movq 16(%0), %%mm2\n" - " movntq %%mm2, 16(%1)\n" - " movq 24(%0), %%mm3\n" - " movntq %%mm3, 24(%1)\n" - " movq 32(%0), %%mm4\n" - " movntq %%mm4, 32(%1)\n" - " movq 40(%0), %%mm5\n" - " movntq %%mm5, 40(%1)\n" - " movq 48(%0), %%mm6\n" - " movntq %%mm6, 48(%1)\n" - " movq 56(%0), %%mm7\n" - " movntq %%mm7, 56(%1)\n" + "1: prefetch 320(%1)\n" + "2: movq (%1), %%mm0\n" + " movntq %%mm0, (%2)\n" + " movq 8(%1), %%mm1\n" + " movntq %%mm1, 8(%2)\n" + " movq 16(%1), %%mm2\n" + " movntq %%mm2, 16(%2)\n" + " movq 24(%1), %%mm3\n" + " movntq %%mm3, 24(%2)\n" + " movq 32(%1), %%mm4\n" + " movntq %%mm4, 32(%2)\n" + " movq 40(%1), %%mm5\n" + " movntq %%mm5, 40(%2)\n" + " movq 48(%1), %%mm6\n" + " movntq %%mm6, 48(%2)\n" + " movq 56(%1), %%mm7\n" + " movntq %%mm7, 56(%2)\n" ".section .fixup, \"ax\"\n" - "3: movw $0x05EB, 1b\n" 
/* jmp on 5 bytes */ + "3:\n" + +#ifdef CONFIG_PAX_KERNEXEC + " movl %%cr0, %0\n" + " movl %0, %%eax\n" + " andl $0xFFFEFFFF, %%eax\n" + " movl %%eax, %%cr0\n" +#endif + + " movw $0x05EB, 1b\n" /* jmp on 5 bytes */ + +#ifdef CONFIG_PAX_KERNEXEC + " movl %0, %%cr0\n" +#endif + " jmp 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" " .align 4\n" " .long 1b, 3b\n" ".previous" - : : "r" (from), "r" (to) : "memory"); + : "=&r" (cr0) : "r" (from), "r" (to) : "memory", "ax"); from+=64; to+=64; } @@ -296,56 +354,84 @@ static void fast_clear_page(void *page) static void fast_copy_page(void *to, void *from) { int i; - - + unsigned long cr0; + kernel_fpu_begin(); __asm__ __volatile__ ( - "1: prefetch (%0)\n" - " prefetch 64(%0)\n" - " prefetch 128(%0)\n" - " prefetch 192(%0)\n" - " prefetch 256(