mirror of https://gitlab.com/sortix/sortix.git
Add paging no-execute and write protection support.
Enable the NX bit on x86_64, setting it whenever a page lacks PROT_EXEC, and enable the write protection mode (CR0.WP) that disables the default behavior where the kernel is able to write to read-only memory. Fix broken kernel assumptions that it can write to read-only memory, and take care to never set PROT_KWRITE on user-space pages unless PROT_WRITE is also set, as user-space would otherwise be able to write to read-only memory. This achieves W^X in the whole system except for the core kernel itself, as the boot code currently doesn't know the purpose of pages when identity mapping the first 4 MiB.
parent c8bbd6e0aa
commit 2e3d7c45af
8 changed files with 80 additions and 41 deletions
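For orientation: Sortix page protections carry separate user-space bits (PROT_READ, PROT_WRITE, PROT_EXEC) and kernel-side bits (PROT_KREAD, PROT_KWRITE, PROT_KEXEC). The rule this commit establishes is that a user bit implies the matching kernel bit, and PROT_KWRITE is never granted on user-accessible pages without PROT_WRITE. A minimal sketch of that sanitization, using the constants from the sys_mmap hunk below (illustrative, not the literal kernel code):

    // Sketch: derive the kernel-side protection bits from the
    // user-requested ones, as the sys_mmap hunk in this commit does.
    static int SanitizeProtection(int prot)
    {
        if ( prot & PROT_READ )
            prot |= PROT_KREAD;   // kernel may read what user space may read
        if ( prot & PROT_WRITE )
            prot |= PROT_KWRITE;  // kernel write access only alongside PROT_WRITE
        return prot | PROT_FORK;  // the mapping survives fork(2)
    }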
@@ -262,13 +262,14 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
 		     pheader->p_vaddr % pheader->p_align !=
 		     pheader->p_offset % pheader->p_align )
 			return errno = EINVAL, 0;
-		int prot = PROT_FORK | PROT_KREAD | PROT_KWRITE;
+		int kprot = PROT_KWRITE | PROT_FORK;
+		int prot = PROT_FORK;
 		if ( pheader->p_flags & PF_X )
 			prot |= PROT_EXEC;
 		if ( pheader->p_flags & PF_R )
-			prot |= PROT_READ;
+			prot |= PROT_READ | PROT_KREAD;
 		if ( pheader->p_flags & PF_W )
-			prot |= PROT_WRITE;
+			prot |= PROT_WRITE | PROT_KWRITE;
 
 		if ( pheader->p_vaddr < userspace_addr )
 			return errno = EINVAL, 0;
@@ -284,7 +285,7 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
 		struct segment segment;
 		segment.addr = map_start;
 		segment.size = map_size;
-		segment.prot = prot;
+		segment.prot = kprot;
 
 		assert(IsUserspaceSegment(&segment));
 
@@ -298,7 +299,7 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
 			return errno = EINVAL, 0;
 		}
 
-		if ( !Memory::MapRange(segment.addr, segment.size, prot, PAGE_USAGE_USER_SPACE) )
+		if ( !Memory::MapRange(segment.addr, segment.size, kprot, PAGE_USAGE_USER_SPACE) )
 		{
 			kthread_mutex_unlock(&process->segment_lock);
 			kthread_mutex_unlock(&process->segment_write_lock);
@@ -313,11 +314,12 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
 			return errno = EINVAL, 0;
 		}
 
-		kthread_mutex_unlock(&process->segment_lock);
-		kthread_mutex_unlock(&process->segment_write_lock);
 
 		memset((void*) segment.addr, 0, segment.size);
 		memcpy((void*) pheader->p_vaddr, file + pheader->p_offset, pheader->p_filesz);
+		Memory::ProtectMemory(CurrentProcess(), segment.addr, segment.size, prot);
 
+		kthread_mutex_unlock(&process->segment_lock);
+		kthread_mutex_unlock(&process->segment_write_lock);
 	}
 }
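The loader hunks above compute two protections per ELF segment: kprot, used only while the kernel copies the segment in, and prot, the final state derived from the p_flags bits. For a typical read/execute text segment (PF_R | PF_X) the values work out as below (a worked example following the code above, not text from the commit):

    // PF_R | PF_X text segment under the new scheme:
    //   kprot = PROT_KWRITE | PROT_FORK                         (load window)
    //   prot  = PROT_FORK | PROT_EXEC | PROT_READ | PROT_KREAD  (final state)
    // The segment is mapped with kprot, filled by memset()/memcpy(),
    // then switched to prot by Memory::ProtectMemory(), so it is never
    // writable and executable at the same time.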
@@ -318,7 +318,7 @@ bool ExtractFromPhysicalInto(addr_t physaddr, size_t size, Ref<Descriptor> desc)
 	// Map the physical frames onto our address space.
 	addr_t mapat = initrd_addr_alloc.from;
 	for ( size_t i = 0; i < size; i += Page::Size() )
-		if ( !Memory::Map(physaddr + i, mapat + i, PROT_KREAD) )
+		if ( !Memory::Map(physaddr + i, mapat + i, PROT_KREAD | PROT_KWRITE) )
 			PanicF("Unable to map the init ramdisk into virtual memory");
 	Memory::Flush();
 
@@ -148,7 +148,7 @@ bool ProtectMemory(Process* process, uintptr_t addr, size_t size, int prot)
 	// First split the segments overlapping with [addr, addr + size) into
 	// smaller segments that doesn't cross addr and addr+size, while verifying
 	// there are no gaps in that region. This is where the operation can fail as
-	// the AddSegtment call can run out of memory. There is no harm in splitting
+	// the AddSegment call can run out of memory. There is no harm in splitting
 	// the segments into smaller chunks.
 	for ( size_t offset = 0; offset < size; )
 	{
@@ -353,7 +353,7 @@ void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
 		new_segment.size = aligned_size;
 	else if ( !PlaceSegment(&new_segment, process, (void*) addr, aligned_size, flags) )
 		return errno = ENOMEM, MAP_FAILED;
-	new_segment.prot = prot | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+	new_segment.prot = PROT_KWRITE | PROT_FORK;
 
 	// Allocate a memory segment with the desired properties.
 	if ( !Memory::MapMemory(process, new_segment.addr, new_segment.size, new_segment.prot) )
@@ -390,6 +390,16 @@ void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
 	}
 	}
 
+	// Finally switch to the desired page protections.
+	kthread_mutex_lock(&process->segment_lock);
+	if ( prot & PROT_READ )
+		prot |= PROT_KREAD;
+	if ( prot & PROT_WRITE )
+		prot |= PROT_KWRITE;
+	prot |= PROT_FORK;
+	Memory::ProtectMemory(CurrentProcess(), new_segment.addr, new_segment.size, prot);
+	kthread_mutex_unlock(&process->segment_lock);
+
 	lock1.Reset();
 
 	return (void*) new_segment.addr;
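sys_mmap now follows the same two-phase scheme: the pages are created PROT_KWRITE | PROT_FORK, so user space cannot touch them while the kernel zeroes or populates them, and only afterwards are they switched to the caller's protections. From user space nothing changes except that the NX policy is now honored; a hypothetical illustration in ordinary POSIX C (not from the commit):

    #include <sys/mman.h>

    // Hypothetical user program: without PROT_EXEC, the returned pages
    // are now hardware-enforced non-executable on x86_64.
    void* p = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);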
@@ -851,7 +851,8 @@ int Process::Execute(const char* programname, const uint8_t* program,
 	size_t raw_tls_size_aligned = -(-raw_tls_size & ~(aux.tls_mem_align-1));
 	if ( raw_tls_size && raw_tls_size_aligned == 0 /* overflow */ )
 		return errno = EINVAL, -1;
-	int raw_tls_prot = PROT_READ | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+	int raw_tls_kprot = PROT_KWRITE | PROT_FORK;
+	int raw_tls_prot = PROT_READ | PROT_KREAD | PROT_FORK;
 	void* raw_tls_hint = stack_hint;
 
 	size_t tls_size = raw_tls_size_aligned + aux.uthread_size;
@@ -873,7 +874,8 @@ int Process::Execute(const char* programname, const uint8_t* program,
 	void* tls_hint = stack_hint;
 
 	size_t auxcode_size = Page::Size();
-	int auxcode_prot = PROT_EXEC | PROT_READ | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+	int auxcode_kprot = PROT_KWRITE | PROT_FORK;
+	int auxcode_prot = PROT_EXEC | PROT_READ | PROT_KREAD | PROT_FORK;
 	void* auxcode_hint = stack_hint;
 
 	size_t arg_size = 0;
@@ -900,9 +902,9 @@ int Process::Execute(const char* programname, const uint8_t* program,
 
 	if ( !(MapSegment(&arg_segment, stack_hint, arg_size, 0, stack_prot) &&
 	       MapSegment(&stack_segment, stack_hint, stack_size, 0, stack_prot) &&
-	       MapSegment(&raw_tls_segment, raw_tls_hint, raw_tls_size, 0, raw_tls_prot) &&
+	       MapSegment(&raw_tls_segment, raw_tls_hint, raw_tls_size, 0, raw_tls_kprot) &&
 	       MapSegment(&tls_segment, tls_hint, tls_size, 0, tls_prot) &&
-	       MapSegment(&auxcode_segment, auxcode_hint, auxcode_size, 0, auxcode_prot)) )
+	       MapSegment(&auxcode_segment, auxcode_hint, auxcode_size, 0, auxcode_kprot)) )
 	{
 		kthread_mutex_unlock(&segment_lock);
 		kthread_mutex_unlock(&segment_write_lock);
@@ -910,9 +912,6 @@ int Process::Execute(const char* programname, const uint8_t* program,
 		return errno = ENOMEM, -1;
 	}
 
-	kthread_mutex_unlock(&segment_lock);
-	kthread_mutex_unlock(&segment_write_lock);
-
 	char** target_argv = (char**) ((char*) arg_segment.addr + 0);
 	char** target_envp = (char**) ((char*) arg_segment.addr + argv_size);
 	char* target_strings = (char*) ((char*) arg_segment.addr + argv_size + envp_size);
@@ -945,6 +944,7 @@ int Process::Execute(const char* programname, const uint8_t* program,
 	uint8_t* target_raw_tls = (uint8_t*) raw_tls_segment.addr;
 	memcpy(target_raw_tls, file_raw_tls, aux.tls_file_size);
 	memset(target_raw_tls + aux.tls_file_size, 0, aux.tls_mem_size - aux.tls_file_size);
+	Memory::ProtectMemory(this, raw_tls_segment.addr, raw_tls_segment.size, raw_tls_prot);
 
 	uint8_t* target_tls = (uint8_t*) (tls_segment.addr + tls_offset_tls);
 	assert((((uintptr_t) target_tls) & (aux.tls_mem_align-1)) == 0);
@@ -1020,6 +1020,10 @@ int Process::Execute(const char* programname, const uint8_t* program,
 	(void) auxcode;
 #warning "You need to initialize auxcode with a sigreturn routine"
 #endif
+	Memory::ProtectMemory(this, auxcode_segment.addr, auxcode_segment.size, auxcode_prot);
+
+	kthread_mutex_unlock(&segment_lock);
+	kthread_mutex_unlock(&segment_write_lock);
 
 	dtable->OnExecute();
 
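Execute() applies the pattern to the TLS image and the auxiliary code page as well: map with the kernel-only *_kprot, fill in the contents, then call Memory::ProtectMemory() with the final *_prot, keeping segment_write_lock held across the whole writable window. Schematically (a sketch of the pattern with hypothetical names, not literal kernel code):

    // 1) Map the segment kernel-writable only.
    MapSegment(&seg, hint, size, 0, PROT_KWRITE | PROT_FORK);
    // 2) Populate it while user space can neither read nor run it.
    memcpy((void*) seg.addr, image, image_size);
    // 3) Downgrade to the final protections,
    //    e.g. PROT_READ | PROT_KREAD | PROT_FORK.
    Memory::ProtectMemory(this, seg.addr, seg.size, final_prot);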
@@ -114,6 +114,8 @@ __start:
 	movl $(bootpml1_b + 0x003), bootpml2 + 1 * 8
 
 	# Page Table (identity map the first 4 MiB, except NULL).
+	# TODO: This is insecure as it doesn't restrict write & execute access to
+	# the code kernel code & variables appropriately.
 	movl $(bootpml1_a + 8), %edi
 	movl $0x1003, %esi
 	movl $1023, %ecx
@@ -152,15 +154,15 @@ __start:
 	orl $0x20, %eax
 	movl %eax, %cr4
 
-	# Enable long mode.
+	# Enable long mode and the No-Execute bit.
 	movl $0xC0000080, %ecx
 	rdmsr
-	orl $0x100, %eax
+	orl $0x900, %eax
 	wrmsr
 
-	# Enable paging and enter long mode (still 32-bit)
+	# Enable paging (with write protection) and enter long mode (still 32-bit)
 	movl %cr0, %eax
-	orl $0x80000000, %eax
+	orl $0x80010000, %eax
 	movl %eax, %cr0
 
 	# Load the Global Descriptor Table pointer register.
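The magic numbers in the boot code are architectural constants: MSR 0xC0000080 is EFER, whose bit 8 (0x100) is LME (long mode enable) and bit 11 (0x800) is NXE (no-execute enable), giving the new value 0x900; in CR0, bit 31 (0x80000000) enables paging and bit 16 (0x10000) is WP, which makes the supervisor honor read-only pages, giving 0x80010000. Spelled out as C++ constants for reference:

    const uint32_t EFER_MSR = 0xC0000080; // Extended Feature Enable Register
    const uint32_t EFER_LME = 1 << 8;     // Long Mode Enable         (0x100)
    const uint32_t EFER_NXE = 1 << 11;    // No-Execute Enable        (0x800)
    const uint32_t CR0_WP   = 1 << 16;    // supervisor Write Protect (0x10000)
    const uint32_t CR0_PG   = 1U << 31;   // Paging               (0x80000000)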
@@ -414,26 +414,40 @@ namespace Memory {
 
 addr_t ProtectionToPMLFlags(int prot)
 {
-	addr_t result = 0;
-	if ( prot & PROT_EXEC ) { result |= PML_USERSPACE; }
-	if ( prot & PROT_READ ) { result |= PML_USERSPACE; }
-	if ( prot & PROT_WRITE ) { result |= PML_USERSPACE | PML_WRITABLE; }
-	if ( prot & PROT_KEXEC ) { result |= 0; }
-	if ( prot & PROT_KREAD ) { result |= 0; }
-	if ( prot & PROT_KWRITE ) { result |= 0; }
-	if ( prot & PROT_FORK ) { result |= PML_FORK; }
+	addr_t result = PML_NX;
+	if ( prot & PROT_EXEC )
+	{
+		result |= PML_USERSPACE;
+		result &= ~PML_NX;
+	}
+	if ( prot & PROT_READ )
+		result |= PML_USERSPACE;
+	if ( prot & PROT_WRITE )
+		result |= PML_USERSPACE | PML_WRITABLE;
+	if ( prot & PROT_KEXEC )
+		result &= ~PML_NX;
+	if ( prot & PROT_KREAD )
+		result |= 0;
+	if ( prot & PROT_KWRITE )
+		result |= PML_WRITABLE;
+	if ( prot & PROT_FORK )
+		result |= PML_FORK;
 	return result;
 }
 
 int PMLFlagsToProtection(addr_t flags)
 {
-	int prot = PROT_KREAD | PROT_KWRITE | PROT_KEXEC;
-	bool user = flags & PML_USERSPACE;
-	bool write = flags & PML_WRITABLE;
-	if ( user )
-		prot |= PROT_EXEC | PROT_READ;
-	if ( user && write )
+	int prot = PROT_KREAD;
+	if ( (flags & PML_USERSPACE) && !(flags & PML_NX) )
+		prot |= PROT_EXEC;
+	if ( (flags & PML_USERSPACE) )
+		prot |= PROT_READ;
+	if ( (flags & PML_USERSPACE) && (flags & PML_WRITABLE) )
 		prot |= PROT_WRITE;
+	if ( !(flags & PML_NX) )
+		prot |= PROT_KEXEC;
+	if ( flags & PML_WRITABLE )
+		prot |= PROT_KWRITE;
 	if ( flags & PML_FORK )
 		prot |= PROT_FORK;
 	return prot;
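With result seeded to PML_NX and the bit cleared only by PROT_EXEC or PROT_KEXEC, the translation now round-trips execute permission through the page tables. Two cases worked by hand from the code above:

    // PROT_READ | PROT_WRITE | PROT_FORK              (a data page)
    //   -> PML_NX | PML_USERSPACE | PML_WRITABLE | PML_FORK
    // PROT_EXEC | PROT_READ | PROT_KREAD | PROT_FORK  (a text page)
    //   -> PML_USERSPACE | PML_FORK                   (PML_NX cleared)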
@@ -1,6 +1,6 @@
 /*******************************************************************************
 
-    Copyright(C) Jonas 'Sortie' Termansen 2011, 2012, 2014.
+    Copyright(C) Jonas 'Sortie' Termansen 2011, 2012, 2014, 2015.
 
     This file is part of Sortix.
 
@@ -47,8 +47,13 @@ const addr_t PML_AVAILABLE1 = 1 << 9;
 const addr_t PML_AVAILABLE2 = 1 << 10;
 const addr_t PML_AVAILABLE3 = 1 << 11;
 const addr_t PML_FORK = PML_AVAILABLE1;
-const addr_t PML_FLAGS = 0xFFFUL; // Bits used for the flags.
-const addr_t PML_ADDRESS = ~0xFFFUL; // Bits used for the address.
+#ifdef __x86_64__
+const addr_t PML_NX = 1UL << 63;
+#else
+const addr_t PML_NX = 0;
+#endif
+const addr_t PML_FLAGS = 0xFFFUL | PML_NX; // Bits used for the flags.
+const addr_t PML_ADDRESS = ~PML_FLAGS; // Bits used for the address.
 const addr_t PAT_UC = 0x00; // Uncacheable
 const addr_t PAT_WC = 0x01; // Write-Combine
 const addr_t PAT_WT = 0x04; // Writethrough
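PML_NX lives in bit 63 of a 64-bit page-table entry and only takes effect once EFER.NXE is set, which the x86_64 boot code above now does. Plain 32-bit paging has no such bit, so PML_NX is defined as 0 there and every readable page remains executable. A compile-time sanity check one could add (illustrative, not part of the commit):

    #ifdef __x86_64__
    static_assert(PML_NX == 1UL << 63, "PML_NX is page-table-entry bit 63");
    #endif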
@@ -80,6 +80,8 @@ __start:
 	movl $(bootpml1 + 0x003), bootpml2 + 0 * 4
 
 	# Page Table (identity map the first 4 MiB, except NULL).
+	# TODO: This is insecure as it doesn't restrict write & execute access to
+	# the code kernel code & variables appropriately.
 	movl $(bootpml1 + 4), %edi
 	movl $0x1003, %esi
 	movl $1023, %ecx
@@ -103,9 +105,9 @@ __start:
 	movl $(physpml1 + 0x003), bootpml2 + 1021 * 4
 	movl $(physpml0 + 0x003), physpml1 + 0 * 4
 
-	# Enable paging.
+	# Enable paging (with write protection).
 	movl %cr0, %edi
-	orl $0x80000000, %edi
+	orl $0x80010000, %edi
 	movl %edi, %cr0
 
 	# Load the Global Descriptor Table pointer register.