Add paging no-execute and write protection support.

Enable the NX bit on x86_64, setting it on every page that is not mapped
PROT_EXEC, and enable write protection (CR0.WP), which disables the default
behavior where the kernel can write to read-only memory. Fix the kernel's
broken assumption that it can always write to read-only memory, and take
care to never set PROT_KWRITE on user-space pages unless PROT_WRITE is also
set, as user-space would otherwise be able to write to read-only memory.
This achieves W^X across the whole system except for the core kernel
itself, which currently doesn't know the purpose of its pages when identity
mapping the first 4 MiB.
parent c8bbd6e0aa
commit 2e3d7c45af

8 changed files with 80 additions and 41 deletions
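The hardware involved: the per-page no-execute bit is bit 63 of a 64-bit
page-table entry and only takes effect once EFER.NXE is enabled, while
CR0.WP makes the CPU enforce read-only mappings even for supervisor
accesses. A minimal compile-time sketch of these architectural bits
(illustrative names, not taken from the Sortix sources):

    #include <cstdint>

    // Architectural constants from the x86 manuals; the names are ours.
    constexpr uint32_t MSR_EFER = 0xC0000080; // extended feature enable register
    constexpr uint32_t EFER_LME = 1u << 8;    // long mode enable
    constexpr uint32_t EFER_NXE = 1u << 11;   // no-execute enable
    constexpr uint32_t CR0_WP   = 1u << 16;   // kernel honors read-only pages
    constexpr uint32_t CR0_PG   = 1u << 31;   // paging enable
    constexpr uint64_t PTE_NX   = 1ull << 63; // per-page no-execute (needs EFER.NXE)

    // The boot code in this commit ORs exactly these bits together:
    static_assert((EFER_LME | EFER_NXE) == 0x900, "boot.s: orl $0x900, %eax");
    static_assert((CR0_PG | CR0_WP) == 0x80010000u, "boot.s: orl $0x80010000, %eax");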
@@ -262,13 +262,14 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
          pheader->p_vaddr % pheader->p_align !=
          pheader->p_offset % pheader->p_align )
         return errno = EINVAL, 0;
-    int prot = PROT_FORK | PROT_KREAD | PROT_KWRITE;
+    int kprot = PROT_KWRITE | PROT_FORK;
+    int prot = PROT_FORK;
     if ( pheader->p_flags & PF_X )
         prot |= PROT_EXEC;
     if ( pheader->p_flags & PF_R )
-        prot |= PROT_READ;
+        prot |= PROT_READ | PROT_KREAD;
     if ( pheader->p_flags & PF_W )
-        prot |= PROT_WRITE;
+        prot |= PROT_WRITE | PROT_KWRITE;
 
     if ( pheader->p_vaddr < userspace_addr )
         return errno = EINVAL, 0;

@@ -284,7 +285,7 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
     struct segment segment;
     segment.addr = map_start;
     segment.size = map_size;
-    segment.prot = prot;
+    segment.prot = kprot;
 
     assert(IsUserspaceSegment(&segment));
 

@@ -298,7 +299,7 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
         return errno = EINVAL, 0;
     }
 
-    if ( !Memory::MapRange(segment.addr, segment.size, prot, PAGE_USAGE_USER_SPACE) )
+    if ( !Memory::MapRange(segment.addr, segment.size, kprot, PAGE_USAGE_USER_SPACE) )
     {
         kthread_mutex_unlock(&process->segment_lock);
         kthread_mutex_unlock(&process->segment_write_lock);

@@ -313,11 +314,12 @@ uintptr_t Load(const void* file_ptr, size_t file_size, Auxiliary* aux)
         return errno = EINVAL, 0;
     }
 
-    kthread_mutex_unlock(&process->segment_lock);
-    kthread_mutex_unlock(&process->segment_write_lock);
 
     memset((void*) segment.addr, 0, segment.size);
     memcpy((void*) pheader->p_vaddr, file + pheader->p_offset, pheader->p_filesz);
+    Memory::ProtectMemory(CurrentProcess(), segment.addr, segment.size, prot);
 
+    kthread_mutex_unlock(&process->segment_lock);
+    kthread_mutex_unlock(&process->segment_write_lock);
     }
 }
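The loader changes above follow a map/populate/protect pattern: each
segment is first mapped with kernel-only write access (kprot), filled from
the ELF image, and only then switched to its final user protections, so a
read-only or executable segment is never left kernel-writable once it
holds data. A toy sketch of the pattern, where the PROT_* values and the
two helpers are stand-ins rather than the Sortix API:

    #include <cstddef>
    #include <cstring>

    // Illustrative protection bits, loosely modeled on Sortix's flags.
    enum {
        PROT_READ = 1 << 0, PROT_WRITE = 1 << 1, PROT_EXEC = 1 << 2,
        PROT_KREAD = 1 << 3, PROT_KWRITE = 1 << 4, PROT_FORK = 1 << 5,
    };

    // Stubs standing in for Memory::MapRange and Memory::ProtectMemory.
    static bool MapRange(void*, size_t, int) { return true; }
    static void ProtectMemory(void*, size_t, int) { }

    static bool LoadSegment(void* addr, size_t size,
                            const void* src, size_t len, int prot)
    {
        // 1. Map kernel-writable only; no user bits are granted yet.
        if ( !MapRange(addr, size, PROT_KWRITE | PROT_FORK) )
            return false;
        // 2. Populate while the kernel still has write access.
        memset(addr, 0, size);
        memcpy(addr, src, len);
        // 3. Drop to the final protections; PROT_KWRITE survives only if
        //    the segment is PROT_WRITE, so read-only stays read-only.
        ProtectMemory(addr, size, prot);
        return true;
    }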
@@ -318,7 +318,7 @@ bool ExtractFromPhysicalInto(addr_t physaddr, size_t size, Ref<Descriptor> desc)
     // Map the physical frames onto our address space.
     addr_t mapat = initrd_addr_alloc.from;
     for ( size_t i = 0; i < size; i += Page::Size() )
-        if ( !Memory::Map(physaddr + i, mapat + i, PROT_KREAD) )
+        if ( !Memory::Map(physaddr + i, mapat + i, PROT_KREAD | PROT_KWRITE) )
             PanicF("Unable to map the init ramdisk into virtual memory");
     Memory::Flush();
 
@@ -148,7 +148,7 @@ bool ProtectMemory(Process* process, uintptr_t addr, size_t size, int prot)
     // First split the segments overlapping with [addr, addr + size) into
     // smaller segments that doesn't cross addr and addr+size, while verifying
     // there are no gaps in that region. This is where the operation can fail as
-    // the AddSegtment call can run out of memory. There is no harm in splitting
+    // the AddSegment call can run out of memory. There is no harm in splitting
     // the segments into smaller chunks.
     for ( size_t offset = 0; offset < size; )
     {
@@ -353,7 +353,7 @@ void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
         new_segment.size = aligned_size;
     else if ( !PlaceSegment(&new_segment, process, (void*) addr, aligned_size, flags) )
         return errno = ENOMEM, MAP_FAILED;
-    new_segment.prot = prot | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+    new_segment.prot = PROT_KWRITE | PROT_FORK;
 
     // Allocate a memory segment with the desired properties.
     if ( !Memory::MapMemory(process, new_segment.addr, new_segment.size, new_segment.prot) )

@@ -390,6 +390,16 @@ void* sys_mmap(void* addr_ptr, size_t size, int prot, int flags, int fd,
         }
     }
 
+    // Finally switch to the desired page protections.
+    kthread_mutex_lock(&process->segment_lock);
+    if ( prot & PROT_READ )
+        prot |= PROT_KREAD;
+    if ( prot & PROT_WRITE )
+        prot |= PROT_KWRITE;
+    prot |= PROT_FORK;
+    Memory::ProtectMemory(CurrentProcess(), new_segment.addr, new_segment.size, prot);
+    kthread_mutex_unlock(&process->segment_lock);
+
     lock1.Reset();
 
     return (void*) new_segment.addr;
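The fixup block added above spells out the commit's invariant for user
mappings: PROT_KREAD mirrors PROT_READ, PROT_KWRITE mirrors PROT_WRITE
(and is never set without it), and PROT_FORK is always added so the
mapping is inherited across fork. As a sketch, reusing the toy PROT_*
values from the Load() example earlier:

    // Mirror user protections into their kernel counterparts, as the new
    // sys_mmap code does just before Memory::ProtectMemory.
    static int NormalizeUserProt(int prot)
    {
        if ( prot & PROT_READ )
            prot |= PROT_KREAD;
        if ( prot & PROT_WRITE )
            prot |= PROT_KWRITE; // never without PROT_WRITE on user pages
        return prot | PROT_FORK;
    }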
@@ -851,7 +851,8 @@ int Process::Execute(const char* programname, const uint8_t* program,
     size_t raw_tls_size_aligned = -(-raw_tls_size & ~(aux.tls_mem_align-1));
     if ( raw_tls_size && raw_tls_size_aligned == 0 /* overflow */ )
         return errno = EINVAL, -1;
-    int raw_tls_prot = PROT_READ | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+    int raw_tls_kprot = PROT_KWRITE | PROT_FORK;
+    int raw_tls_prot = PROT_READ | PROT_KREAD | PROT_FORK;
     void* raw_tls_hint = stack_hint;
 
     size_t tls_size = raw_tls_size_aligned + aux.uthread_size;

@@ -873,7 +874,8 @@ int Process::Execute(const char* programname, const uint8_t* program,
     void* tls_hint = stack_hint;
 
     size_t auxcode_size = Page::Size();
-    int auxcode_prot = PROT_EXEC | PROT_READ | PROT_KREAD | PROT_KWRITE | PROT_FORK;
+    int auxcode_kprot = PROT_KWRITE | PROT_FORK;
+    int auxcode_prot = PROT_EXEC | PROT_READ | PROT_KREAD | PROT_FORK;
     void* auxcode_hint = stack_hint;
 
     size_t arg_size = 0;

@@ -900,9 +902,9 @@ int Process::Execute(const char* programname, const uint8_t* program,
 
     if ( !(MapSegment(&arg_segment, stack_hint, arg_size, 0, stack_prot) &&
            MapSegment(&stack_segment, stack_hint, stack_size, 0, stack_prot) &&
-           MapSegment(&raw_tls_segment, raw_tls_hint, raw_tls_size, 0, raw_tls_prot) &&
+           MapSegment(&raw_tls_segment, raw_tls_hint, raw_tls_size, 0, raw_tls_kprot) &&
            MapSegment(&tls_segment, tls_hint, tls_size, 0, tls_prot) &&
-           MapSegment(&auxcode_segment, auxcode_hint, auxcode_size, 0, auxcode_prot)) )
+           MapSegment(&auxcode_segment, auxcode_hint, auxcode_size, 0, auxcode_kprot)) )
     {
         kthread_mutex_unlock(&segment_lock);
         kthread_mutex_unlock(&segment_write_lock);

@@ -910,9 +912,6 @@ int Process::Execute(const char* programname, const uint8_t* program,
         return errno = ENOMEM, -1;
     }
 
-    kthread_mutex_unlock(&segment_lock);
-    kthread_mutex_unlock(&segment_write_lock);
-
     char** target_argv = (char**) ((char*) arg_segment.addr + 0);
     char** target_envp = (char**) ((char*) arg_segment.addr + argv_size);
     char* target_strings = (char*) ((char*) arg_segment.addr + argv_size + envp_size);

@@ -945,6 +944,7 @@ int Process::Execute(const char* programname, const uint8_t* program,
     uint8_t* target_raw_tls = (uint8_t*) raw_tls_segment.addr;
     memcpy(target_raw_tls, file_raw_tls, aux.tls_file_size);
     memset(target_raw_tls + aux.tls_file_size, 0, aux.tls_mem_size - aux.tls_file_size);
+    Memory::ProtectMemory(this, raw_tls_segment.addr, raw_tls_segment.size, raw_tls_prot);
 
     uint8_t* target_tls = (uint8_t*) (tls_segment.addr + tls_offset_tls);
     assert((((uintptr_t) target_tls) & (aux.tls_mem_align-1)) == 0);

@@ -1020,6 +1020,10 @@ int Process::Execute(const char* programname, const uint8_t* program,
     (void) auxcode;
 #warning "You need to initialize auxcode with a sigreturn routine"
 #endif
+    Memory::ProtectMemory(this, auxcode_segment.addr, auxcode_segment.size, auxcode_prot);
+
+    kthread_mutex_unlock(&segment_lock);
+    kthread_mutex_unlock(&segment_write_lock);
 
     dtable->OnExecute();
 
@@ -114,6 +114,8 @@ __start:
     movl $(bootpml1_b + 0x003), bootpml2 + 1 * 8
 
     # Page Table (identity map the first 4 MiB, except NULL).
+    # TODO: This is insecure as it doesn't restrict write & execute access to
+    # the code kernel code & variables appropriately.
     movl $(bootpml1_a + 8), %edi
     movl $0x1003, %esi
     movl $1023, %ecx

@@ -152,15 +154,15 @@ __start:
     orl $0x20, %eax
     movl %eax, %cr4
 
-    # Enable long mode.
+    # Enable long mode and the No-Execute bit.
     movl $0xC0000080, %ecx
     rdmsr
-    orl $0x100, %eax
+    orl $0x900, %eax
     wrmsr
 
-    # Enable paging and enter long mode (still 32-bit)
+    # Enable paging (with write protection) and enter long mode (still 32-bit)
     movl %cr0, %eax
-    orl $0x80000000, %eax
+    orl $0x80010000, %eax
     movl %eax, %cr0
 
     # Load the Global Descriptor Table pointer register.
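In the boot code above, 0x900 sets EFER.LME (bit 8, long mode) together
with EFER.NXE (bit 11, no-execute enable), and 0x80010000 sets CR0.PG
(bit 31) plus the new CR0.WP (bit 16). The same MSR dance in C++ with
GCC-style inline assembly, as a sketch rather than the Sortix boot path:

    #include <cstdint>

    // rdmsr/wrmsr move the value through edx:eax, with the MSR in ecx.
    static inline uint64_t rdmsr(uint32_t msr)
    {
        uint32_t lo, hi;
        __asm__ __volatile__("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr));
        return (uint64_t) hi << 32 | lo;
    }

    static inline void wrmsr(uint32_t msr, uint64_t value)
    {
        __asm__ __volatile__("wrmsr" : : "c"(msr), "a"((uint32_t) value),
                             "d"((uint32_t) (value >> 32)));
    }

    static inline void EnableLongModeAndNX()
    {
        // IA32_EFER is MSR 0xC0000080; set LME (0x100) and NXE (0x800).
        wrmsr(0xC0000080, rdmsr(0xC0000080) | 0x100 | 0x800);
    }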
@@ -414,26 +414,40 @@ namespace Memory {
 
 addr_t ProtectionToPMLFlags(int prot)
 {
-    addr_t result = 0;
-    if ( prot & PROT_EXEC ) { result |= PML_USERSPACE; }
-    if ( prot & PROT_READ ) { result |= PML_USERSPACE; }
-    if ( prot & PROT_WRITE ) { result |= PML_USERSPACE | PML_WRITABLE; }
-    if ( prot & PROT_KEXEC ) { result |= 0; }
-    if ( prot & PROT_KREAD ) { result |= 0; }
-    if ( prot & PROT_KWRITE ) { result |= 0; }
-    if ( prot & PROT_FORK ) { result |= PML_FORK; }
+    addr_t result = PML_NX;
+    if ( prot & PROT_EXEC )
+    {
+        result |= PML_USERSPACE;
+        result &= ~PML_NX;
+    }
+    if ( prot & PROT_READ )
+        result |= PML_USERSPACE;
+    if ( prot & PROT_WRITE )
+        result |= PML_USERSPACE | PML_WRITABLE;
+    if ( prot & PROT_KEXEC )
+        result &= ~PML_NX;
+    if ( prot & PROT_KREAD )
+        result |= 0;
+    if ( prot & PROT_KWRITE )
+        result |= PML_WRITABLE;
+    if ( prot & PROT_FORK )
+        result |= PML_FORK;
     return result;
 }
 
 int PMLFlagsToProtection(addr_t flags)
 {
-    int prot = PROT_KREAD | PROT_KWRITE | PROT_KEXEC;
-    bool user = flags & PML_USERSPACE;
-    bool write = flags & PML_WRITABLE;
-    if ( user )
-        prot |= PROT_EXEC | PROT_READ;
-    if ( user && write )
+    int prot = PROT_KREAD;
+    if ( (flags & PML_USERSPACE) && !(flags & PML_NX) )
+        prot |= PROT_EXEC;
+    if ( (flags & PML_USERSPACE) )
+        prot |= PROT_READ;
+    if ( (flags & PML_USERSPACE) && (flags & PML_WRITABLE) )
         prot |= PROT_WRITE;
+    if ( !(flags & PML_NX) )
+        prot |= PROT_KEXEC;
+    if ( flags & PML_WRITABLE )
+        prot |= PROT_KWRITE;
     if ( flags & PML_FORK )
         prot |= PROT_FORK;
     return prot;
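The key change above: a page-table entry now starts out no-execute, and
only PROT_EXEC or PROT_KEXEC clears PML_NX; likewise PROT_KWRITE finally
maps to PML_WRITABLE, since CR0.WP makes that bit binding for the kernel
too. A constexpr restatement of the NX half so the property can be checked
at compile time (bit positions are stand-ins, except PML_NX, which really
is bit 63 on x86_64):

    #include <cstdint>

    enum { PROT_READ = 1 << 0, PROT_EXEC = 1 << 2 }; // toy values as above
    using addr_t = uint64_t;
    constexpr addr_t PML_WRITABLE  = addr_t(1) << 1;
    constexpr addr_t PML_USERSPACE = addr_t(1) << 2;
    constexpr addr_t PML_NX        = addr_t(1) << 63;

    constexpr addr_t ToPMLFlags(int prot) // NX logic of ProtectionToPMLFlags
    {
        addr_t result = PML_NX; // default: not executable
        if ( prot & PROT_EXEC )
            result = (result | PML_USERSPACE) & ~PML_NX;
        if ( prot & PROT_READ )
            result |= PML_USERSPACE;
        return result;
    }

    static_assert(ToPMLFlags(PROT_READ) & PML_NX, "data pages stay no-execute");
    static_assert(!(ToPMLFlags(PROT_READ | PROT_EXEC) & PML_NX), "code pages lose NX");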
@@ -1,6 +1,6 @@
 /*******************************************************************************
 
-    Copyright(C) Jonas 'Sortie' Termansen 2011, 2012, 2014.
+    Copyright(C) Jonas 'Sortie' Termansen 2011, 2012, 2014, 2015.
 
     This file is part of Sortix.
 
@@ -47,8 +47,13 @@ const addr_t PML_AVAILABLE1 = 1 << 9;
 const addr_t PML_AVAILABLE2 = 1 << 10;
 const addr_t PML_AVAILABLE3 = 1 << 11;
 const addr_t PML_FORK = PML_AVAILABLE1;
-const addr_t PML_FLAGS = 0xFFFUL; // Bits used for the flags.
-const addr_t PML_ADDRESS = ~0xFFFUL; // Bits used for the address.
+#ifdef __x86_64__
+const addr_t PML_NX = 1UL << 63;
+#else
+const addr_t PML_NX = 0;
+#endif
+const addr_t PML_FLAGS = 0xFFFUL | PML_NX; // Bits used for the flags.
+const addr_t PML_ADDRESS = ~PML_FLAGS; // Bits used for the address.
 const addr_t PAT_UC = 0x00; // Uncacheable
 const addr_t PAT_WC = 0x01; // Write-Combine
 const addr_t PAT_WT = 0x04; // Writethrough
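Widening PML_FLAGS to include PML_NX matters: the address mask is derived
from the flags mask, and without it bit 63 would be misread as part of the
physical address. On i386 without PAE there is no NX bit at all, hence
PML_NX = 0. A compile-time check mirroring the new header (x86_64 case):

    #include <cstdint>

    using addr_t = uint64_t;
    constexpr addr_t PML_NX      = addr_t(1) << 63;
    constexpr addr_t PML_FLAGS   = 0xFFFUL | PML_NX; // bits used for the flags
    constexpr addr_t PML_ADDRESS = ~PML_FLAGS;       // bits used for the address

    static_assert((PML_FLAGS & PML_ADDRESS) == 0, "flag and address bits are disjoint");
    static_assert((PML_ADDRESS & PML_NX) == 0, "NX never leaks into the address");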
@@ -80,6 +80,8 @@ __start:
     movl $(bootpml1 + 0x003), bootpml2 + 0 * 4
 
     # Page Table (identity map the first 4 MiB, except NULL).
+    # TODO: This is insecure as it doesn't restrict write & execute access to
+    # the code kernel code & variables appropriately.
     movl $(bootpml1 + 4), %edi
     movl $0x1003, %esi
     movl $1023, %ecx

@@ -103,9 +105,9 @@ __start:
     movl $(physpml1 + 0x003), bootpml2 + 1021 * 4
     movl $(physpml0 + 0x003), physpml1 + 0 * 4
 
-    # Enable paging.
+    # Enable paging (with write protection).
     movl %cr0, %edi
-    orl $0x80000000, %edi
+    orl $0x80010000, %edi
     movl %edi, %cr0
 
     # Load the Global Descriptor Table pointer register.