From 0b5e1442f5425490f9c9d249683d05fe72b29ea9 Mon Sep 17 00:00:00 2001 From: samuel Date: Tue, 20 Nov 2018 10:17:44 +0000 Subject: [PATCH] Initial support for x64-mingw32 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65868 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- Review fixes folded into the configure.ac hunks: every AS_CASE arm is now terminated by a comma (m4 otherwise concatenates the next pattern into the preceding action), the Darwin pattern ends in a glob so that versioned target_os values such as darwin18 can match, and the x64-mingw32 arm reports win64 rather than x86. The index line below still carries the original blob ids. configure.ac | 16 ++++++++++++++-- coroutine/win32/Context.asm | 2 ++ coroutine/win32/Context.h | 10 +++++----- coroutine/win64/Context.asm | 37 +++++++++++++++++++++++++++++++------ coroutine/win64/Context.h | 24 +++++++++++++++++------- 5 files changed, 69 insertions(+), 20 deletions(-) diff --git a/configure.ac b/configure.ac index 4d74c7737e..4dec975960 100644 --- a/configure.ac +++ b/configure.ac @@ -2325,8 +2325,14 @@ AS_IF([test "${universal_binary-no}" = yes ], [ AC_DEFINE_UNQUOTED(STACK_GROW_DIRECTION, $dir) ]) -AC_MSG_CHECKING(native fiber implementation) +AC_MSG_CHECKING(native coroutine implementation for $target_cpu-$target_os) AS_CASE(["$target_cpu-$target_os"], + [x*64-darwin*], [ + AC_MSG_RESULT(amd64) + COROUTINE_H=coroutine/amd64/Context.h + AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"]) + AC_LIBOBJ([coroutine/amd64/Context]) + ], [x*64-linux], [ AC_MSG_RESULT(amd64) COROUTINE_H=coroutine/amd64/Context.h @@ -2339,6 +2345,12 @@ AS_CASE(["$target_cpu-$target_os"], AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"]) AC_LIBOBJ([coroutine/x86/Context]) - ] + ], + [x64-mingw32], [ + AC_MSG_RESULT(win64) + COROUTINE_H=coroutine/win64/Context.h + AC_DEFINE_UNQUOTED(FIBER_USE_COROUTINE, ["$COROUTINE_H"]) + AC_LIBOBJ([coroutine/win64/Context]) + ], [*], [ AC_MSG_RESULT(no) ] diff --git a/coroutine/win32/Context.asm b/coroutine/win32/Context.asm index b024b22fc7..22b56c0568 100644 --- a/coroutine/win32/Context.asm +++ b/coroutine/win32/Context.asm @@ -9,6 +9,8 @@ .code +assume fs:nothing + ; Using fastcall is a big win (and it's the same has how x64 works). ; In coroutine transfer, the arguments are passed in ecx and edx. 
We don't need ; to touch these in order to pass them to the destination coroutine. diff --git a/coroutine/win32/Context.h b/coroutine/win32/Context.h index ba6b1fd07b..95b4ccdba1 100644 --- a/coroutine/win32/Context.h +++ b/coroutine/win32/Context.h @@ -24,7 +24,7 @@ struct coroutine_context void **stack_pointer; }; -typedef COROUTINE(* coroutine_start)(coroutine_context *from, coroutine_context *self); +typedef void(__fastcall * coroutine_start)(coroutine_context *from, coroutine_context *self); static inline void coroutine_initialize( coroutine_context *context, @@ -40,12 +40,12 @@ static inline void coroutine_initialize( return; } + *--context->stack_pointer = (void*)start; + /* Windows Thread Information Block */ *--context->stack_pointer = 0; /* fs:[0] */ - *--context->stack_pointer = stack_pointer + stack_size; /* fs:[4] */ - *--context->stack_pointer = (void*)stack_pointer; /* fs:[8] */ - - *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)stack_pointer; /* fs:[4] */ + *--context->stack_pointer = (void*)((char *)stack_pointer - stack_size); /* fs:[8] */ context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); diff --git a/coroutine/win64/Context.asm b/coroutine/win64/Context.asm index 4031c613fe..59673ffa3e 100644 --- a/coroutine/win64/Context.asm +++ b/coroutine/win64/Context.asm @@ -9,9 +9,8 @@ coroutine_transfer proc ; Save the thread information block: - push gs:[0x00] - push gs:[0x08] - push gs:[0x10] + push qword ptr gs:[8] + push qword ptr gs:[16] ; Save caller registers: push rbp @@ -23,12 +22,34 @@ coroutine_transfer proc push r14 push r15 + movaps [rsp - 24], xmm6 + movaps [rsp - 40], xmm7 + movaps [rsp - 56], xmm8 + movaps [rsp - 72], xmm9 + movaps [rsp - 88], xmm10 + movaps [rsp - 104], xmm11 + movaps [rsp - 120], xmm12 + movaps [rsp - 136], xmm13 + movaps [rsp - 152], xmm14 + movaps [rsp - 168], xmm15 + ; Save caller stack pointer: mov [rcx], rsp ; Restore 
callee stack pointer: mov rsp, [rdx] + movaps xmm15, [rsp - 168] + movaps xmm14, [rsp - 152] + movaps xmm13, [rsp - 136] + movaps xmm12, [rsp - 120] + movaps xmm11, [rsp - 104] + movaps xmm10, [rsp - 88] + movaps xmm9, [rsp - 72] + movaps xmm8, [rsp - 56] + movaps xmm7, [rsp - 40] + movaps xmm6, [rsp - 24] + ; Restore callee stack: pop r15 pop r14 @@ -40,9 +61,8 @@ coroutine_transfer proc pop rbp ; Restore the thread information block: - pop gs:[0x10] - pop gs:[0x08] - pop gs:[0x00] + pop qword ptr gs:[16] + pop qword ptr gs:[8] ; Put the first argument into the return value: mov rax, rcx @@ -51,4 +71,9 @@ coroutine_transfer proc ret coroutine_transfer endp +coroutine_trampoline proc + ; Do not remove this. This forces 16-byte alignment when entering the coroutine. + ret +coroutine_trampoline endp + end diff --git a/coroutine/win64/Context.h b/coroutine/win64/Context.h index 32d6e1038b..db003a2ee6 100644 --- a/coroutine/win64/Context.h +++ b/coroutine/win64/Context.h @@ -17,13 +17,16 @@ extern "C" { #define COROUTINE __declspec(noreturn) void const size_t COROUTINE_REGISTERS = 8; +const size_t COROUTINE_XMM_REGISTERS = 1+10*2; struct coroutine_context { void **stack_pointer; }; -typedef COROUTINE(* coroutine_start)(coroutine_context *from, coroutine_context *self); +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +void coroutine_trampoline(); static inline void coroutine_initialize( coroutine_context *context, @@ -31,7 +34,8 @@ static inline void coroutine_initialize( void *stack_pointer, size_t stack_size ) { - context->stack_pointer = (void**)stack_pointer; + /* Force 16-byte alignment */ + context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~0xF); if (!start) { assert(!context->stack_pointer); @@ -39,16 +43,22 @@ static inline void coroutine_initialize( return; } - /* Windows Thread Information Block */ - *--context->stack_pointer = 0; /* gs:[0x00] */ - *--context->stack_pointer = stack_pointer + stack_size; /* 
gs:[0x08] */ - *--context->stack_pointer = (void*)stack_pointer; /* gs:[0x10] */ - + /* Win64 ABI requires space for arguments */ + context->stack_pointer -= 4; + /* Return address */ + *--context->stack_pointer = 0; *--context->stack_pointer = (void*)start; + *--context->stack_pointer = (void*)coroutine_trampoline; + + /* Windows Thread Information Block */ + /* *--context->stack_pointer = 0; */ /* gs:[0x00] is not used */ + *--context->stack_pointer = (void*)stack_pointer; /* gs:[0x08] */ + *--context->stack_pointer = (void*)((char *)stack_pointer - stack_size); /* gs:[0x10] */ context->stack_pointer -= COROUTINE_REGISTERS; memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); + memset(context->stack_pointer - COROUTINE_XMM_REGISTERS, 0, sizeof(void*) * COROUTINE_XMM_REGISTERS); } coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);