diff --git a/.gitignore b/.gitignore
index 9849721d2a..2e16569097 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,7 +18,6 @@
 *.pch
 *.pdb
 *.rej
-*.s
 *.sav
 *.swp
 *.yarb
diff --git a/common.mk b/common.mk
index 96b4fb9739..7a00667b32 100644
--- a/common.mk
+++ b/common.mk
@@ -895,6 +895,7 @@ strstr.$(OBJEXT): {$(VPATH)}strstr.c
 nt.$(OBJEXT): {$(VPATH)}nt.c
 ia64.$(OBJEXT): {$(VPATH)}ia64.s
 	$(CC) $(CFLAGS) -c $<
+coroutine/amd64/Context.$(OBJEXT): {$(VPATH)}coroutine/amd64/Context.s
 
 ###
 
diff --git a/configure.ac b/configure.ac
index f4be92039d..03d3cf4aea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2274,6 +2274,12 @@ AS_IF([test x"$target_cpu" = xia64], [
     ])
 ])
 
+AS_CASE(["$target_cpu-$target_os"],
+    [x86_64-darwin*], [
+        AC_LIBOBJ([coroutine/amd64/Context])
+    ],
+)
+
 AC_CACHE_CHECK(whether right shift preserve sign bit, rb_cv_rshift_sign,
     [AC_COMPILE_IFELSE([AC_LANG_BOOL_COMPILE_TRY([], [(-1==(-1>>1))])],
 	rb_cv_rshift_sign=yes,
diff --git a/cont.c b/cont.c
index 6387118f4a..1d736f8c27 100644
--- a/cont.c
+++ b/cont.c
@@ -26,6 +26,27 @@
  * in Proc. of 51th Programming Symposium, pp.21--28 (2010) (in Japanese).
  */
 
+/*
+  Enable this include to make fiber yield/resume about twice as fast.
+
+  # Without libcoro
+  koyoko% ./build/bin/ruby ./fiber_benchmark.rb 10000 1000
+  setup time for 10000 fibers: 0.099961
+  execution time for 1000 messages: 19.505909
+
+  # With libcoro
+  koyoko% ./build/bin/ruby ./fiber_benchmark.rb 10000 1000
+  setup time for 10000 fibers: 0.099268
+  execution time for 1000 messages: 8.491746
+*/
+
+/* Only the amd64 context switch is built, and only on Darwin (see configure.ac). */
+#if defined(__APPLE__) && defined(__x86_64__)
+#define FIBER_USE_COROUTINE
+#include "coroutine/amd64/Context.h"
+#define FIBER_USE_NATIVE 1
+#endif
+
 #if !defined(FIBER_USE_NATIVE)
 # if defined(HAVE_GETCONTEXT) && defined(HAVE_SETCONTEXT)
 # if 0
@@ -139,7 +160,7 @@ enum fiber_status {
 #define FIBER_TERMINATED_P(fib) ((fib)->status == FIBER_TERMINATED)
 #define FIBER_RUNNABLE_P(fib) (FIBER_CREATED_P(fib) || FIBER_SUSPENDED_P(fib))
 
-#if FIBER_USE_NATIVE && !defined(_WIN32)
+#if FIBER_USE_NATIVE && !defined(FIBER_USE_COROUTINE) && !defined(_WIN32)
 static inline int
 fiber_context_create(ucontext_t *context, void (*func)(), void *arg, void *ptr, size_t size)
 {
@@ -181,7 +202,11 @@ struct rb_fiber_struct {
     unsigned int transferred : 1;
 
 #if FIBER_USE_NATIVE
-#ifdef _WIN32
+#if defined(FIBER_USE_COROUTINE)
+    coroutine_context context;
+    void *ss_sp;
+    size_t ss_size;
+#elif defined(_WIN32)
     void *fib_handle;
 #else
     ucontext_t context;
@@ -382,8 +407,16 @@ cont_free(void *ptr)
     else {
 	/* fiber */
 	const rb_fiber_t *fib = (rb_fiber_t*)cont;
-#ifdef _WIN32
-	if (!fiber_is_root_p(fib)) {
+#if defined(FIBER_USE_COROUTINE)
+        coroutine_destroy(&fib->context);
+        if (fib->ss_sp != NULL) {
+            if (fiber_is_root_p(fib)) {
+                rb_bug("Illegal root fiber parameter");
+            }
+            munmap((void*)fib->ss_sp, fib->ss_size);
+        }
+#elif defined(_WIN32)
+        if (!fiber_is_root_p(fib)) {
 	    /* don't delete root fiber handle */
 	    if (fib->fib_handle) {
 		DeleteFiber(fib->fib_handle);
@@ -799,9 +832,7 @@ fiber_entry(void *arg)
 }
 
 #else /* _WIN32 */
-NORETURN(static void fiber_entry(void *arg));
-static void
-fiber_entry(void *arg)
+static COROUTINE fiber_entry(coroutine_context * from, coroutine_context * to)
 {
rb_fiber_start(); } @@ -862,7 +893,17 @@ fiber_initialize_machine_stack_context(rb_fiber_t *fib, size_t size) { rb_execution_context_t *sec = &fib->cont.saved_ec; -#ifdef _WIN32 +#if defined(FIBER_USE_COROUTINE) + char *ptr; + STACK_GROW_DIR_DETECTION; + + ptr = fiber_machine_stack_alloc(size); + fib->ss_sp = ptr; + fib->ss_size = size; + coroutine_initialize(&fib->context, fiber_entry, ptr+size, size); + sec->machine.stack_start = (VALUE*)(ptr + STACK_DIR_UPPER(0, size)); + sec->machine.stack_maxsize = size - RB_PAGE_SIZE; +#elif defined(_WIN32) # if defined(_MSC_VER) && _MSC_VER <= 1200 # define CreateFiberEx(cs, stacksize, flags, entry, param) \ CreateFiber((stacksize), (entry), (param)) @@ -925,15 +966,15 @@ fiber_setcontext(rb_fiber_t *newfib, rb_fiber_t *oldfib) /* restore thread context */ fiber_restore_thread(th, newfib); -#ifndef _WIN32 + /* swap machine context */ +#if defined(FIBER_USE_COROUTINE) + coroutine_transfer(&oldfib->context, &newfib->context); +#elif defined(_WIN32) + SwitchToFiber(newfib->fib_handle); +#else if (!newfib->context.uc_stack.ss_sp && th->root_fiber != newfib) { rb_bug("non_root_fiber->context.uc_stac.ss_sp should not be NULL"); } -#endif - /* swap machine context */ -#ifdef _WIN32 - SwitchToFiber(newfib->fib_handle); -#else swapcontext(&oldfib->context, &newfib->context); #endif } @@ -1532,7 +1573,9 @@ root_fiber_alloc(rb_thread_t *th) fib->cont.self = fibval; #if FIBER_USE_NATIVE -#ifdef _WIN32 +#if defined(FIBER_USE_COROUTINE) + coroutine_initialize(&fib->context, NULL, NULL, 0); +#elif defined(_WIN32) /* setup fib_handle for root Fiber */ if (fib->fib_handle == 0) { if ((fib->fib_handle = ConvertThreadToFiber(0)) == 0) { @@ -1794,9 +1837,13 @@ rb_fiber_terminate(rb_fiber_t *fib, int need_interrupt) VM_ASSERT(FIBER_RESUMED_P(fib)); rb_fiber_close(fib); -#if FIBER_USE_NATIVE && !defined(_WIN32) +#if FIBER_USE_NATIVE +#if defined(FIBER_USE_COROUTINE) + coroutine_destroy(&fib->context); +#elif !defined(_WIN32) 
fib->context.uc_stack.ss_sp = NULL; #endif +#endif #ifdef MAX_MACHINE_STACK_CACHE /* Ruby must not switch to other thread until storing terminated_machine_stack */ terminated_machine_stack.ptr = fib->ss_sp; diff --git a/coroutine/amd64/Context.h b/coroutine/amd64/Context.h new file mode 100644 index 0000000000..fa5819e115 --- /dev/null +++ b/coroutine/amd64/Context.h @@ -0,0 +1,62 @@ +// +// amd64.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include +#include + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE __attribute__((noreturn)) void + +const size_t COROUTINE_REGISTERS = 6; + +// The fiber context (stack pointer). +typedef struct +{ + void **stack_pointer; +} coroutine_context; + +// The initialization function. +typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self); + +inline void coroutine_initialize( + coroutine_context *context, + coroutine_start start, + void *stack_pointer, + size_t stack_size +) { + /* Force 16-byte alignment */ + context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~0xF); + + if (!start) { + assert(!context->stack_pointer); + /* We are main coroutine for this thread */ + return; + } + + *--context->stack_pointer = NULL; + *--context->stack_pointer = (void*)start; + + context->stack_pointer -= COROUTINE_REGISTERS; + memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS); +} + +coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target); + +inline void coroutine_destroy(coroutine_context * context) +{ + context->stack_pointer = NULL; +} + +#if __cplusplus +} +#endif diff --git a/coroutine/amd64/Context.s b/coroutine/amd64/Context.s new file mode 100644 index 0000000000..980ba73202 --- /dev/null +++ b/coroutine/amd64/Context.s @@ -0,0 +1,44 @@ +## +## amd64.c +## 
File file is part of the "Coroutine" project and released under the MIT License. +## +## Created by Samuel Williams on 10/5/2018. +## Copyright, 2018, by Samuel Williams. All rights reserved. +## + +.text + +.globl coroutine_transfer +coroutine_transfer: + +# For older linkers +.globl _coroutine_transfer +_coroutine_transfer: + + # Save caller state + pushq %rbp + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + # Save caller stack pointer + movq %rsp, (%rdi) + + # Restore callee stack pointer + movq (%rsi), %rsp + + # Restore callee stack + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx + popq %rbp + + # Put the first argument into the return value + # movq %rdi, %rax + + # We pop the return address and jump to it + ret diff --git a/coroutine/arm32/Context.h b/coroutine/arm32/Context.h new file mode 100644 index 0000000000..1c730ed458 --- /dev/null +++ b/coroutine/arm32/Context.h @@ -0,0 +1,59 @@ +// +// amd64.h +// File file is part of the "Coroutine" project and released under the MIT License. +// +// Created by Samuel Williams on 10/5/2018. +// Copyright, 2018, by Samuel Williams. All rights reserved. +// + +#pragma once + +#include +#include + +#if __cplusplus +extern "C" { +#endif + +#define COROUTINE void + +const size_t COROUTINE_REGISTERS = 9; + +// The fiber context (stack pointer). +typedef struct +{ + void **stack_pointer; +} coroutine_context; + +// The initialization function. 
+typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);
+
+static inline void coroutine_initialize(
+    coroutine_context *context,
+    coroutine_start start,
+    void *stack_pointer,
+    size_t stack_size
+) {
+    context->stack_pointer = (void**)stack_pointer;
+
+    if (!start) {
+        assert(!context->stack_pointer);
+        /* We are main coroutine for this thread */
+        return;
+    }
+
+    *--context->stack_pointer = (void*)start;
+
+    context->stack_pointer -= COROUTINE_REGISTERS;
+    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+}
+
+coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);
+
+static inline void coroutine_destroy(coroutine_context * context)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/coroutine/arm32/Context.s b/coroutine/arm32/Context.s
new file mode 100644
index 0000000000..91fc697c18
--- /dev/null
+++ b/coroutine/arm32/Context.s
@@ -0,0 +1,23 @@
+##
+##  arm32.s
+##  This file is part of the "Coroutine" project and released under the MIT License.
+##
+##  Created by Samuel Williams on 10/5/2018.
+##  Copyright, 2018, by Samuel Williams. All rights reserved.
+##
+
+.text
+
+.globl coroutine_transfer
+coroutine_transfer:
+    @ Save caller state on its own stack: 9 registers plus the return address
+    push {r4-r12,lr}
+
+    @ Save caller stack pointer into *current (r0)
+    str sp, [r0]
+
+    @ Load callee stack pointer from *target (r1)
+    ldr sp, [r1]
+
+    @ Restore callee state; popping into pc resumes the callee
+    pop {r4-r12,pc}
diff --git a/coroutine/arm64/Context.h b/coroutine/arm64/Context.h
new file mode 100644
index 0000000000..8dc010d982
--- /dev/null
+++ b/coroutine/arm64/Context.h
@@ -0,0 +1,61 @@
+//
+//  arm64.h
+//  This file is part of the "Coroutine" project and released under the MIT License.
+//
+//  Created by Samuel Williams on 10/5/2018.
+//  Copyright, 2018, by Samuel Williams. All rights reserved.
+//
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define COROUTINE void
+
+enum { COROUTINE_REGISTERS = 0xb0 / 8 };
+
+// The fiber context (stack pointer).
+typedef struct
+{
+    void **stack_pointer;
+} coroutine_context;
+
+// The initialization function.
+typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);
+
+static inline void coroutine_initialize(
+    coroutine_context *context,
+    coroutine_start start,
+    void *stack_pointer,
+    size_t stack_size
+) {
+    /* Force 16-byte alignment */
+    context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~(uintptr_t)0xF);
+
+    if (!start) {
+        assert(!context->stack_pointer);
+        /* We are main coroutine for this thread */
+        return;
+    }
+
+    context->stack_pointer -= COROUTINE_REGISTERS;
+    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+
+    context->stack_pointer[0xa0 / 8] = (void*)start;
+}
+
+coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);
+
+static inline void coroutine_destroy(coroutine_context * context)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/coroutine/arm64/Context.s b/coroutine/arm64/Context.s
new file mode 100644
index 0000000000..f8d76e82c4
--- /dev/null
+++ b/coroutine/arm64/Context.s
@@ -0,0 +1,58 @@
+##
+##  arm64.s
+##  This file is part of the "Coroutine" project and released under the MIT License.
+##
+##  Created by Samuel Williams on 10/5/2018.
+##  Copyright, 2018, by Samuel Williams. All rights reserved.
+##
+
+.text
+.align 2
+.global coroutine_transfer
+coroutine_transfer:
+    # Reserve a 0xb0-byte frame on the caller's stack
+    sub sp, sp, 0xb0
+
+    # Store the caller's d8-d15 and x19-x30 into the frame
+    stp d8, d9, [sp, 0x00]
+    stp d10, d11, [sp, 0x10]
+    stp d12, d13, [sp, 0x20]
+    stp d14, d15, [sp, 0x30]
+    stp x19, x20, [sp, 0x40]
+    stp x21, x22, [sp, 0x50]
+    stp x23, x24, [sp, 0x60]
+    stp x25, x26, [sp, 0x70]
+    stp x27, x28, [sp, 0x80]
+    stp x29, x30, [sp, 0x90]
+
+    # Store the return address in its own slot at 0xa0
+    str x30, [sp, 0xa0]
+
+    # Write the caller's stack pointer to *x0 (first argument)
+    mov x2, sp
+    str x2, [x0, 0]
+
+    # Switch to the stack pointer stored at *x1 (second argument)
+    ldr x3, [x1, 0]
+    mov sp, x3
+
+    # Reload the callee's d8-d15 and x19-x30 from its frame
+    ldp d8, d9, [sp, 0x00]
+    ldp d10, d11, [sp, 0x10]
+    ldp d12, d13, [sp, 0x20]
+    ldp d14, d15, [sp, 0x30]
+    ldp x19, x20, [sp, 0x40]
+    ldp x21, x22, [sp, 0x50]
+    ldp x23, x24, [sp, 0x60]
+    ldp x25, x26, [sp, 0x70]
+    ldp x27, x28, [sp, 0x80]
+    ldp x29, x30, [sp, 0x90]
+
+    # Fetch the resume address from the 0xa0 slot into x4
+    ldr x4, [sp, 0xa0]
+
+    # Release the frame
+    add sp, sp, 0xb0
+
+    # Branch to the resume address held in x4
+    ret x4
diff --git a/coroutine/win32/Context.asm b/coroutine/win32/Context.asm
new file mode 100644
index 0000000000..aa27099cfe
--- /dev/null
+++ b/coroutine/win32/Context.asm
@@ -0,0 +1,43 @@
+;;
+;;  win32.asm
+;;  This file is part of the "Coroutine" project and released under the MIT License.
+;;
+;;  Created by Samuel Williams on 10/5/2018.
+;;  Copyright, 2018, by Samuel Williams. All rights reserved.
+;;
+
+.model flat
+
+.code
+
+; Using fastcall is a big win (and it's the same as how x64 works).
+; In coroutine transfer, the arguments are passed in ecx and edx. We don't need
+; to touch these in order to pass them to the destination coroutine.
+
+@coroutine_transfer@8 proc
+    ; Save caller registers
+    push ebp
+    push ebx
+    push edi
+    push esi
+
+    ; Save caller stack pointer
+    mov dword ptr [ecx], esp
+
+    ; Restore callee stack pointer
+    mov esp, dword ptr [edx]
+
+    ; Restore callee registers
+    pop esi
+    pop edi
+    pop ebx
+    pop ebp
+
+    ; Save the first argument as the return value
+    mov eax, ecx
+
+    ; Jump to the address on the stack
+    ret
+@coroutine_transfer@8 endp
+
+end
diff --git a/coroutine/win32/Context.h b/coroutine/win32/Context.h
new file mode 100644
index 0000000000..e5283cd4b0
--- /dev/null
+++ b/coroutine/win32/Context.h
@@ -0,0 +1,64 @@
+//
+//  win32.h
+//  This file is part of the "Coroutine" project and released under the MIT License.
+//
+//  Created by Samuel Williams on 10/5/2018.
+//  Copyright, 2018, by Samuel Williams. All rights reserved.
+//
+
+#pragma once
+
+#include <assert.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define COROUTINE void __fastcall
+
+enum { COROUTINE_REGISTERS = 4 };
+
+// The fiber context (stack pointer).
+typedef struct
+{
+    void **stack_pointer;
+} coroutine_context;
+
+// The initialization function.
+typedef void(__fastcall * coroutine_start)(coroutine_context *from, coroutine_context *self);
+
+static inline void coroutine_initialize(
+    coroutine_context *context,
+    coroutine_start start,
+    void *stack_pointer,
+    size_t stack_size
+) {
+    context->stack_pointer = (void**)stack_pointer;
+
+    if (!start) {
+        assert(!context->stack_pointer);
+        /* We are main coroutine for this thread */
+        return;
+    }
+
+    /* Windows Thread Information Block */
+    *--context->stack_pointer = 0;
+    *--context->stack_pointer = stack_pointer;
+    *--context->stack_pointer = (void*)stack_size;
+
+    *--context->stack_pointer = (void*)start;
+
+    context->stack_pointer -= COROUTINE_REGISTERS;
+    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+}
+
+coroutine_context * __fastcall coroutine_transfer(coroutine_context * current, coroutine_context * target);
+
+static inline void coroutine_destroy(coroutine_context * context)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/coroutine/win64/Context.asm b/coroutine/win64/Context.asm
new file mode 100644
index 0000000000..f57d3fd387
--- /dev/null
+++ b/coroutine/win64/Context.asm
@@ -0,0 +1,44 @@
+;;
+;;  win64.asm
+;;  This file is part of the "Coroutine" project and released under the MIT License.
+;;
+;;  Created by Samuel Williams on 10/5/2018.
+;;  Copyright, 2018, by Samuel Williams. All rights reserved.
+;;
+
+.code
+
+coroutine_transfer proc
+    push rbp
+    push rbx
+    push rdi
+    push rsi
+    push r12
+    push r13
+    push r14
+    push r15
+
+    ; Save caller stack pointer
+    mov [rcx], rsp
+
+    ; Restore callee stack pointer
+    mov rsp, [rdx]
+
+    ; Restore callee registers
+    pop r15
+    pop r14
+    pop r13
+    pop r12
+    pop rsi
+    pop rdi
+    pop rbx
+    pop rbp
+
+    ; Put the first argument into the return value
+    mov rax, rcx
+
+    ; We pop the return address and jump to it
+    ret
+coroutine_transfer endp
+
+end
diff --git a/coroutine/win64/Context.h b/coroutine/win64/Context.h
new file mode 100644
index 0000000000..53d9d43ae6
--- /dev/null
+++ b/coroutine/win64/Context.h
@@ -0,0 +1,64 @@
+//
+//  win64.h
+//  This file is part of the "Coroutine" project and released under the MIT License.
+//
+//  Created by Samuel Williams on 10/5/2018.
+//  Copyright, 2018, by Samuel Williams. All rights reserved.
+//
+
+#pragma once
+
+#include <assert.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define COROUTINE void
+
+enum { COROUTINE_REGISTERS = 8 };
+
+// The fiber context (stack pointer).
+typedef struct
+{
+    void **stack_pointer;
+} coroutine_context;
+
+// The initialization function.
+typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);
+
+static inline void coroutine_initialize(
+    coroutine_context *context,
+    coroutine_start start,
+    void *stack_pointer,
+    size_t stack_size
+) {
+    context->stack_pointer = (void**)stack_pointer;
+
+    if (!start) {
+        assert(!context->stack_pointer);
+        /* We are main coroutine for this thread */
+        return;
+    }
+
+    /* Windows Thread Information Block */
+    *--context->stack_pointer = 0;
+    *--context->stack_pointer = stack_pointer;
+    *--context->stack_pointer = (void*)stack_size;
+
+    *--context->stack_pointer = (void*)start;
+
+    context->stack_pointer -= COROUTINE_REGISTERS;
+    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
+}
+
+coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);
+
+static inline void coroutine_destroy(coroutine_context * context)
+{
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/fiber_benchmark.rb b/fiber_benchmark.rb
new file mode 100755
index 0000000000..030c1666c2
--- /dev/null
+++ b/fiber_benchmark.rb
@@ -0,0 +1,103 @@
+#!/usr/bin/env ruby
+
+require 'fiber'
+require 'benchmark'
+
+class Ring
+  attr_reader :id
+  attr_accessor :attach
+
+  def initialize(id)
+    @id = id
+    #puts "Creating ring ... #{id}"
+    @fiber = Fiber.new do
+      pass_message
+    end
+  end
+
+  def |(other)
+    other.attach = self if !other.nil?
+    #puts "attaching #{@id} to #{other.id}" if !other.nil?
+    other
+  end
+
+  def resume
+    @fiber.resume
+  end
+
+  def pass_message
+    #puts "I'm fiber #{@id}"
+    while message = message_in
+      #puts "... #{@id} I received message #{message}"
+      # do something with message
+      message_out(message)
+    end
+  end
+
+  def message_in
+    #puts "Resuming #{@attach.id}" if !@attach.nil?
+    @attach.resume if !@attach.nil?
+  end
+
+  def message_out(message)
+    Fiber.yield(message)
+  end
+
+end
+
+class RingStart < Ring
+  attr_accessor :message
+  def initialize(n, m, message)
+    @m = m
+    @message = message
+    super(n)
+  end
+
+  def pass_message
+    loop { message_out(@message) }
+  end
+
+end
+
+
+def create_chain_r(i, chain)
+  # recursive version
+  return chain if i<=0
+  r = chain.nil? ? Ring.new(i) : chain | Ring.new(i)
+  create_chain_r(i-1, r)
+end
+
+def create_chain(n, chain)
+  # loop version
+  # needed to avoid stack overflow for high n
+  n.downto(1) { |i|
+    chain = chain | Ring.new(i)
+  }
+  chain
+end
+
+def run_benchmark(n, m)
+  mess = :hello
+  ringu = nil
+  chain = nil
+
+  tm = Benchmark.measure {
+    ringu = RingStart.new(0, m, mess)
+    chain = create_chain(n, ringu)
+  }.format("%10.6r\n").gsub(/\(|\)/, "")
+
+  puts "setup time for #{n} fibers: #{tm}"
+
+  tm = Benchmark.measure {
+    m.times { ringu.message = chain.resume }
+  }.format("%10.6r\n").gsub(/\(|\)/, "")
+
+  puts "execution time for #{m} messages: #{tm}"
+end
+
+n = (ARGV[0] || 1000).to_i
+m = (ARGV[1] || 10000).to_i
+
+5.times do
+  run_benchmark(n, m)
+end