1
0
Fork 0
mirror of https://github.com/ruby/ruby.git synced 2022-11-09 12:17:21 -05:00

Native implementation of coroutines to improve performance of fibers

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65834 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
samuel 2018-11-20 09:59:10 +00:00
parent 27665e5134
commit 07a324a0f6
15 changed files with 686 additions and 17 deletions

1
.gitignore vendored
View file

@ -18,7 +18,6 @@
*.pch
*.pdb
*.rej
*.s
*.sav
*.swp
*.yarb

View file

@ -895,6 +895,7 @@ strstr.$(OBJEXT): {$(VPATH)}strstr.c
nt.$(OBJEXT): {$(VPATH)}nt.c
ia64.$(OBJEXT): {$(VPATH)}ia64.s
$(CC) $(CFLAGS) -c $<
# Object file for the amd64 coroutine context switch, built from its assembly
# source. The suffix variable is OBJEXT, as in the neighbouring rules; the
# previous spelling OBJECT is not used by any other rule shown here.
coroutine/amd64/Context.$(OBJEXT): {$(VPATH)}coroutine/amd64/Context.s
###

View file

@ -2274,6 +2274,12 @@ AS_IF([test x"$target_cpu" = xia64], [
])
])
# Add the amd64 coroutine context switch object to the build when the
# target OS is darwin.
# NOTE(review): the pattern below matches every darwin CPU and not only
# x86_64 - confirm this is intended for an amd64 specific object.
AS_CASE(["$target_cpu-$target_os"],
[*-darwin*], [
AC_LIBOBJ([coroutine/amd64/Context])
],
)
AC_CACHE_CHECK(whether right shift preserve sign bit, rb_cv_rshift_sign,
[AC_COMPILE_IFELSE([AC_LANG_BOOL_COMPILE_TRY([], [(-1==(-1>>1))])],
rb_cv_rshift_sign=yes,

79
cont.c
View file

@ -26,6 +26,27 @@
* in Proc. of 51th Programming Symposium, pp.21--28 (2010) (in Japanese).
*/
/*
Enable this include to make fiber yield/resume about twice as fast.
# Without libcoro
koyoko% ./build/bin/ruby ./fiber_benchmark.rb 10000 1000
setup time for 10000 fibers: 0.099961
execution time for 1000 messages: 19.505909
# With libcoro
koyoko% ./build/bin/ruby ./fiber_benchmark.rb 10000 1000
setup time for 10000 fibers: 0.099268
execution time for 1000 messages: 8.491746
*/
#define FIBER_USE_COROUTINE
#ifdef FIBER_USE_COROUTINE
#include "coroutine/amd64/Context.h"
#define FIBER_USE_NATIVE 1
#endif
#if !defined(FIBER_USE_NATIVE)
# if defined(HAVE_GETCONTEXT) && defined(HAVE_SETCONTEXT)
# if 0
@ -139,7 +160,7 @@ enum fiber_status {
#define FIBER_TERMINATED_P(fib) ((fib)->status == FIBER_TERMINATED)
#define FIBER_RUNNABLE_P(fib) (FIBER_CREATED_P(fib) || FIBER_SUSPENDED_P(fib))
#if FIBER_USE_NATIVE && !defined(_WIN32)
#if FIBER_USE_NATIVE && !defined(FIBER_USE_COROUTINE) && !defined(_WIN32)
static inline int
fiber_context_create(ucontext_t *context, void (*func)(), void *arg, void *ptr, size_t size)
{
@ -181,7 +202,11 @@ struct rb_fiber_struct {
unsigned int transferred : 1;
#if FIBER_USE_NATIVE
#ifdef _WIN32
#if defined(FIBER_USE_COROUTINE)
coroutine_context context;
void *ss_sp;
size_t ss_size;
#elif defined(_WIN32)
void *fib_handle;
#else
ucontext_t context;
@ -382,8 +407,16 @@ cont_free(void *ptr)
else {
/* fiber */
const rb_fiber_t *fib = (rb_fiber_t*)cont;
#ifdef _WIN32
if (!fiber_is_root_p(fib)) {
#if defined(FIBER_USE_COROUTINE)
coroutine_destroy(&fib->context);
if (fib->ss_sp != NULL) {
if (fiber_is_root_p(fib)) {
rb_bug("Illegal root fiber parameter");
}
munmap((void*)fib->ss_sp, fib->ss_size);
}
#elif defined(_WIN32)
if (!fiber_is_root_p(fib)) {
/* don't delete root fiber handle */
if (fib->fib_handle) {
DeleteFiber(fib->fib_handle);
@ -799,9 +832,7 @@ fiber_entry(void *arg)
}
#else /* _WIN32 */
NORETURN(static void fiber_entry(void *arg));
static void
fiber_entry(void *arg)
COROUTINE fiber_entry(coroutine_context * from, coroutine_context * to)
{
rb_fiber_start();
}
@ -862,7 +893,17 @@ fiber_initialize_machine_stack_context(rb_fiber_t *fib, size_t size)
{
rb_execution_context_t *sec = &fib->cont.saved_ec;
#ifdef _WIN32
#if defined(FIBER_USE_COROUTINE)
char *ptr;
STACK_GROW_DIR_DETECTION;
ptr = fiber_machine_stack_alloc(size);
fib->ss_sp = ptr;
fib->ss_size = size;
coroutine_initialize(&fib->context, fiber_entry, ptr+size, size);
sec->machine.stack_start = (VALUE*)(ptr + STACK_DIR_UPPER(0, size));
sec->machine.stack_maxsize = size - RB_PAGE_SIZE;
#elif defined(_WIN32)
# if defined(_MSC_VER) && _MSC_VER <= 1200
# define CreateFiberEx(cs, stacksize, flags, entry, param) \
CreateFiber((stacksize), (entry), (param))
@ -925,15 +966,15 @@ fiber_setcontext(rb_fiber_t *newfib, rb_fiber_t *oldfib)
/* restore thread context */
fiber_restore_thread(th, newfib);
#ifndef _WIN32
/* swap machine context */
#if defined(FIBER_USE_COROUTINE)
coroutine_transfer(&oldfib->context, &newfib->context);
#elif defined(_WIN32)
SwitchToFiber(newfib->fib_handle);
#else
if (!newfib->context.uc_stack.ss_sp && th->root_fiber != newfib) {
rb_bug("non_root_fiber->context.uc_stac.ss_sp should not be NULL");
}
#endif
/* swap machine context */
#ifdef _WIN32
SwitchToFiber(newfib->fib_handle);
#else
swapcontext(&oldfib->context, &newfib->context);
#endif
}
@ -1532,7 +1573,9 @@ root_fiber_alloc(rb_thread_t *th)
fib->cont.self = fibval;
#if FIBER_USE_NATIVE
#ifdef _WIN32
#if defined(FIBER_USE_COROUTINE)
coroutine_initialize(&fib->context, NULL, NULL, 0);
#elif defined(_WIN32)
/* setup fib_handle for root Fiber */
if (fib->fib_handle == 0) {
if ((fib->fib_handle = ConvertThreadToFiber(0)) == 0) {
@ -1794,9 +1837,13 @@ rb_fiber_terminate(rb_fiber_t *fib, int need_interrupt)
VM_ASSERT(FIBER_RESUMED_P(fib));
rb_fiber_close(fib);
#if FIBER_USE_NATIVE && !defined(_WIN32)
#if FIBER_USE_NATIVE
#if defined(FIBER_USE_COROUTINE)
coroutine_destroy(&fib->context);
#elif !defined(_WIN32)
fib->context.uc_stack.ss_sp = NULL;
#endif
#endif
#ifdef MAX_MACHINE_STACK_CACHE
/* Ruby must not switch to other thread until storing terminated_machine_stack */
terminated_machine_stack.ptr = fib->ss_sp;

62
coroutine/amd64/Context.h Normal file
View file

@ -0,0 +1,62 @@
//
// amd64.h
// This file is part of the "Coroutine" project and released under the MIT License.
//
// Created by Samuel Williams on 10/5/2018.
// Copyright, 2018, by Samuel Williams. All rights reserved.
//
#pragma once

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

// Return type of a coroutine entry function. The attribute tells the compiler
// that such a function never returns to its caller.
#define COROUTINE __attribute__((noreturn)) void

// Number of pointer-sized register slots reserved on a fresh stack below the
// entry address. This is an enum constant rather than a `const size_t` object
// so that including this header from several C translation units does not
// define the same external symbol more than once.
enum { COROUTINE_REGISTERS = 6 };

// The fiber context (stack pointer).
typedef struct
{
    void **stack_pointer;
} coroutine_context;

// Signature of the function a new coroutine starts executing in.
typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);

// Prepare `context` so that the first transfer to it begins executing `start`.
//
//   context       - context to initialise.
//   start         - entry function, or NULL for the main coroutine of a thread
//                   (in which case `stack_pointer` must be NULL as well).
//   stack_pointer - highest address of the memory reserved for the stack.
//   stack_size    - size of that memory in bytes (not used on this platform).
//
// `static inline` gives every translation unit its own definition; a plain
// `inline` function has no external definition in C99 and fails to link
// whenever the compiler decides not to inline the call.
static inline void coroutine_initialize(
    coroutine_context *context,
    coroutine_start start,
    void *stack_pointer,
    size_t stack_size
) {
    /* Accepted for a uniform signature across platform headers. */
    (void)stack_size;

    /* Force 16-byte alignment */
    context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~(uintptr_t)0xF);

    if (!start) {
        assert(!context->stack_pointer);
        /* We are main coroutine for this thread */
        return;
    }

    /* One empty slot above the entry address, then the entry address itself. */
    *--context->stack_pointer = NULL;
    *--context->stack_pointer = (void*)start;

    /* Zero-filled slots for the registers restored by the first transfer. */
    context->stack_pointer -= COROUTINE_REGISTERS;
    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
}

// Save the running coroutine's state into `current` and resume `target`.
// Implemented in assembly.
coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);

// Mark `context` as no longer holding a resumable stack.
static inline void coroutine_destroy(coroutine_context * context)
{
    context->stack_pointer = NULL;
}

#ifdef __cplusplus
}
#endif

44
coroutine/amd64/Context.s Normal file
View file

@ -0,0 +1,44 @@
##
## amd64.s
## This file is part of the "Coroutine" project and released under the MIT License.
##
## Created by Samuel Williams on 10/5/2018.
## Copyright, 2018, by Samuel Williams. All rights reserved.
##
# coroutine_transfer(current, target)
#   %rdi - current: receives the stack pointer of the running coroutine
#   %rsi - target:  holds the stack pointer of the coroutine to resume
# Six registers are pushed and popped here, matching COROUTINE_REGISTERS in
# the amd64 Context.h.
.text
.globl coroutine_transfer
coroutine_transfer:
# Same entry point exported under an underscore-prefixed name
# (original note: for older linkers)
.globl _coroutine_transfer
_coroutine_transfer:
# Save caller state on the caller's own stack
pushq %rbp
pushq %rbx
pushq %r12
pushq %r13
pushq %r14
pushq %r15
# Save caller stack pointer into current->stack_pointer
movq %rsp, (%rdi)
# Restore callee stack pointer from target->stack_pointer
movq (%rsi), %rsp
# Restore callee state, in the reverse order of the pushes above
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbx
popq %rbp
# Put the first argument into the return value
# movq %rdi, %rax
# NOTE(review): the move above is commented out, so %rax is not set here even
# though the C prototype declares a coroutine_context * return value - confirm
# that no caller relies on the result.
# We pop the return address and jump to it; for a freshly initialised context
# this is the entry function stored by coroutine_initialize
ret

59
coroutine/arm32/Context.h Normal file
View file

@ -0,0 +1,59 @@
//
// arm32.h
// This file is part of the "Coroutine" project and released under the MIT License.
//
// Created by Samuel Williams on 10/5/2018.
// Copyright, 2018, by Samuel Williams. All rights reserved.
//
#pragma once

#include <assert.h>
#include <stddef.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

// Return type of a coroutine entry function.
#define COROUTINE void

// Number of pointer-sized register slots reserved on a fresh stack below the
// entry address; it must equal the number of words coroutine_transfer restores
// before it loads the entry address. This is an enum constant rather than a
// `const size_t` object so that including this header from several C
// translation units does not define the same external symbol more than once.
enum { COROUTINE_REGISTERS = 9 };

// The fiber context (stack pointer).
typedef struct
{
    void **stack_pointer;
} coroutine_context;

// Signature of the function a new coroutine starts executing in.
typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);

// Prepare `context` so that the first transfer to it begins executing `start`.
//
//   context       - context to initialise.
//   start         - entry function, or NULL for the main coroutine of a thread
//                   (in which case `stack_pointer` must be NULL as well).
//   stack_pointer - highest address of the memory reserved for the stack.
//   stack_size    - size of that memory in bytes (not used on this platform).
//
// `static inline` gives every translation unit its own definition; a plain
// `inline` function has no external definition in C99 and fails to link
// whenever the compiler decides not to inline the call.
static inline void coroutine_initialize(
    coroutine_context *context,
    coroutine_start start,
    void *stack_pointer,
    size_t stack_size
) {
    /* Accepted for a uniform signature across platform headers. */
    (void)stack_size;

    context->stack_pointer = (void**)stack_pointer;

    if (!start) {
        assert(!context->stack_pointer);
        /* We are main coroutine for this thread */
        return;
    }

    /* Entry address first, then zero-filled slots for the saved registers. */
    *--context->stack_pointer = (void*)start;

    context->stack_pointer -= COROUTINE_REGISTERS;
    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
}

// Save the running coroutine's state into `current` and resume `target`.
// Implemented in assembly.
coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);

// Nothing to release on this platform; the context is left untouched.
static inline void coroutine_destroy(coroutine_context * context)
{
    (void)context;
}

#ifdef __cplusplus
}
#endif

15
coroutine/arm32/Context.s Normal file
View file

@ -0,0 +1,15 @@
##
## arm32.s
## This file is part of the "Coroutine" project and released under the MIT License.
##
## Created by Samuel Williams on 10/5/2018.
## Copyright, 2018, by Samuel Williams. All rights reserved.
##
.text
.globl coroutine_transfer
# coroutine_transfer(current, target)
#   r0 - current: receives the stack pointer of the running coroutine
#   r1 - target:  holds the stack pointer of the coroutine to resume
#
# The saved state lives on each coroutine's own stack, in the layout that
# coroutine_initialize in the arm32 Context.h prepares: nine register slots
# (COROUTINE_REGISTERS) followed by the address to continue at. The previous
# code wrote ten registers straight into the one-word context object, used the
# two arguments the wrong way round and never switched to the prepared stack.
#
# r0 and r1 are left untouched, so a freshly started entry function receives
# them as its (from, self) arguments.
coroutine_transfer:
# Save caller state: r4-r11 plus r3 as a ninth slot, which keeps the frame at
# ten words so the stack stays 8-byte aligned, and lr as the resume address
    push {r3-r11, lr}
# Save caller stack pointer into current->stack_pointer
    str sp, [r0]
# Restore callee stack pointer from target->stack_pointer
    ldr sp, [r1]
# Restore callee state and jump to its resume address (or entry function)
    pop {r3-r11, pc}

60
coroutine/arm64/Context.h Normal file
View file

@ -0,0 +1,60 @@
//
// arm64.h
// This file is part of the "Coroutine" project and released under the MIT License.
//
// Created by Samuel Williams on 10/5/2018.
// Copyright, 2018, by Samuel Williams. All rights reserved.
//
#pragma once

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

// Return type of a coroutine entry function.
#define COROUTINE void

// Number of pointer-sized slots in the frame coroutine_transfer builds on the
// stack (0xb0 bytes). This is an enum constant rather than a `const size_t`
// object so that including this header from several C translation units does
// not define the same external symbol more than once.
enum { COROUTINE_REGISTERS = 0xb0 / 8 };

// The fiber context (stack pointer).
typedef struct
{
    void **stack_pointer;
} coroutine_context;

// Signature of the function a new coroutine starts executing in.
typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);

// Prepare `context` so that the first transfer to it begins executing `start`.
//
//   context       - context to initialise.
//   start         - entry function, or NULL for the main coroutine of a thread
//                   (in which case `stack_pointer` must be NULL as well).
//   stack_pointer - highest address of the memory reserved for the stack.
//   stack_size    - size of that memory in bytes (not used on this platform).
//
// `static inline` gives every translation unit its own definition; a plain
// `inline` function has no external definition in C99 and fails to link
// whenever the compiler decides not to inline the call.
static inline void coroutine_initialize(
    coroutine_context *context,
    coroutine_start start,
    void *stack_pointer,
    size_t stack_size
) {
    /* Accepted for a uniform signature across platform headers. */
    (void)stack_size;

    /* Force 16-byte alignment */
    context->stack_pointer = (void**)((uintptr_t)stack_pointer & ~(uintptr_t)0xF);

    if (!start) {
        assert(!context->stack_pointer);
        /* We are main coroutine for this thread */
        return;
    }

    /* Reserve one zero-filled frame. */
    context->stack_pointer -= COROUTINE_REGISTERS;
    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);

    /* Offset 0xa0 is the slot coroutine_transfer jumps through. */
    context->stack_pointer[0xa0 / 8] = (void*)start;
}

// Save the running coroutine's state into `current` and resume `target`.
// Implemented in assembly.
coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);

// Nothing to release on this platform; the context is left untouched.
static inline void coroutine_destroy(coroutine_context * context)
{
    (void)context;
}

#ifdef __cplusplus
}
#endif

58
coroutine/arm64/Context.s Normal file
View file

@ -0,0 +1,58 @@
##
## arm64.s
## This file is part of the "Coroutine" project and released under the MIT License.
##
## Created by Samuel Williams on 10/5/2018.
## Copyright, 2018, by Samuel Williams. All rights reserved.
##
# coroutine_transfer(current, target)
#   x0 - current: receives the stack pointer of the running coroutine
#   x1 - target:  holds the stack pointer of the coroutine to resume
# The frame is 0xb0 bytes, the same size COROUTINE_REGISTERS describes in the
# arm64 Context.h.
.text
.align 2
.global coroutine_transfer
coroutine_transfer:
# Make space on the stack for caller registers
sub sp, sp, 0xb0
# Save caller registers: d8-d15 at 0x00-0x3f, x19-x30 at 0x40-0x9f
stp d8, d9, [sp, 0x00]
stp d10, d11, [sp, 0x10]
stp d12, d13, [sp, 0x20]
stp d14, d15, [sp, 0x30]
stp x19, x20, [sp, 0x40]
stp x21, x22, [sp, 0x50]
stp x23, x24, [sp, 0x60]
stp x25, x26, [sp, 0x70]
stp x27, x28, [sp, 0x80]
stp x29, x30, [sp, 0x90]
# Save return address a second time at 0xa0, the slot the resume jump reads
# (coroutine_initialize stores the entry function in this same slot)
str x30, [sp, 0xa0]
# Save stack pointer to x0 (first argument)
mov x2, sp
str x2, [x0, 0]
# Load stack pointer from x1 (second argument)
ldr x3, [x1, 0]
mov sp, x3
# Restore the target's registers from its frame
ldp d8, d9, [sp, 0x00]
ldp d10, d11, [sp, 0x10]
ldp d12, d13, [sp, 0x20]
ldp d14, d15, [sp, 0x30]
ldp x19, x20, [sp, 0x40]
ldp x21, x22, [sp, 0x50]
ldp x23, x24, [sp, 0x60]
ldp x25, x26, [sp, 0x70]
ldp x27, x28, [sp, 0x80]
ldp x29, x30, [sp, 0x90]
# Load return address into x4
ldr x4, [sp, 0xa0]
# Pop stack frame
add sp, sp, 0xb0
# Jump to return address (in x4); x0 and x1 still hold the two arguments
ret x4

View file

@ -0,0 +1,43 @@
;;
;; win32.asm
;; This file is part of the "Coroutine" project and released under the MIT License.
;;
;; Created by Samuel Williams on 10/5/2018.
;; Copyright, 2018, by Samuel Williams. All rights reserved.
;;
.model flat
.code
; Using fastcall is a big win (and it's the same as how x64 works).
; In coroutine transfer, the arguments are passed in ecx and edx. We don't need
; to touch these in order to pass them to the destination coroutine.
;   ecx - current: receives the stack pointer of the running coroutine
;   edx - target:  holds the stack pointer of the coroutine to resume
; Four registers are pushed and popped, matching COROUTINE_REGISTERS in the
; win32 Context.h.
@coroutine_transfer@8 proc
; Save caller registers on the caller's own stack
push ebp
push ebx
push edi
push esi
; Save caller stack pointer into current->stack_pointer
mov dword ptr [ecx], esp
; Restore callee stack pointer from target->stack_pointer
mov esp, dword ptr [edx]
; Restore callee registers, in the reverse order of the pushes above
pop esi
pop edi
pop ebx
pop ebp
; Save the first argument as the return value
mov eax, dword ptr ecx
; Jump to the address on the stack; for a freshly initialised context this is
; the entry function stored by coroutine_initialize
ret
@coroutine_transfer@8 endp
end

64
coroutine/win32/Context.h Normal file
View file

@ -0,0 +1,64 @@
//
// win32.h
// This file is part of the "Coroutine" project and released under the MIT License.
//
// Created by Samuel Williams on 10/5/2018.
// Copyright, 2018, by Samuel Williams. All rights reserved.
//
#pragma once

#include <assert.h>
#include <stddef.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

// Return type and calling convention of a coroutine entry function.
#define COROUTINE void __fastcall

// Number of pointer-sized register slots reserved on a fresh stack below the
// entry address (coroutine_transfer pushes ebp, ebx, edi and esi). This is an
// enum constant rather than a `const size_t` object so that including this
// header from several C translation units does not define the same external
// symbol more than once.
enum { COROUTINE_REGISTERS = 4 };

// The fiber context (stack pointer). The typedef makes the bare name
// `coroutine_context` usable from C as well as C++; the struct tag is kept so
// `struct coroutine_context` continues to work.
typedef struct coroutine_context
{
    void **stack_pointer;
} coroutine_context;

// Signature of the function a new coroutine starts executing in.
typedef void(__fastcall * coroutine_start)(coroutine_context *from, coroutine_context *self);

// Prepare `context` so that the first transfer to it begins executing `start`.
//
//   context       - context to initialise.
//   start         - entry function, or NULL for the main coroutine of a thread
//                   (in which case `stack_pointer` must be NULL as well).
//   stack_pointer - highest address of the memory reserved for the stack.
//   stack_size    - size of that memory in bytes; stored on the new stack.
//
// `static inline` gives every translation unit its own definition; a plain
// `inline` function has no external definition in C99 and fails to link
// whenever the compiler decides not to inline the call.
static inline void coroutine_initialize(
    coroutine_context *context,
    coroutine_start start,
    void *stack_pointer,
    size_t stack_size
) {
    context->stack_pointer = (void**)stack_pointer;

    if (!start) {
        assert(!context->stack_pointer);
        /* We are main coroutine for this thread */
        return;
    }

    /* Windows Thread Information Block */
    *--context->stack_pointer = 0;
    *--context->stack_pointer = stack_pointer;
    *--context->stack_pointer = (void*)stack_size;

    /* Entry address, then zero-filled slots for the saved registers. */
    *--context->stack_pointer = (void*)start;

    context->stack_pointer -= COROUTINE_REGISTERS;
    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
}

// Save the running coroutine's state into `current` and resume `target`.
// Implemented in assembly.
coroutine_context * __fastcall coroutine_transfer(coroutine_context * current, coroutine_context * target);

// Nothing to release on this platform; the context is left untouched.
static inline void coroutine_destroy(coroutine_context * context)
{
    (void)context;
}

#ifdef __cplusplus
}
#endif

View file

@ -0,0 +1,44 @@
;;
;; win64.asm
;; This file is part of the "Coroutine" project and released under the MIT License.
;;
;; Created by Samuel Williams on 10/5/2018.
;; Copyright, 2018, by Samuel Williams. All rights reserved.
;;
; coroutine_transfer(current, target)
;   rcx - current: receives the stack pointer of the running coroutine
;   rdx - target:  holds the stack pointer of the coroutine to resume
; Eight registers are pushed and popped, matching COROUTINE_REGISTERS in the
; win64 Context.h.
; NOTE(review): only general-purpose registers are saved here; confirm whether
; any XMM registers must also be preserved across a transfer on this platform.
.code
coroutine_transfer proc
; Save caller registers on the caller's own stack
push rbp
push rbx
push rdi
push rsi
push r12
push r13
push r14
push r15
; Save caller stack pointer into current->stack_pointer
mov [rcx], rsp
; Restore callee stack pointer from target->stack_pointer
mov rsp, [rdx]
; Restore callee registers, in the reverse order of the pushes above
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rdi
pop rbx
pop rbp
; Put the first argument into the return value
mov rax, rcx
; We pop the return address and jump to it; for a freshly initialised context
; this is the entry function stored by coroutine_initialize
ret
coroutine_transfer endp
end

64
coroutine/win64/Context.h Normal file
View file

@ -0,0 +1,64 @@
//
// win64.h
// This file is part of the "Coroutine" project and released under the MIT License.
//
// Created by Samuel Williams on 10/5/2018.
// Copyright, 2018, by Samuel Williams. All rights reserved.
//
#pragma once

#include <assert.h>
#include <stddef.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

// Return type of a coroutine entry function.
#define COROUTINE void

// Number of pointer-sized register slots reserved on a fresh stack below the
// entry address (coroutine_transfer pushes rbp, rbx, rdi, rsi and r12-r15).
// This is an enum constant rather than a `const size_t` object so that
// including this header from several C translation units does not define the
// same external symbol more than once.
enum { COROUTINE_REGISTERS = 8 };

// The fiber context (stack pointer). The typedef makes the bare name
// `coroutine_context` usable from C as well as C++; the struct tag is kept so
// `struct coroutine_context` continues to work.
typedef struct coroutine_context
{
    void **stack_pointer;
} coroutine_context;

// Signature of the function a new coroutine starts executing in.
typedef void(* coroutine_start)(coroutine_context *from, coroutine_context *self);

// Prepare `context` so that the first transfer to it begins executing `start`.
//
//   context       - context to initialise.
//   start         - entry function, or NULL for the main coroutine of a thread
//                   (in which case `stack_pointer` must be NULL as well).
//   stack_pointer - highest address of the memory reserved for the stack.
//   stack_size    - size of that memory in bytes; stored on the new stack.
//
// `static inline` gives every translation unit its own definition; a plain
// `inline` function has no external definition in C99 and fails to link
// whenever the compiler decides not to inline the call.
static inline void coroutine_initialize(
    coroutine_context *context,
    coroutine_start start,
    void *stack_pointer,
    size_t stack_size
) {
    context->stack_pointer = (void**)stack_pointer;

    if (!start) {
        assert(!context->stack_pointer);
        /* We are main coroutine for this thread */
        return;
    }

    /* Windows Thread Information Block */
    *--context->stack_pointer = 0;
    *--context->stack_pointer = stack_pointer;
    *--context->stack_pointer = (void*)stack_size;

    /* Entry address, then zero-filled slots for the saved registers. */
    *--context->stack_pointer = (void*)start;

    context->stack_pointer -= COROUTINE_REGISTERS;
    memset(context->stack_pointer, 0, sizeof(void*) * COROUTINE_REGISTERS);
}

// Save the running coroutine's state into `current` and resume `target`.
// Implemented in assembly.
coroutine_context * coroutine_transfer(coroutine_context * current, coroutine_context * target);

// Nothing to release on this platform; the context is left untouched.
static inline void coroutine_destroy(coroutine_context * context)
{
    (void)context;
}

#ifdef __cplusplus
}
#endif

103
fiber_benchmark.rb Executable file
View file

@ -0,0 +1,103 @@
#!/usr/bin/env ruby
require 'fiber'
require 'benchmark'
# One link in a chain of fibers. Each ring owns a fiber that repeatedly pulls a
# message from the ring attached upstream of it and yields that message to
# whoever resumed it.
class Ring
  attr_reader :id
  attr_accessor :attach

  # @param id [Object] label for this ring; only stored and exposed via #id
  def initialize(id)
    @id = id
    @fiber = Fiber.new { pass_message }
  end

  # Chains +other+ after this ring by making this ring its upstream source.
  #
  # @param other [Ring, nil] ring to attach downstream
  # @return [Ring, nil] +other+, so that calls can be chained
  def |(other)
    other.attach = self unless other.nil?
    other
  end

  # Runs this ring's fiber until it next yields a message or finishes.
  def resume
    @fiber.resume
  end

  # Fiber body: relay upstream messages until the upstream yields a falsy one.
  def pass_message
    message = message_in
    while message
      message_out(message)
      message = message_in
    end
  end

  # Pulls the next message by resuming the upstream ring.
  #
  # @return [Object, nil] the message, or nil when nothing is attached
  def message_in
    @attach&.resume
  end

  # Hands +message+ to whoever resumed this ring's fiber.
  def message_out(message)
    Fiber.yield(message)
  end
end
# Head of a ring chain: instead of relaying messages from an upstream ring it
# yields its own current message every time it is resumed.
class RingStart < Ring
  attr_accessor :message

  # @param n [Object] id passed on to Ring#initialize
  # @param m [Object] stored in @m; not read anywhere in this class
  # @param message [Object] the message to emit
  def initialize(n, m, message)
    @message = message
    @m = m
    super(n)
  end

  # Fiber body: emit the current message forever.
  def pass_message
    loop do
      message_out(@message)
    end
  end
end
# Recursive chain builder: appends rings with ids i, i-1, ..., 1 after +chain+.
#
# Previously this delegated to the loop-based create_chain after the first
# ring, so it was not actually recursive and produced a different chain.
#
# @param i [Integer] number of rings still to append
# @param chain [Ring, nil] current tail of the chain, or nil to start a new one
# @return [Ring, nil] the new tail; +chain+ itself when i is zero or negative
def create_chain_r(i, chain)
  return chain if i <= 0

  ring = Ring.new(i)
  create_chain_r(i - 1, chain.nil? ? ring : chain | ring)
end
# Loop-based chain builder; avoids deep recursion for large n.
#
# Appends n + 1 rings after +chain+, with ids n, n-1, ..., 0. Each ring now
# receives its own loop index as id (previously every ring was created with
# id n), and a nil +chain+ starts a fresh chain instead of collapsing to
# +true+ through NilClass#|.
#
# @param n [Integer] id of the first ring to append
# @param chain [Ring, nil] current tail of the chain, or nil to start a new one
# @return [Ring, nil] the new tail; +chain+ itself when n is negative
def create_chain(n, chain)
  n.downto(0) do |id|
    ring = Ring.new(id)
    chain = chain.nil? ? ring : chain | ring
  end
  chain
end
# Builds a ring of fibers and times setup and message passing, printing one
# line for each phase.
#
# @param n [Integer] chain size handed to create_chain
# @param m [Integer] number of messages pulled through the chain
# @return [nil]
def run_benchmark(n, m)
  # "%r" renders the real time wrapped in parentheses. Strip them with the
  # non-mutating String#delete: gsub! returns nil when it replaces nothing,
  # which would silently print an empty time.
  real_time = ->(tms) { tms.format("%10.6r\n").delete("()") }

  ringu = nil
  chain = nil

  setup = Benchmark.measure do
    ringu = RingStart.new(0, m, :hello)
    chain = create_chain(n, ringu)
  end
  puts "setup time for #{n} fibers: #{real_time.call(setup)}"

  # Each message pulled from the tail is fed back in as the head's next message.
  execution = Benchmark.measure do
    m.times { ringu.message = chain.resume }
  end
  puts "execution time for #{m} messages: #{real_time.call(execution)}"
end
# Script entry point. Usage: fiber_benchmark.rb [chain_size] [message_count]
# Missing arguments default to 1000 and 10000; the benchmark runs five times.
fiber_count = (ARGV[0] || 1000).to_i
message_count = (ARGV[1] || 10000).to_i
5.times { run_benchmark(fiber_count, message_count) }