2020-05-04 15:52:56 +09:00
|
|
|
#ifndef RBIMPL_RSTRING_H /*-*-C++-*-vi:se ft=cpp:*/
|
|
|
|
#define RBIMPL_RSTRING_H
|
2020-04-10 14:11:40 +09:00
|
|
|
/**
|
2020-04-08 13:28:13 +09:00
|
|
|
* @file
|
|
|
|
* @author Ruby developers <ruby-core@ruby-lang.org>
|
|
|
|
* @copyright This file is a part of the programming language Ruby.
|
|
|
|
* Permission is hereby granted, to either redistribute and/or
|
|
|
|
* modify this file, provided that the conditions mentioned in the
|
|
|
|
* file COPYING are met. Consult the file for details.
|
2020-05-04 16:27:48 +09:00
|
|
|
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
2020-04-08 13:28:13 +09:00
|
|
|
* implementation details. Don't take them as canon. They could
|
|
|
|
* rapidly appear then vanish. The name (path) of this header file
|
|
|
|
* is also an implementation detail. Do not expect it to persist
|
|
|
|
* at the place it is now. Developers are free to move it anywhere
|
|
|
|
* anytime at will.
|
|
|
|
* @note To ruby-core: remember that this header can be possibly
|
|
|
|
* recursively included from extension libraries written in C++.
|
|
|
|
* Do not expect for instance `__VA_ARGS__` is always available.
|
|
|
|
* We assume C99 for ruby itself but we don't assume languages of
|
2021-01-14 15:00:54 +09:00
|
|
|
* extension libraries. They could be written in C++98.
|
2020-04-08 13:28:13 +09:00
|
|
|
* @brief Defines struct ::RString.
|
|
|
|
*/
|
2020-05-08 18:31:09 +09:00
|
|
|
#include "ruby/internal/config.h"
|
|
|
|
#include "ruby/internal/arithmetic/long.h"
|
|
|
|
#include "ruby/internal/attr/artificial.h"
|
|
|
|
#include "ruby/internal/attr/pure.h"
|
|
|
|
#include "ruby/internal/cast.h"
|
|
|
|
#include "ruby/internal/core/rbasic.h"
|
|
|
|
#include "ruby/internal/dllexport.h"
|
|
|
|
#include "ruby/internal/fl_type.h"
|
|
|
|
#include "ruby/internal/value_type.h"
|
|
|
|
#include "ruby/internal/warning_push.h"
|
2020-04-08 13:28:13 +09:00
|
|
|
#include "ruby/assert.h"
|
|
|
|
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Convenient casting macro.
|
|
|
|
*
|
|
|
|
* @param obj An object, which is in fact an ::RString.
|
|
|
|
* @return The passed object cast to ::RString.
|
|
|
|
*/
|
2020-05-04 15:52:56 +09:00
|
|
|
#define RSTRING(obj) RBIMPL_CAST((struct RString *)(obj))
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/** @cond INTERNAL_MACRO */
|
2020-04-08 13:28:13 +09:00
|
|
|
#define RSTRING_NOEMBED RSTRING_NOEMBED
|
|
|
|
#define RSTRING_EMBED_LEN_MASK RSTRING_EMBED_LEN_MASK
|
|
|
|
#define RSTRING_EMBED_LEN_SHIFT RSTRING_EMBED_LEN_SHIFT
|
|
|
|
#define RSTRING_EMBED_LEN_MAX RSTRING_EMBED_LEN_MAX
|
|
|
|
#define RSTRING_FSTR RSTRING_FSTR
|
|
|
|
#define RSTRING_EMBED_LEN RSTRING_EMBED_LEN
|
|
|
|
#define RSTRING_LEN RSTRING_LEN
|
|
|
|
#define RSTRING_LENINT RSTRING_LENINT
|
|
|
|
#define RSTRING_PTR RSTRING_PTR
|
|
|
|
#define RSTRING_END RSTRING_END
|
|
|
|
/** @endcond */
|
|
|
|
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* @name Conversion of Ruby strings into C's
|
|
|
|
*
|
|
|
|
* @{
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Ensures that the parameter object is a String. This is done by calling its
|
|
|
|
* `to_str` method.
|
|
|
|
*
|
|
|
|
* @param[in,out] v Arbitrary Ruby object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion defined.
|
|
|
|
* @post `v` is a String.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
#define StringValue(v) rb_string_value(&(v))
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to #StringValue, except it returns a `char*`.
|
|
|
|
*
|
|
|
|
* @param[in,out] v Arbitrary Ruby object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion defined.
|
|
|
|
* @return Converted Ruby string's backend C string.
|
|
|
|
* @post `v` is a String.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
#define StringValuePtr(v) rb_string_value_ptr(&(v))
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to #StringValuePtr, except it additionally checks the contents
|
|
|
|
* for viability as a C string. Ruby can accept wider range of contents as
|
|
|
|
* strings, compared to C. This function is to check that.
|
|
|
|
*
|
|
|
|
* @param[in,out] v Arbitrary Ruby object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion defined.
|
|
|
|
* @exception rb_eArgError String is not C-compatible.
|
|
|
|
* @return Converted Ruby string's backend C string.
|
|
|
|
* @post `v` is a String.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
#define StringValueCStr(v) rb_string_value_cstr(&(v))
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @private
|
|
|
|
*
|
|
|
|
* @deprecated This macro once was a thing in the old days, but makes no sense
|
|
|
|
* any longer today. Exists here for backwards compatibility
|
|
|
|
* only. You can safely forget about it.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
#define SafeStringValue(v) StringValue(v)
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
 * Identical to #StringValue, except the string is additionally converted
 * into the default external encoding.  Ruby strings carry an encoding, and
 * that encoding can differ from what the environment expects.  Somebody has
 * to turn the contents into something suitable; this macro is that routine.
 * Call it when necessary.
 *
 * @param[in,out]  v              Arbitrary Ruby object.
 * @exception      rb_eTypeError  No implicit conversion defined.
 * @post           `v` is a String; it has been reassigned to what
 *                 rb_str_export() returned.
 * @note           The expansion is a `do { ... } while (0)` statement, hence
 *                 it yields no value.  `v` appears several times in the
 *                 expansion and is assigned to, so pass a plain lvalue.
 *
 * @internal
 *
 * Not sure but it seems this macro does not raise on encoding
 * incompatibilities?  Doesn't sound right to @shyouhei.
 */
#define ExportStringValue(v) do { \
    StringValue(v);               \
    (v) = rb_str_export(v);       \
} while (0)
|
|
|
|
|
2021-02-01 12:10:21 +09:00
|
|
|
/** @} */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @private
|
|
|
|
*
|
|
|
|
* Bits that you can set to ::RBasic::flags.
|
|
|
|
*
|
|
|
|
* @warning These enums are not the only bits we use for strings.
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*
|
|
|
|
* Actually all bits through FL_USER1 to FL_USER19 are used for strings. Why
|
|
|
|
* only this tiny part of them are made public here? @shyouhei can find no
|
|
|
|
* reason.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
enum ruby_rstring_flags {
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This flag has something to do with memory footprint. If the string is
|
|
|
|
* short enough, ruby tries to be creative to abuse padding bits of struct
|
|
|
|
* ::RString for storing contents. If this flag is set that string does
|
|
|
|
* _not_ do that, to resort to good old fashioned external allocation
|
|
|
|
* strategy instead.
|
|
|
|
*
|
|
|
|
* @warning This bit has to be considered read-only. Setting/clearing
|
|
|
|
* this bit without corresponding fix up must cause immediate
|
|
|
|
* SEGV. Also, internal structures of a string change
|
|
|
|
* dynamically and transparently throughout of its lifetime.
|
|
|
|
* Don't assume it being persistent.
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*
|
|
|
|
* 3rd parties must not be aware that there even is more than one way to
|
|
|
|
* store a string. Might better be hidden.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
RSTRING_NOEMBED = RUBY_FL_USER1,
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* When a string employs embedded strategy (see ::RSTRING_NOEMBED), these
|
|
|
|
* bits are used to store the number of bytes actually filled into
|
|
|
|
* ::RString::ary.
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*
|
|
|
|
* 3rd parties must not be aware that there even is more than one way to
|
|
|
|
* store a string. Might better be hidden.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
RSTRING_EMBED_LEN_MASK = RUBY_FL_USER2 | RUBY_FL_USER3 | RUBY_FL_USER4 |
|
|
|
|
RUBY_FL_USER5 | RUBY_FL_USER6,
|
2021-02-01 12:10:21 +09:00
|
|
|
|
2020-04-08 13:28:13 +09:00
|
|
|
/* Actually, string encodings are also encoded into the flags, using
|
|
|
|
* remaining bits.*/
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This flag has something to do with infamous "f"string. What is a
|
|
|
|
* fstring? Well it is a special subkind of strings that is immutable,
|
|
|
|
* deduped globally, and managed by our GC. It is much like a Symbol (in
|
|
|
|
* fact Symbols are dynamic these days and are backended using fstrings).
|
|
|
|
* This concept has been silently introduced at some point in 2.x era.
|
|
|
|
* Since then it gained wider acceptance in the core. But extension
|
|
|
|
* libraries could not know that until very recently. Strings of this flag
|
|
|
|
* live in a special Limbo deep inside of the interpreter. Never try to
|
|
|
|
* manipulate it by hand.
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*
|
|
|
|
* Fstrings are not the only variant strings that we implement today.
|
|
|
|
* Other things are behind-the-scene. This is the only one that is visible
|
|
|
|
* from extension library. There is no clear reason why it has to be.
|
|
|
|
* Given there are more "polite" ways to create fstrings, it seems this bit
|
|
|
|
* need not be exposed to extension libraries. Might better be hidden.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
RSTRING_FSTR = RUBY_FL_USER17
|
|
|
|
};
|
|
|
|
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* This is an enum because GDB wants it (rather than a macro). People need not
|
|
|
|
* bother.
|
|
|
|
*/
|
2020-04-25 17:52:27 +09:00
|
|
|
enum ruby_rstring_consts {
|
2021-02-01 12:10:21 +09:00
|
|
|
/** Where ::RSTRING_EMBED_LEN_MASK resides. */
|
2020-04-08 13:28:13 +09:00
|
|
|
RSTRING_EMBED_LEN_SHIFT = RUBY_FL_USHIFT + 2,
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/** Max possible number of characters that can be embedded. */
|
2020-05-04 15:52:56 +09:00
|
|
|
RSTRING_EMBED_LEN_MAX = RBIMPL_EMBED_LEN_MAX_OF(char) - 1
|
2020-04-08 13:28:13 +09:00
|
|
|
};
|
|
|
|
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Ruby's String. A string in ruby conceptually has these information:
|
|
|
|
*
|
|
|
|
* - Encoding of the string.
|
|
|
|
* - Length of the string.
|
|
|
|
* - Contents of the string.
|
|
|
|
*
|
|
|
|
* It is worth noting that a string is _not_ an array of characters in ruby.
|
|
|
|
* It has never been. In 1.x a string was an array of integers. Since 2.x a
|
|
|
|
* string is no longer an array of anything. A string is a string -- just like
|
|
|
|
* a Time is not an integer.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
struct RString {
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/** Basic part, including flags and class. */
|
2020-04-08 13:28:13 +09:00
|
|
|
struct RBasic basic;
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/** String's specific fields. */
|
2020-04-08 13:28:13 +09:00
|
|
|
union {
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Strings that use separated memory region for contents use this
|
|
|
|
* pattern.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
struct {
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Length of the string, not including terminating NUL character.
|
|
|
|
*
|
|
|
|
* @note This is in bytes.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
long len;
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Pointer to the contents of the string. In the old days each
|
|
|
|
* string had dedicated memory regions. That is no longer true
|
|
|
|
* today, but there still are strings of such properties. This
|
|
|
|
* field could be used to point such things.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
char *ptr;
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/** Auxiliary info. */
|
2020-04-08 13:28:13 +09:00
|
|
|
union {
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Capacity of `*ptr`. A continuous memory region of at least
|
|
|
|
* `capa` bytes is expected to exist at `*ptr`. This can be
|
|
|
|
* bigger than `len`.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
long capa;
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Parent of the string. Nowadays strings can share their
|
|
|
|
* contents each other, constructing gigantic nest of objects.
|
|
|
|
* This situation is called "shared", and this is the field to
|
|
|
|
* control such properties.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
VALUE shared;
|
|
|
|
} aux;
|
|
|
|
} heap;
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Embedded contents. When a string is short enough, it uses this area
|
|
|
|
* to store the contents themselves. This was impractical in the 20th
|
|
|
|
* century, but these days 64 bit machines can typically hold 48 bytes
|
|
|
|
* here. Could be sufficiently large. In this case the length is
|
|
|
|
* encoded into the flags.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
char ary[RSTRING_EMBED_LEN_MAX + 1];
|
|
|
|
} as;
|
|
|
|
};
|
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Identical to rb_check_string_type(), except it raises exceptions in case of
|
|
|
|
* conversion failures.
|
|
|
|
*
|
|
|
|
* @param[in] obj Target object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion to String.
|
|
|
|
* @return Return value of `obj.to_str`.
|
|
|
|
* @see rb_io_get_io
|
|
|
|
* @see rb_ary_to_ary
|
|
|
|
*/
|
|
|
|
VALUE rb_str_to_str(VALUE obj);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to rb_str_to_str(), except it fills the passed pointer with the
|
|
|
|
* converted object.
|
|
|
|
*
|
|
|
|
* @param[in,out] ptr Pointer to a variable of target object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion to String.
|
|
|
|
* @return Return value of `obj.to_str`.
|
|
|
|
* @post `*ptr` is the return value.
|
|
|
|
*/
|
|
|
|
VALUE rb_string_value(volatile VALUE *ptr);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to rb_str_to_str(), except it returns the converted string's
|
|
|
|
* backend memory region.
|
|
|
|
*
|
|
|
|
* @param[in,out] ptr Pointer to a variable of target object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion to String.
|
|
|
|
* @post `*ptr` is the return value of `obj.to_str`.
|
|
|
|
* @return Pointer to the contents of the return value.
|
|
|
|
*/
|
|
|
|
char *rb_string_value_ptr(volatile VALUE *ptr);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to rb_string_value_ptr(), except it additionally checks the
|
|
|
|
* contents for viability as a C string. Ruby can accept wider range of
|
|
|
|
* contents as strings, compared to C. This function is to check that.
|
|
|
|
*
|
|
|
|
* @param[in,out] ptr Pointer to a variable of target object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion to String.
|
|
|
|
* @exception rb_eArgError String is not C-compatible.
|
|
|
|
* @post `*ptr` is the return value of `obj.to_str`.
|
|
|
|
* @return Pointer to the contents of the return value.
|
|
|
|
*/
|
|
|
|
char *rb_string_value_cstr(volatile VALUE *ptr);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to rb_str_to_str(), except it additionally converts the string
|
|
|
|
* into default external encoding. Ruby has a concept called encodings. A
|
|
|
|
* string can have different encoding than the environment expects. Someone
|
|
|
|
* has to make sure its contents be converted to something suitable. This is
|
|
|
|
* that routine. Call it when necessary.
|
|
|
|
*
|
|
|
|
* @param[in] obj Target object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion to String.
|
|
|
|
* @return Converted ruby string of default external encoding.
|
|
|
|
*/
|
|
|
|
VALUE rb_str_export(VALUE obj);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Identical to rb_str_export(), except it converts into the locale encoding
|
|
|
|
* instead.
|
|
|
|
*
|
|
|
|
* @param[in] obj Target object.
|
|
|
|
* @exception rb_eTypeError No implicit conversion to String.
|
|
|
|
* @return Converted ruby string of locale encoding.
|
|
|
|
*/
|
|
|
|
VALUE rb_str_export_locale(VALUE obj);
|
2020-04-08 13:28:13 +09:00
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ERROR(("rb_check_safe_str() and Check_SafeStr() are obsolete; use StringValue() instead"))
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* @private
|
|
|
|
*
|
|
|
|
* @deprecated This function once was a thing in the old days, but makes no
|
|
|
|
* sense any longer today. Exists here for backwards
|
|
|
|
* compatibility only. You can safely forget about it.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
void rb_check_safe_str(VALUE);
|
2021-02-01 12:10:21 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @private
|
|
|
|
*
|
|
|
|
* @deprecated This macro once was a thing in the old days, but makes no sense
|
|
|
|
* any longer today. Exists here for backwards compatibility
|
|
|
|
* only. You can safely forget about it.
|
|
|
|
*/
|
2020-05-04 15:52:56 +09:00
|
|
|
#define Check_SafeStr(v) rb_check_safe_str(RBIMPL_CAST((VALUE)(v)))
|
2021-09-09 23:21:06 +09:00
|
|
|
|
|
|
|
/**
|
|
|
|
* @private
|
|
|
|
*
|
|
|
|
* Prints diagnostic message to stderr when RSTRING_PTR or RSTRING_END
|
|
|
|
* is NULL.
|
|
|
|
*
|
|
|
|
* @param[in] func Name of the function that encountered the NULL pointer.
|
|
|
|
*/
|
|
|
|
void rb_debug_rstring_null_ptr(const char *func);
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_SYMBOL_EXPORT_END()
|
2020-04-08 13:28:13 +09:00
|
|
|
|
2020-05-20 11:38:44 +09:00
|
|
|
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ARTIFICIAL()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Queries the length of the string.
|
|
|
|
*
|
|
|
|
* @param[in] str String in question.
|
|
|
|
* @return Its length, in bytes.
|
|
|
|
* @pre `str` must be an instance of ::RString, and must has its
|
|
|
|
* ::RSTRING_NOEMBED flag off.
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*
|
|
|
|
* This was a macro before. It was inevitable to be public, since macros are
|
|
|
|
* global constructs. But should it be forever? Now that it is a function,
|
|
|
|
* @shyouhei thinks it could just be eliminated, hidden into implementation
|
|
|
|
* details.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
static inline long
|
|
|
|
RSTRING_EMBED_LEN(VALUE str)
|
|
|
|
{
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
|
|
|
|
RBIMPL_ASSERT_OR_ASSUME(! RB_FL_ANY_RAW(str, RSTRING_NOEMBED));
|
2020-04-08 13:28:13 +09:00
|
|
|
|
|
|
|
VALUE f = RBASIC(str)->flags;
|
|
|
|
f &= RSTRING_EMBED_LEN_MASK;
|
|
|
|
f >>= RSTRING_EMBED_LEN_SHIFT;
|
2020-05-04 15:52:56 +09:00
|
|
|
return RBIMPL_CAST((long)f);
|
2020-04-08 13:28:13 +09:00
|
|
|
}
|
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_WARNING_PUSH()
|
|
|
|
#if RBIMPL_COMPILER_IS(Intel)
|
|
|
|
RBIMPL_WARNING_IGNORED(413)
|
2020-04-09 12:22:54 +09:00
|
|
|
#endif
|
|
|
|
|
2020-05-20 11:38:44 +09:00
|
|
|
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ARTIFICIAL()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* @private
|
|
|
|
*
|
|
|
|
* "Expands" an embedded string into an ordinal one. This is a function that
|
|
|
|
* returns aggregated type. The returned struct always has its `as.heap.len`
|
|
|
|
* an `as.heap.ptr` fields set appropriately.
|
|
|
|
*
|
|
|
|
* This is an implementation detail that 3rd parties should never bother.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
static inline struct RString
|
2020-05-04 16:27:48 +09:00
|
|
|
rbimpl_rstring_getmem(VALUE str)
|
2020-04-08 13:28:13 +09:00
|
|
|
{
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ASSERT_TYPE(str, RUBY_T_STRING);
|
2020-04-08 13:28:13 +09:00
|
|
|
|
|
|
|
if (RB_FL_ANY_RAW(str, RSTRING_NOEMBED)) {
|
|
|
|
return *RSTRING(str);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Expecting compilers to optimize this on-stack struct away. */
|
|
|
|
struct RString retval;
|
|
|
|
retval.as.heap.len = RSTRING_EMBED_LEN(str);
|
|
|
|
retval.as.heap.ptr = RSTRING(str)->as.ary;
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_WARNING_POP()
|
2020-04-09 12:22:54 +09:00
|
|
|
|
2020-05-20 11:38:44 +09:00
|
|
|
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ARTIFICIAL()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Queries the length of the string.
|
|
|
|
*
|
|
|
|
* @param[in] str String in question.
|
|
|
|
* @return Its length, in bytes.
|
|
|
|
* @pre `str` must be an instance of ::RString.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
static inline long
|
|
|
|
RSTRING_LEN(VALUE str)
|
|
|
|
{
|
2020-05-04 16:27:48 +09:00
|
|
|
return rbimpl_rstring_getmem(str).as.heap.len;
|
2020-04-08 13:28:13 +09:00
|
|
|
}
|
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ARTIFICIAL()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Queries the contents pointer of the string.
|
|
|
|
*
|
|
|
|
* @param[in] str String in question.
|
|
|
|
* @return Pointer to its contents.
|
|
|
|
* @pre `str` must be an instance of ::RString.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
static inline char *
|
|
|
|
RSTRING_PTR(VALUE str)
|
|
|
|
{
|
2020-05-04 16:27:48 +09:00
|
|
|
char *ptr = rbimpl_rstring_getmem(str).as.heap.ptr;
|
2020-04-08 13:28:13 +09:00
|
|
|
|
|
|
|
if (RB_UNLIKELY(! ptr)) {
|
|
|
|
/* :BEWARE: @shyouhei thinks that currently, there are rooms for this
|
|
|
|
* function to return NULL. In the 20th century that was a pointless
|
|
|
|
* concern. However struct RString can hold fake strings nowadays. It
|
|
|
|
* seems no check against NULL are exercised around handling of them
|
|
|
|
* (one of such usages is located in marshal.c, which scares
|
|
|
|
* @shyouhei). Better check here for maximum safety.
|
|
|
|
*
|
|
|
|
* Also, this is not rb_warn() because RSTRING_PTR() can be called
|
|
|
|
* during GC (see what obj_info() does). rb_warn() needs to allocate
|
|
|
|
* Ruby objects. That is not possible at this moment. */
|
2021-09-09 23:21:06 +09:00
|
|
|
rb_debug_rstring_null_ptr("RSTRING_PTR");
|
2020-04-08 13:28:13 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ARTIFICIAL()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Queries the end of the contents pointer of the string.
|
|
|
|
*
|
|
|
|
* @param[in] str String in question.
|
|
|
|
* @return Pointer to its end of contents.
|
|
|
|
* @pre `str` must be an instance of ::RString.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
static inline char *
|
|
|
|
RSTRING_END(VALUE str)
|
|
|
|
{
|
2020-05-04 16:27:48 +09:00
|
|
|
struct RString buf = rbimpl_rstring_getmem(str);
|
2020-04-08 13:28:13 +09:00
|
|
|
|
|
|
|
if (RB_UNLIKELY(! buf.as.heap.ptr)) {
|
|
|
|
/* Ditto. */
|
2021-09-09 23:21:06 +09:00
|
|
|
rb_debug_rstring_null_ptr("RSTRING_END");
|
2020-04-08 13:28:13 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
return &buf.as.heap.ptr[buf.as.heap.len];
|
|
|
|
}
|
|
|
|
|
2020-05-04 15:52:56 +09:00
|
|
|
RBIMPL_ATTR_ARTIFICIAL()
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
|
|
|
|
* Identical to RSTRING_LEN(), except it differs for the return type.
|
|
|
|
*
|
|
|
|
* @param[in] str String in question.
|
|
|
|
* @exception rb_eRangeError Too long.
|
|
|
|
* @return Its length, in bytes.
|
|
|
|
* @pre `str` must be an instance of ::RString.
|
|
|
|
*
|
|
|
|
* @internal
|
|
|
|
*
|
|
|
|
* This API seems redundant but has actual usages.
|
|
|
|
*/
|
2020-04-08 13:28:13 +09:00
|
|
|
static inline int
|
|
|
|
RSTRING_LENINT(VALUE str)
|
|
|
|
{
|
|
|
|
return rb_long2int(RSTRING_LEN(str));
|
|
|
|
}
|
|
|
|
|
2021-02-01 12:10:21 +09:00
|
|
|
/**
 * Convenient macro to obtain both the contents pointer and the length of a
 * string at once.
 *
 * @param  str     String in question.
 * @param  ptrvar  Lvalue which receives the pointer to the contents.
 * @param  lenvar  Lvalue which receives the length.
 * @note   Without `HAVE_STMT_AND_DECL_IN_EXPR` the fallback expands `str`
 *         twice, once for RSTRING_PTR() and once for RSTRING_LEN().  Avoid
 *         arguments with side effects.
 * @note   The statement-expression form reads the pointer through
 *         rbimpl_rstring_getmem() directly, so it does not go through the
 *         NULL diagnostics of RSTRING_PTR(); the fallback form does.
 */
#ifdef HAVE_STMT_AND_DECL_IN_EXPR
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
    __extension__ ({ \
        struct RString rbimpl_str = rbimpl_rstring_getmem(str); \
        (ptrvar) = rbimpl_str.as.heap.ptr; \
        (lenvar) = rbimpl_str.as.heap.len; \
    })
#else
# define RSTRING_GETMEM(str, ptrvar, lenvar) \
    ((ptrvar) = RSTRING_PTR(str),           \
     (lenvar) = RSTRING_LEN(str))
#endif /* HAVE_STMT_AND_DECL_IN_EXPR */
|
2020-05-04 15:52:56 +09:00
|
|
|
#endif /* RBIMPL_RSTRING_H */
|