mirror of
https://github.com/ruby/ruby.git
synced 2022-11-09 12:17:21 -05:00
include/ruby/encoding.h: convert macros into inline functions
Less macros == huge win.
This commit is contained in:
parent
312668cf03
commit
5112a54846
Notes:
git
2021-10-05 14:18:47 +09:00
7 changed files with 368 additions and 120 deletions
|
@ -22,7 +22,9 @@
|
|||
*/
|
||||
|
||||
#include "ruby/internal/attr/const.h"
|
||||
#include "ruby/internal/attr/pure.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/fl_type.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
@ -65,6 +67,7 @@ rb_enc_coderange_clean_p(int cr)
|
|||
return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT;
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
* Queries if a code range is "clean". "Clean" in this context means it is
|
||||
* known and valid.
|
||||
|
@ -73,8 +76,13 @@ rb_enc_coderange_clean_p(int cr)
|
|||
* @retval 1 It is.
|
||||
* @retval 0 It isn't.
|
||||
*/
|
||||
#define RB_ENC_CODERANGE_CLEAN_P(cr) rb_enc_coderange_clean_p(cr)
|
||||
static inline bool
|
||||
RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
|
||||
{
|
||||
return rb_enc_coderange_clean_p(cr);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
||||
/**
|
||||
* Queries the (inline) code range of the passed object. The object must be
|
||||
* capable of having inline encoding. Using this macro needs deep
|
||||
|
@ -83,8 +91,15 @@ rb_enc_coderange_clean_p(int cr)
|
|||
* @param[in] obj Target object.
|
||||
* @return An enum ::ruby_coderange_type.
|
||||
*/
|
||||
#define RB_ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & RUBY_ENC_CODERANGE_MASK)
|
||||
static inline enum ruby_coderange_type
|
||||
RB_ENC_CODERANGE(VALUE obj)
|
||||
{
|
||||
VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK);
|
||||
|
||||
return RBIMPL_CAST((enum ruby_coderange_type)ret);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
||||
/**
|
||||
* Queries if the (inline) code range of the passed object is
|
||||
* ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline
|
||||
|
@ -95,7 +110,11 @@ rb_enc_coderange_clean_p(int cr)
|
|||
* @retval 1 It is ascii only.
|
||||
* @retval 0 Otherwise (including cases when the range is not known).
|
||||
*/
|
||||
#define RB_ENC_CODERANGE_ASCIIONLY(obj) (RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT)
|
||||
static inline bool
|
||||
RB_ENC_CODERANGE_ASCIIONLY(VALUE obj)
|
||||
{
|
||||
return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructively modifies the passed object so that its (inline) code range is
|
||||
|
@ -106,9 +125,12 @@ rb_enc_coderange_clean_p(int cr)
|
|||
* @param[out] cr An enum ::ruby_coderange_type.
|
||||
* @post `obj`'s code range is `cr`.
|
||||
*/
|
||||
#define RB_ENC_CODERANGE_SET(obj,cr) (\
|
||||
RBASIC(obj)->flags = \
|
||||
(RBASIC(obj)->flags & ~RUBY_ENC_CODERANGE_MASK) | (cr))
|
||||
static inline void
|
||||
RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
|
||||
{
|
||||
RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
|
||||
RB_FL_SET_RAW(obj, cr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructively clears the passed object's (inline) code range. The object
|
||||
|
@ -118,8 +140,13 @@ rb_enc_coderange_clean_p(int cr)
|
|||
* @param[out] obj Target object.
|
||||
* @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
|
||||
*/
|
||||
#define RB_ENC_CODERANGE_CLEAR(obj) RB_ENC_CODERANGE_SET((obj),0)
|
||||
static inline void
|
||||
RB_ENC_CODERANGE_CLEAR(VALUE obj)
|
||||
{
|
||||
RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/* assumed ASCII compatibility */
|
||||
/**
|
||||
* "Mix" two code ranges into one. This is handy for instance when you
|
||||
|
@ -131,28 +158,22 @@ rb_enc_coderange_clean_p(int cr)
|
|||
* @param[in] b Another enum ::ruby_coderange_type.
|
||||
* @return The `a` "and" `b`.
|
||||
*/
|
||||
#define RB_ENC_CODERANGE_AND(a, b) \
|
||||
((a) == RUBY_ENC_CODERANGE_7BIT ? (b) : \
|
||||
(a) != RUBY_ENC_CODERANGE_VALID ? RUBY_ENC_CODERANGE_UNKNOWN : \
|
||||
(b) == RUBY_ENC_CODERANGE_7BIT ? RUBY_ENC_CODERANGE_VALID : (b))
|
||||
|
||||
/**
|
||||
* This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be
|
||||
* capable of having inline encoding. Using this macro needs deep
|
||||
* understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[out] obj Target object.
|
||||
* @param[in] encindex Encoding in encindex format.
|
||||
* @param[in] cr An enum ::ruby_coderange_type.
|
||||
* @post `obj`'s encoding is `encindex`.
|
||||
* @post `obj`'s code range is `cr`.
|
||||
*/
|
||||
#define RB_ENCODING_CODERANGE_SET(obj, encindex, cr) \
|
||||
do { \
|
||||
VALUE rb_encoding_coderange_obj = (obj); \
|
||||
RB_ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
|
||||
RB_ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
|
||||
} while (0)
|
||||
static inline enum ruby_coderange_type
|
||||
RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
|
||||
{
|
||||
if (a == RUBY_ENC_CODERANGE_7BIT) {
|
||||
return b;
|
||||
}
|
||||
else if (a != RUBY_ENC_CODERANGE_VALID) {
|
||||
return RUBY_ENC_CODERANGE_UNKNOWN;
|
||||
}
|
||||
else if (b == RUBY_ENC_CODERANGE_7BIT) {
|
||||
return RUBY_ENC_CODERANGE_VALID;
|
||||
}
|
||||
else {
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
||||
#define ENC_CODERANGE_MASK RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */
|
||||
#define ENC_CODERANGE_UNKNOWN RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
|
||||
|
@ -167,6 +188,15 @@ rb_enc_coderange_clean_p(int cr)
|
|||
#define ENC_CODERANGE_AND(a, b) RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */
|
||||
#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
#define RB_ENC_CODERANGE RB_ENC_CODERANGE
|
||||
#define RB_ENC_CODERANGE_AND RB_ENC_CODERANGE_AND
|
||||
#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY
|
||||
#define RB_ENC_CODERANGE_CLEAN_P RB_ENC_CODERANGE_CLEAN_P
|
||||
#define RB_ENC_CODERANGE_CLEAR RB_ENC_CODERANGE_CLEAR
|
||||
#define RB_ENC_CODERANGE_SET RB_ENC_CODERANGE_SET
|
||||
/** @endcond */
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */
|
||||
|
|
|
@ -39,7 +39,14 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 0 It isn't.
|
||||
* @retval otherwise It is.
|
||||
*/
|
||||
#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE((enc),(UChar*)(p),(UChar*)(end))
|
||||
static inline bool
|
||||
rb_enc_is_newline(const char *p, const char *e, const rb_encoding *enc)
|
||||
{
|
||||
OnigUChar *up = RBIMPL_CAST((OnigUChar *)p);
|
||||
OnigUChar *ue = RBIMPL_CAST((OnigUChar *)e);
|
||||
|
||||
return ONIGENC_IS_MBC_NEWLINE(enc, up, ue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries if the passed code point is of passed character type in the passed
|
||||
|
@ -52,7 +59,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `c` is of `t` in `enc`.
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE((enc),(c),(t))
|
||||
static inline bool
|
||||
rb_enc_isctype(OnigCodePoint c, OnigCtype t, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_CTYPE(enc, c, t);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isascii(), except it additionally takes an encoding.
|
||||
|
@ -67,7 +78,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* `enc` is ignored. This is at least an intentional implementation detail
|
||||
* (not a bug). But there could be room for future extensions.
|
||||
*/
|
||||
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII(c)
|
||||
static inline bool
|
||||
rb_enc_isascii(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_ASCII(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isalpha(), except it additionally takes an encoding.
|
||||
|
@ -77,7 +92,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "ALPHA".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_isalpha(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_ALPHA(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_islower(), except it additionally takes an encoding.
|
||||
|
@ -87,7 +106,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "LOWER".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_islower(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_LOWER(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isupper(), except it additionally takes an encoding.
|
||||
|
@ -97,7 +120,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "UPPER".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_isupper(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_UPPER(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_ispunct(), except it additionally takes an encoding.
|
||||
|
@ -107,7 +134,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "PUNCT".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_ispunct(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_PUNCT(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isalnum(), except it additionally takes an encoding.
|
||||
|
@ -117,7 +148,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "ALNUM".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_isalnum(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_ALNUM(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isprint(), except it additionally takes an encoding.
|
||||
|
@ -127,7 +162,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "PRINT".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_isprint(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_PRINT(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isspace(), except it additionally takes an encoding.
|
||||
|
@ -137,7 +176,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "SPACE".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_isspace(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_SPACE(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isdigit(), except it additionally takes an encoding.
|
||||
|
@ -147,7 +190,11 @@ RBIMPL_SYMBOL_EXPORT_BEGIN()
|
|||
* @retval 1 `enc` classifies `c` as "DIGIT".
|
||||
* @retval 0 Otherwise.
|
||||
*/
|
||||
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c))
|
||||
static inline bool
|
||||
rb_enc_isdigit(OnigCodePoint c, const rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_DIGIT(enc, c);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
|
@ -179,4 +226,18 @@ int rb_enc_tolower(int c, rb_encoding *enc);
|
|||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
#define rb_enc_is_newline rb_enc_is_newline
|
||||
#define rb_enc_isalnum rb_enc_isalnum
|
||||
#define rb_enc_isalpha rb_enc_isalpha
|
||||
#define rb_enc_isascii rb_enc_isascii
|
||||
#define rb_enc_isctype rb_enc_isctype
|
||||
#define rb_enc_isdigit rb_enc_isdigit
|
||||
#define rb_enc_islower rb_enc_islower
|
||||
#define rb_enc_isprint rb_enc_isprint
|
||||
#define rb_enc_ispunct rb_enc_ispunct
|
||||
#define rb_enc_isspace rb_enc_isspace
|
||||
#define rb_enc_isupper rb_enc_isupper
|
||||
/** @endcond */
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */
|
||||
|
|
|
@ -76,13 +76,15 @@ enum ruby_encoding_consts {
|
|||
* @param[in] i Encoding in encindex format.
|
||||
* @post `obj`'s encoding is `i`.
|
||||
*/
|
||||
#define RB_ENCODING_SET_INLINED(obj,i) do {\
|
||||
RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\
|
||||
RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\
|
||||
} while (0)
|
||||
static inline void
|
||||
RB_ENCODING_SET_INLINED(VALUE obj, int encindex)
|
||||
{
|
||||
VALUE f = /* upcast */ encindex;
|
||||
|
||||
/** @alias{rb_enc_set_index} */
|
||||
#define RB_ENCODING_SET(obj,i) rb_enc_set_index((obj), (i))
|
||||
f <<= RUBY_ENCODING_SHIFT;
|
||||
RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK);
|
||||
RB_FL_SET_RAW(obj, f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the encoding of the passed object. The encoding must be smaller
|
||||
|
@ -92,32 +94,13 @@ enum ruby_encoding_consts {
|
|||
* @param[in] obj Target object.
|
||||
* @return `obj`'s encoding index.
|
||||
*/
|
||||
#define RB_ENCODING_GET_INLINED(obj) \
|
||||
(int)((RBASIC(obj)->flags & RUBY_ENCODING_MASK)>>RUBY_ENCODING_SHIFT)
|
||||
static inline int
|
||||
RB_ENCODING_GET_INLINED(VALUE obj)
|
||||
{
|
||||
VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT;
|
||||
|
||||
/**
|
||||
* @alias{rb_enc_get_index}
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* Implementation wise this is not a verbatim alias of rb_enc_get_index(). But
|
||||
* the API is consistent. Don't bother.
|
||||
*/
|
||||
#define RB_ENCODING_GET(obj) \
|
||||
(RB_ENCODING_GET_INLINED(obj) != RUBY_ENCODING_INLINE_MAX ? \
|
||||
RB_ENCODING_GET_INLINED(obj) : \
|
||||
rb_enc_get_index(obj))
|
||||
|
||||
/**
|
||||
* Queries if the passed object is in ascii 8bit (== binary) encoding. The
|
||||
* object must be capable of having inline encoding. Using this macro needs
|
||||
* deep understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[in] obj An object to check.
|
||||
* @retval 1 It is.
|
||||
* @retval 0 It isn't.
|
||||
*/
|
||||
#define RB_ENCODING_IS_ASCII8BIT(obj) (RB_ENCODING_GET_INLINED(obj) == 0)
|
||||
return RBIMPL_CAST((int)ret);
|
||||
}
|
||||
|
||||
#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i) /**< @old{RB_ENCODING_SET_INLINED} */
|
||||
#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i) /**< @old{RB_ENCODING_SET} */
|
||||
|
@ -126,7 +109,6 @@ enum ruby_encoding_consts {
|
|||
#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj) /**< @old{RB_ENCODING_IS_ASCII8BIT} */
|
||||
#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN /**< @old{RUBY_ENCODING_MAXNAMELEN} */
|
||||
|
||||
|
||||
/**
|
||||
* The type of encoding. Our design here is we take Oniguruma/Onigmo's
|
||||
* multilingualisation schema as our base data structure.
|
||||
|
@ -217,6 +199,27 @@ int rb_enc_to_index(rb_encoding *enc);
|
|||
*/
|
||||
int rb_enc_get_index(VALUE obj);
|
||||
|
||||
/**
|
||||
* @alias{rb_enc_get_index}
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* Implementation wise this is not a verbatim alias of rb_enc_get_index(). But
|
||||
* the API is consistent. Don't bother.
|
||||
*/
|
||||
static inline int
|
||||
RB_ENCODING_GET(VALUE obj)
|
||||
{
|
||||
int encindex = RB_ENCODING_GET_INLINED(obj);
|
||||
|
||||
if (encindex == RUBY_ENCODING_INLINE_MAX) {
|
||||
return rb_enc_get_index(obj);
|
||||
}
|
||||
else {
|
||||
return encindex;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructively assigns an encoding (via its index) to an object.
|
||||
*
|
||||
|
@ -229,6 +232,31 @@ int rb_enc_get_index(VALUE obj);
|
|||
*/
|
||||
void rb_enc_set_index(VALUE obj, int encindex);
|
||||
|
||||
/** @alias{rb_enc_set_index} */
|
||||
static inline void
|
||||
RB_ENCODING_SET(VALUE obj, int encindex)
|
||||
{
|
||||
rb_enc_set_index(obj, encindex);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is #RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo. The object must be
|
||||
* capable of having inline encoding. Using this macro needs deep
|
||||
* understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[out] obj Target object.
|
||||
* @param[in] encindex Encoding in encindex format.
|
||||
* @param[in] cr An enum ::ruby_coderange_type.
|
||||
* @post `obj`'s encoding is `encindex`.
|
||||
* @post `obj`'s code range is `cr`.
|
||||
*/
|
||||
static inline void
|
||||
RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr)
|
||||
{
|
||||
RB_ENCODING_SET(obj, encindex);
|
||||
RB_ENC_CODERANGE_SET(obj, cr);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_PURE()
|
||||
/**
|
||||
* Queries if the passed object can have its encoding.
|
||||
|
@ -401,7 +429,11 @@ rb_encoding *rb_enc_find(const char *name);
|
|||
* @param[in] enc An encoding.
|
||||
* @return Its name.
|
||||
*/
|
||||
#define rb_enc_name(enc) (enc)->name
|
||||
static inline const char *
|
||||
rb_enc_name(const rb_encoding *enc)
|
||||
{
|
||||
return enc->name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the minimum number of bytes that the passed encoding needs to
|
||||
|
@ -412,7 +444,11 @@ rb_encoding *rb_enc_find(const char *name);
|
|||
* @param[in] enc An encoding.
|
||||
* @return Its least possible number of bytes except 0.
|
||||
*/
|
||||
#define rb_enc_mbminlen(enc) (enc)->min_enc_len
|
||||
static inline int
|
||||
rb_enc_mbminlen(const rb_encoding *enc)
|
||||
{
|
||||
return enc->min_enc_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the maximum number of bytes that the passed encoding needs to
|
||||
|
@ -423,7 +459,11 @@ rb_encoding *rb_enc_find(const char *name);
|
|||
* @param[in] enc An encoding.
|
||||
* @return Its maximum possible number of bytes of a character.
|
||||
*/
|
||||
#define rb_enc_mbmaxlen(enc) (enc)->max_enc_len
|
||||
static inline int
|
||||
rb_enc_mbmaxlen(const rb_encoding *enc)
|
||||
{
|
||||
return enc->max_enc_len;
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the number of bytes of the character at the passed pointer.
|
||||
|
@ -525,7 +565,6 @@ int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc);
|
|||
*/
|
||||
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_DEPRECATED(("use rb_enc_codepoint_len instead."))
|
||||
/**
|
||||
* Queries the code point of character pointed by the passed pointer.
|
||||
* Exceptions happen in case of broken input.
|
||||
|
@ -536,12 +575,24 @@ RBIMPL_ATTR_DEPRECATED(("use rb_enc_codepoint_len instead."))
|
|||
* @param[in] enc Encoding of the string.
|
||||
* @exception rb_eArgError `p` is broken.
|
||||
* @return Code point of the character pointed by `p`.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* @matz says in commit 91e5ba1cb865a2385d3e1cbfacd824496898e098 that the line
|
||||
* below is a "prototype for obsolete function". However even today there
|
||||
* still are some use cases of it throughout our repository. It seems it has
|
||||
* its own niche.
|
||||
*/
|
||||
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
|
||||
static inline unsigned int
|
||||
rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
|
||||
{
|
||||
return rb_enc_codepoint_len(p, e, 0, enc);
|
||||
/* ^^^
|
||||
* This can be `NULL` in C, `nullptr` in C++, and `0` for both.
|
||||
* We choose the most portable one here.
|
||||
*/
|
||||
}
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc))
|
||||
/** @endcond */
|
||||
|
||||
/**
|
||||
* Identical to rb_enc_codepoint(), except it assumes the passed character is
|
||||
|
@ -552,7 +603,14 @@ unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc);
|
|||
* @param[in] enc Encoding of the string.
|
||||
* @return Code point of the character pointed by `p`.
|
||||
*/
|
||||
#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE((enc),(UChar*)(p),(UChar*)(e))
|
||||
static inline OnigCodePoint
|
||||
rb_enc_mbc_to_codepoint(const char *p, const char *e, const rb_encoding *enc)
|
||||
{
|
||||
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
|
||||
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
|
||||
|
||||
return ONIGENC_MBC_TO_CODE(enc, up, ue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the number of bytes requested to represent the passed code point
|
||||
|
@ -573,11 +631,13 @@ int rb_enc_codelen(int code, rb_encoding *enc);
|
|||
* @retval 0 `code` is invalid.
|
||||
* @retval otherwise Number of bytes used for `enc` to encode `code`.
|
||||
*/
|
||||
int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
|
||||
static inline int
|
||||
rb_enc_code_to_mbclen(int c, const rb_encoding *enc)
|
||||
{
|
||||
OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
#define rb_enc_code_to_mbclen(c, enc) ONIGENC_CODE_TO_MBCLEN((enc), (c));
|
||||
/** @endcond */
|
||||
return ONIGENC_CODE_TO_MBCLEN(enc, uc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_enc_uint_chr(), except it writes back to the passed buffer
|
||||
|
@ -587,8 +647,20 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
|
|||
* @param[out] buf Return buffer.
|
||||
* @param[in] enc Target encoding scheme.
|
||||
* @post `c` is encoded according to `enc`, then written to `buf`.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* The second argument must be typed. But its current usages prevent us from
|
||||
* being any stricter than this. :FIXME:
|
||||
*/
|
||||
#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC((enc),(c),(UChar*)(buf))
|
||||
static inline int
|
||||
rb_enc_mbcput(unsigned int c, void *buf, const rb_encoding *enc)
|
||||
{
|
||||
OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
|
||||
OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf);
|
||||
|
||||
return ONIGENC_CODE_TO_MBC(enc, uc, ubuf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the previous (left) character.
|
||||
|
@ -600,7 +672,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
|
|||
* @retval NULL No previous character.
|
||||
* @retval otherwise Pointer to the head of the previous character.
|
||||
*/
|
||||
#define rb_enc_prev_char(s,p,e,enc) ((char *)onigenc_get_prev_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
|
||||
static inline char *
|
||||
rb_enc_prev_char(const char *s, const char *p, const char *e, const rb_encoding *enc)
|
||||
{
|
||||
const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
|
||||
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
|
||||
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
|
||||
OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue);
|
||||
|
||||
return RBIMPL_CAST((char *)ur);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the left boundary of a character. This function takes a pointer
|
||||
|
@ -612,7 +693,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
|
|||
* @param[in] enc Encoding.
|
||||
* @return Pointer to the head of the character that contains `p`.
|
||||
*/
|
||||
#define rb_enc_left_char_head(s,p,e,enc) ((char *)onigenc_get_left_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
|
||||
static inline char *
|
||||
rb_enc_left_char_head(const char *s, const char *p, const char *e, const rb_encoding *enc)
|
||||
{
|
||||
const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
|
||||
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
|
||||
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
|
||||
OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue);
|
||||
|
||||
return RBIMPL_CAST((char *)ur);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the right boundary of a character. This function takes a pointer
|
||||
|
@ -624,7 +714,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
|
|||
* @param[in] enc Encoding.
|
||||
* @return Pointer to the end of the character that contains `p`.
|
||||
*/
|
||||
#define rb_enc_right_char_head(s,p,e,enc) ((char *)onigenc_get_right_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
|
||||
static inline char *
|
||||
rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
|
||||
{
|
||||
const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
|
||||
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
|
||||
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
|
||||
OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue);
|
||||
|
||||
return RBIMPL_CAST((char *)ur);
|
||||
}
|
||||
|
||||
/**
|
||||
* Scans the string backwards for n characters.
|
||||
|
@ -637,7 +736,16 @@ int rb_enc_code_to_mbclen(int code, rb_encoding *enc);
|
|||
* @retval NULL There are no `n` characters left.
|
||||
* @retval otherwise Pointer to the character `n` characters before `p`.
|
||||
*/
|
||||
#define rb_enc_step_back(s,p,e,n,enc) ((char *)onigenc_step_back((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e),(int)(n)))
|
||||
static inline char *
|
||||
rb_enc_step_back(const char *s, const char *p, const char *e, int n, const rb_encoding *enc)
|
||||
{
|
||||
const OnigUChar *us = RBIMPL_CAST((const OnigUChar *)s);
|
||||
const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
|
||||
const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
|
||||
const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n);
|
||||
|
||||
return RBIMPL_CAST((char *)ur);
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
|
@ -670,8 +778,19 @@ rb_enc_asciicompat_inline(rb_encoding *enc)
|
|||
* @retval 0 It is incompatible.
|
||||
* @retval 1 It is compatible.
|
||||
*/
|
||||
#define rb_enc_asciicompat(enc) rb_enc_asciicompat_inline(enc)
|
||||
|
||||
static inline bool
|
||||
rb_enc_asciicompat(rb_encoding *enc)
|
||||
{
|
||||
if (rb_enc_mbminlen(enc) != 1) {
|
||||
return false;
|
||||
}
|
||||
else if (rb_enc_dummy_p(enc)) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries if the passed string is in an ASCII-compatible encoding.
|
||||
|
@ -680,7 +799,13 @@ rb_enc_asciicompat_inline(rb_encoding *enc)
|
|||
* @retval 0 `str` is not a String, or an ASCII-incompatible string.
|
||||
* @retval 1 Otherwise.
|
||||
*/
|
||||
#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str))
|
||||
static inline bool
|
||||
rb_enc_str_asciicompat_p(VALUE str)
|
||||
{
|
||||
rb_encoding *enc = rb_enc_get(str);
|
||||
|
||||
return rb_enc_asciicompat(enc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries the Ruby-level counterpart instance of ::rb_cEncoding that
|
||||
|
@ -803,6 +928,21 @@ RBIMPL_ATTR_CONST()
|
|||
int rb_ascii8bit_encindex(void);
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Queries if the passed object is in ascii 8bit (== binary) encoding. The
|
||||
* object must be capable of having inline encoding. Using this macro needs
|
||||
* deep understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[in] obj An object to check.
|
||||
* @retval 1 It is.
|
||||
* @retval 0 It isn't.
|
||||
*/
|
||||
static inline bool
|
||||
RB_ENCODING_IS_ASCII8BIT(VALUE obj)
|
||||
{
|
||||
return RB_ENCODING_GET_INLINED(obj) == rb_ascii8bit_encindex();
|
||||
}
|
||||
|
||||
#ifndef rb_utf8_encindex
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
|
@ -894,4 +1034,25 @@ VALUE rb_locale_charmap(VALUE klass);
|
|||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
#define RB_ENCODING_GET RB_ENCODING_GET
|
||||
#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED
|
||||
#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT
|
||||
#define RB_ENCODING_SET RB_ENCODING_SET
|
||||
#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED
|
||||
#define rb_enc_asciicompat rb_enc_asciicompat
|
||||
#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen
|
||||
#define rb_enc_codepoint rb_enc_codepoint
|
||||
#define rb_enc_left_char_head rb_enc_left_char_head
|
||||
#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint
|
||||
#define rb_enc_mbcput rb_enc_mbcput
|
||||
#define rb_enc_mbmaxlen rb_enc_mbmaxlen
|
||||
#define rb_enc_mbminlen rb_enc_mbminlen
|
||||
#define rb_enc_name rb_enc_name
|
||||
#define rb_enc_prev_char rb_enc_prev_char
|
||||
#define rb_enc_right_char_head rb_enc_right_char_head
|
||||
#define rb_enc_step_back rb_enc_step_back
|
||||
#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p
|
||||
/** @endcond */
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_ENCODING_H */
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "ruby/internal/value.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/attr/nonnull.h"
|
||||
#include "ruby/internal/intern/string.h" /* rbimpl_strlen */
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
|
@ -318,18 +319,26 @@ RBIMPL_ATTR_NONNULL(())
|
|||
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
#ifdef HAVE_BUILTIN___BUILTIN_CONSTANT_P
|
||||
#define rb_enc_str_new(str, len, enc) RB_GNUC_EXTENSION_BLOCK( \
|
||||
(__builtin_constant_p(str) && __builtin_constant_p(len)) ? \
|
||||
rb_enc_str_new_static((str), (len), (enc)) : \
|
||||
rb_enc_str_new((str), (len), (enc)) \
|
||||
)
|
||||
#define rb_enc_str_new_cstr(str, enc) RB_GNUC_EXTENSION_BLOCK( \
|
||||
(__builtin_constant_p(str)) ? \
|
||||
rb_enc_str_new_static((str), (long)strlen(str), (enc)) : \
|
||||
rb_enc_str_new_cstr((str), (enc)) \
|
||||
)
|
||||
#endif
|
||||
RBIMPL_ATTR_NONNULL(())
static inline VALUE
rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc)
{
    /* Measure the C string with rbimpl_strlen() and pass pointer, length and
     * encoding on to rb_enc_str_new_static(). */
    const long length = rbimpl_strlen(str);
    const VALUE result = rb_enc_str_new_static(str, length, enc);

    return result;
}
|
||||
|
||||
/* Call-site dispatch: when both `str` and `len` satisfy RBIMPL_CONSTANT_P the
 * call is routed to rb_enc_str_new_static(), otherwise to the function
 * rb_enc_str_new().  The ternary selects a *function*, which is then applied
 * to the three arguments.  The inner `rb_enc_str_new` is not re-expanded,
 * because a macro is never expanded recursively inside its own expansion. */
#define rb_enc_str_new(str, len, enc) \
|
||||
((RBIMPL_CONSTANT_P(str) && \
|
||||
RBIMPL_CONSTANT_P(len) ? \
|
||||
rb_enc_str_new_static: \
|
||||
rb_enc_str_new) ((str), (len), (enc)))
|
||||
|
||||
/* Call-site dispatch: when `str` satisfies RBIMPL_CONSTANT_P the call is
 * routed to the inline helper rbimpl_enc_str_new_cstr(), otherwise to the
 * function rb_enc_str_new_cstr().  The ternary selects a *function*, which is
 * then applied to the two arguments.  The inner `rb_enc_str_new_cstr` is not
 * re-expanded, because a macro is never expanded recursively inside its own
 * expansion. */
#define rb_enc_str_new_cstr(str, enc) \
|
||||
((RBIMPL_CONSTANT_P(str) ? \
|
||||
rbimpl_enc_str_new_cstr : \
|
||||
rb_enc_str_new_cstr) ((str), (enc)))
|
||||
|
||||
/** @endcond */
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue