1#ifndef RUBY_INTERNAL_ENCODING_ENCODING_H
2#define RUBY_INTERNAL_ENCODING_ENCODING_H
24#include "ruby/oniguruma.h"
50enum ruby_encoding_consts {
53 RUBY_ENCODING_INLINE_MAX = 127,
59 RUBY_ENCODING_MASK = (RUBY_ENCODING_INLINE_MAX<<RUBY_ENCODING_SHIFT
63 RUBY_ENCODING_MAXNAMELEN = 42
/* Unprefixed aliases for the encoding-flag constants: each name below
 * expands directly to the RUBY_-prefixed constant it is paired with. */
66#define ENCODING_INLINE_MAX RUBY_ENCODING_INLINE_MAX
67#define ENCODING_SHIFT RUBY_ENCODING_SHIFT
68#define ENCODING_MASK RUBY_ENCODING_MASK
80RB_ENCODING_SET_INLINED(
VALUE obj,
int encindex)
84 f <<= RUBY_ENCODING_SHIFT;
85 RB_FL_UNSET_RAW(obj, RUBY_ENCODING_MASK);
86 RB_FL_SET_RAW(obj, f);
98RB_ENCODING_GET_INLINED(
VALUE obj)
100 VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENCODING_MASK) >> RUBY_ENCODING_SHIFT;
102 return RBIMPL_CAST((
int)ret);
/* Unprefixed aliases for the encoding accessors.  Every function-like
 * macro here forwards its arguments unchanged to the RB_-prefixed name;
 * ENCODING_MAXNAMELEN is a plain alias of RUBY_ENCODING_MAXNAMELEN. */
105#define ENCODING_SET_INLINED(obj,i) RB_ENCODING_SET_INLINED(obj,i)
106#define ENCODING_SET(obj,i) RB_ENCODING_SET(obj,i)
107#define ENCODING_GET_INLINED(obj) RB_ENCODING_GET_INLINED(obj)
108#define ENCODING_GET(obj) RB_ENCODING_GET(obj)
109#define ENCODING_IS_ASCII8BIT(obj) RB_ENCODING_IS_ASCII8BIT(obj)
110#define ENCODING_MAXNAMELEN RUBY_ENCODING_MAXNAMELEN
213 int encindex = RB_ENCODING_GET_INLINED(obj);
215 if (encindex == RUBY_ENCODING_INLINE_MAX) {
237RB_ENCODING_SET(
VALUE obj,
int encindex)
256 RB_ENCODING_SET(obj, encindex);
257 RB_ENC_CODERANGE_SET(obj, cr);
432static inline const
char *
450 return enc->min_enc_len;
465 return enc->max_enc_len;
/* Unprefixed spellings of the ONIGENC_MBCLEN_* macros; each one forwards
 * its single argument `ret` unchanged.
 * NOTE(review): the ONIGENC_* macros are presumably supplied by
 * "ruby/oniguruma.h" -- confirm against that header. */
532#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret)
533#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
534#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret)
535#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret)
536#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
586static inline unsigned int
606static inline OnigCodePoint
607rb_enc_mbc_to_codepoint(
const char *p,
const char *e,
rb_encoding *enc)
609 const OnigUChar *up = RBIMPL_CAST((
const OnigUChar *)p);
610 const OnigUChar *ue = RBIMPL_CAST((
const OnigUChar *)e);
612 return ONIGENC_MBC_TO_CODE(enc, up, ue);
637 OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
639 return ONIGENC_CODE_TO_MBCLEN(enc, uc);
661 OnigCodePoint uc = RBIMPL_CAST((OnigCodePoint)c);
662 OnigUChar *ubuf = RBIMPL_CAST((OnigUChar *)buf);
664 return ONIGENC_CODE_TO_MBC(enc, uc, ubuf);
678rb_enc_prev_char(
const char *s,
const char *p,
const char *e,
rb_encoding *enc)
680 const OnigUChar *us = RBIMPL_CAST((
const OnigUChar *)s);
681 const OnigUChar *up = RBIMPL_CAST((
const OnigUChar *)p);
682 const OnigUChar *ue = RBIMPL_CAST((
const OnigUChar *)e);
683 OnigUChar *ur = onigenc_get_prev_char_head(enc, us, up, ue);
685 return RBIMPL_CAST((
char *)ur);
699rb_enc_left_char_head(
const char *s,
const char *p,
const char *e,
rb_encoding *enc)
701 const OnigUChar *us = RBIMPL_CAST((
const OnigUChar *)s);
702 const OnigUChar *up = RBIMPL_CAST((
const OnigUChar *)p);
703 const OnigUChar *ue = RBIMPL_CAST((
const OnigUChar *)e);
704 OnigUChar *ur = onigenc_get_left_adjust_char_head(enc, us, up, ue);
706 return RBIMPL_CAST((
char *)ur);
720rb_enc_right_char_head(
const char *s,
const char *p,
const char *e,
rb_encoding *enc)
722 const OnigUChar *us = RBIMPL_CAST((
const OnigUChar *)s);
723 const OnigUChar *up = RBIMPL_CAST((
const OnigUChar *)p);
724 const OnigUChar *ue = RBIMPL_CAST((
const OnigUChar *)e);
725 OnigUChar *ur = onigenc_get_right_adjust_char_head(enc, us, up, ue);
727 return RBIMPL_CAST((
char *)ur);
742rb_enc_step_back(
const char *s,
const char *p,
const char *e,
int n,
rb_encoding *enc)
744 const OnigUChar *us = RBIMPL_CAST((
const OnigUChar *)s);
745 const OnigUChar *up = RBIMPL_CAST((
const OnigUChar *)p);
746 const OnigUChar *ue = RBIMPL_CAST((
const OnigUChar *)e);
747 const OnigUChar *ur = onigenc_step_back(enc, us, up, ue, n);
749 return RBIMPL_CAST((
char *)ur);
786 if (rb_enc_mbminlen(enc) != 1) {
809 return rb_enc_asciicompat(enc);
918#ifndef rb_ascii8bit_encindex
948#ifndef rb_utf8_encindex
959#ifndef rb_usascii_encindex
/* Self-referential macros: each name expands to itself, so using the name
 * in code is unaffected, while `#ifdef` / `#ifndef` / `defined()` tests on
 * the name now succeed.
 * NOTE(review): presumably this lets other code detect that these names
 * are provided by this header -- confirm the intended consumers. */
1040#define RB_ENCODING_GET RB_ENCODING_GET
1041#define RB_ENCODING_GET_INLINED RB_ENCODING_GET_INLINED
1042#define RB_ENCODING_IS_ASCII8BIT RB_ENCODING_IS_ASCII8BIT
1043#define RB_ENCODING_SET RB_ENCODING_SET
1044#define RB_ENCODING_SET_INLINED RB_ENCODING_SET_INLINED
1045#define rb_enc_asciicompat rb_enc_asciicompat
1046#define rb_enc_code_to_mbclen rb_enc_code_to_mbclen
1047#define rb_enc_codepoint rb_enc_codepoint
1048#define rb_enc_left_char_head rb_enc_left_char_head
1049#define rb_enc_mbc_to_codepoint rb_enc_mbc_to_codepoint
1050#define rb_enc_mbcput rb_enc_mbcput
1051#define rb_enc_mbmaxlen rb_enc_mbmaxlen
1052#define rb_enc_mbminlen rb_enc_mbminlen
1053#define rb_enc_name rb_enc_name
1054#define rb_enc_prev_char rb_enc_prev_char
1055#define rb_enc_right_char_head rb_enc_right_char_head
1056#define rb_enc_step_back rb_enc_step_back
1057#define rb_enc_str_asciicompat_p rb_enc_str_asciicompat_p
ruby_coderange_type
What rb_enc_str_coderange() returns.
Defines RBIMPL_ATTR_CONST.
#define RBIMPL_ATTR_CONST()
Wraps (or simulates) __attribute__((const))
Defines RBIMPL_ATTR_DEPRECATED.
Tweaking visibility of C variables/functions.
#define RUBY_EXTERN
Declaration of externally visible global variables.
#define RBIMPL_SYMBOL_EXPORT_END()
Counterpart of RBIMPL_SYMBOL_EXPORT_BEGIN.
#define RBIMPL_SYMBOL_EXPORT_BEGIN()
Shortcut macro equivalent to RUBY_SYMBOL_EXPORT_BEGIN extern "C" {.
Defines enum ruby_fl_type.
@ RUBY_FL_USHIFT
Number of bits in ruby_fl_type that are not open to users.
VALUE rb_cEncoding
Encoding class.
int rb_enc_dummy_p(rb_encoding *enc)
Queries if the passed encoding is dummy.
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_get_index(VALUE obj)
Queries the index of the encoding of the passed object, if any.
int rb_to_encoding_index(VALUE obj)
Obtains an encoding index from a wider range of objects (than rb_enc_find_index()).
int rb_filesystem_encindex(void)
Identical to rb_filesystem_encoding(), except it returns the encoding's index instead of the encoding itself.
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Identical to rb_enc_associate_index(), except it takes an encoding itself instead of its index.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT a.k.a. binary.
int rb_enc_codelen(int code, rb_encoding *enc)
Queries the number of bytes required to represent the passed code point using the passed encoding.
rb_encoding * rb_to_encoding(VALUE obj)
Identical to rb_find_encoding(), except it raises an exception instead of returning NULL.
const OnigEncodingType rb_encoding
The type of encoding.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
void rb_enc_copy(VALUE dst, VALUE src)
Destructively copies the encoding of the latter object to the former one.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Identical to rb_enc_mbclen() unless the character at p overruns e.
rb_encoding * rb_enc_get(VALUE obj)
Identical to rb_enc_get_index(), except the return type.
rb_encoding * rb_enc_from_index(int idx)
Identical to rb_find_encoding(), except it takes an encoding index instead of a Ruby object.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of the character pointed to by the passed pointer.
int rb_enc_unicode_p(rb_encoding *enc)
Queries if the passed encoding is either one of UTF-8/16/32.
int rb_enc_to_index(rb_encoding *enc)
Queries the index of the encoding.
void rb_enc_set_index(VALUE obj, int encindex)
Destructively assigns an encoding (via its index) to an object.
VALUE rb_locale_charmap(VALUE klass)
Returns a platform-dependent "charmap" of the current locale.
void rb_enc_set_default_internal(VALUE encoding)
Destructively assigns the passed encoding as the default internal encoding.
VALUE rb_enc_default_external(void)
Identical to rb_default_external_encoding(), except it returns the Ruby-level counterpart instance of rb_cEncoding that corresponds to the default external encoding.
rb_encoding * rb_enc_find(const char *name)
Identical to rb_find_encoding(), except it takes a C's string instead of Ruby's.
VALUE rb_enc_from_encoding(rb_encoding *enc)
Queries the Ruby-level counterpart instance of rb_cEncoding that corresponds to the passed encoding.
rb_encoding * rb_find_encoding(VALUE obj)
Identical to rb_to_encoding_index(), except the return type.
int rb_define_dummy_encoding(const char *name)
Creates a new "dummy" encoding.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
int rb_locale_encindex(void)
Identical to rb_locale_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Identical to rb_enc_compatible(), except it raises an exception instead of returning NULL.
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Queries the number of bytes of the character at the passed pointer.
int rb_enc_capable(VALUE obj)
Queries if the passed object can have its encoding.
static void RB_ENCODING_CODERANGE_SET(VALUE obj, int encindex, enum ruby_coderange_type cr)
This is RB_ENCODING_SET + RB_ENC_CODERANGE_SET combo.
VALUE rb_enc_default_internal(void)
Identical to rb_default_internal_encoding(), except it returns the Ruby-level counterpart instance of rb_cEncoding that corresponds to the default internal encoding.
VALUE rb_enc_associate_index(VALUE obj, int encindex)
Identical to rb_enc_set_index(), except it additionally does contents fix-up depending on the passed object.
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Look for the "common" encoding between the two.
int rb_enc_replicate(const char *name, rb_encoding *src)
Creates a new encoding, using the passed one as a template.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
void rb_enc_set_default_external(VALUE encoding)
Destructively assigns the passed encoding as the default external encoding.
int rb_enc_find_index(const char *name)
Queries the index of the encoding.
int rb_enc_alias(const char *alias, const char *orig)
Registers an "alias" name.
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Queries the code point of the character pointed to by the passed pointer.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
Defines RBIMPL_ATTR_NOALIAS.
#define RBIMPL_ATTR_NOALIAS()
Wraps (or simulates) __declspec(noalias)
Defines RBIMPL_ATTR_PURE.
#define RBIMPL_ATTR_PURE()
Wraps (or simulates) __attribute__((pure))
Defines RBIMPL_ATTR_RETURNS_NONNULL.
#define RBIMPL_ATTR_RETURNS_NONNULL()
Wraps (or simulates) __attribute__((returns_nonnull))
uintptr_t VALUE
Type that represents a Ruby object.