12#include "ruby/internal/config.h"
19#include "internal/encoding.h"
20#include "internal/hash.h"
21#include "internal/imemo.h"
22#include "internal/re.h"
23#include "internal/string.h"
24#include "internal/object.h"
25#include "internal/ractor.h"
26#include "internal/variable.h"
34typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
35#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
37#define BEG(no) (regs->beg[(no)])
38#define END(no) (regs->end[(no)])
41static const char casetable[] = {
42 '\000',
'\001',
'\002',
'\003',
'\004',
'\005',
'\006',
'\007',
43 '\010',
'\011',
'\012',
'\013',
'\014',
'\015',
'\016',
'\017',
44 '\020',
'\021',
'\022',
'\023',
'\024',
'\025',
'\026',
'\027',
45 '\030',
'\031',
'\032',
'\033',
'\034',
'\035',
'\036',
'\037',
47 '\040',
'\041',
'\042',
'\043',
'\044',
'\045',
'\046',
'\047',
49 '\050',
'\051',
'\052',
'\053',
'\054',
'\055',
'\056',
'\057',
51 '\060',
'\061',
'\062',
'\063',
'\064',
'\065',
'\066',
'\067',
53 '\070',
'\071',
'\072',
'\073',
'\074',
'\075',
'\076',
'\077',
55 '\100',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
57 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
59 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
61 '\170',
'\171',
'\172',
'\133',
'\134',
'\135',
'\136',
'\137',
63 '\140',
'\141',
'\142',
'\143',
'\144',
'\145',
'\146',
'\147',
65 '\150',
'\151',
'\152',
'\153',
'\154',
'\155',
'\156',
'\157',
67 '\160',
'\161',
'\162',
'\163',
'\164',
'\165',
'\166',
'\167',
69 '\170',
'\171',
'\172',
'\173',
'\174',
'\175',
'\176',
'\177',
70 '\200',
'\201',
'\202',
'\203',
'\204',
'\205',
'\206',
'\207',
71 '\210',
'\211',
'\212',
'\213',
'\214',
'\215',
'\216',
'\217',
72 '\220',
'\221',
'\222',
'\223',
'\224',
'\225',
'\226',
'\227',
73 '\230',
'\231',
'\232',
'\233',
'\234',
'\235',
'\236',
'\237',
74 '\240',
'\241',
'\242',
'\243',
'\244',
'\245',
'\246',
'\247',
75 '\250',
'\251',
'\252',
'\253',
'\254',
'\255',
'\256',
'\257',
76 '\260',
'\261',
'\262',
'\263',
'\264',
'\265',
'\266',
'\267',
77 '\270',
'\271',
'\272',
'\273',
'\274',
'\275',
'\276',
'\277',
78 '\300',
'\301',
'\302',
'\303',
'\304',
'\305',
'\306',
'\307',
79 '\310',
'\311',
'\312',
'\313',
'\314',
'\315',
'\316',
'\317',
80 '\320',
'\321',
'\322',
'\323',
'\324',
'\325',
'\326',
'\327',
81 '\330',
'\331',
'\332',
'\333',
'\334',
'\335',
'\336',
'\337',
82 '\340',
'\341',
'\342',
'\343',
'\344',
'\345',
'\346',
'\347',
83 '\350',
'\351',
'\352',
'\353',
'\354',
'\355',
'\356',
'\357',
84 '\360',
'\361',
'\362',
'\363',
'\364',
'\365',
'\366',
'\367',
85 '\370',
'\371',
'\372',
'\373',
'\374',
'\375',
'\376',
'\377',
88# error >>> "You lose. You will need a translation table for your character set." <<<
94 const unsigned char *p1 = x, *p2 = y;
98 if ((tmp = casetable[(
unsigned)*p1++] - casetable[(
unsigned)*p2++]))
106rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
108 const unsigned char *y;
110 if ((y = memmem(ys, n, xs, m)) != NULL)
117rb_memsearch_ss(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
119 const unsigned char *x = xs, *xe = xs + m;
120 const unsigned char *y = ys, *ye = ys + n;
121#define VALUE_MAX ((VALUE)~(VALUE)0)
125 rb_bug(
"!!too long pattern string!!");
127 if (!(y = memchr(y, *x, n - m + 1)))
131 for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
151rb_memsearch_qs(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
153 const unsigned char *x = xs, *xe = xs + m;
154 const unsigned char *y = ys;
155 VALUE i, qstable[256];
158 for (i = 0; i < 256; ++i)
161 qstable[*x] = xe - x;
163 for (; y + m <= ys + n; y += *(qstable + y[m])) {
164 if (*xs == *y && memcmp(xs, y, m) == 0)
170static inline unsigned int
171rb_memsearch_qs_utf8_hash(
const unsigned char *x)
173 register const unsigned int mix = 8353;
174 register unsigned int h = *x;
199 return (
unsigned char)h;
203rb_memsearch_qs_utf8(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
205 const unsigned char *x = xs, *xe = xs + m;
206 const unsigned char *y = ys;
207 VALUE i, qstable[512];
210 for (i = 0; i < 512; ++i) {
213 for (; x < xe; ++x) {
214 qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
217 for (; y + m <= ys + n; y += qstable[rb_memsearch_qs_utf8_hash(y+m)]) {
218 if (*xs == *y && memcmp(xs, y, m) == 0)
225rb_memsearch_with_char_size(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n,
int char_size)
227 const unsigned char *x = xs, x0 = *xs, *y = ys;
229 for (n -= m; n >= 0; n -= char_size, y += char_size) {
230 if (x0 == *y && memcmp(x+1, y+1, m-1) == 0)
237rb_memsearch_wchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
239 return rb_memsearch_with_char_size(xs, m, ys, n, 2);
243rb_memsearch_qchar(
const unsigned char *xs,
long m,
const unsigned char *ys,
long n)
245 return rb_memsearch_with_char_size(xs, m, ys, n, 4);
251 const unsigned char *x = x0, *y = y0;
253 if (m > n)
return -1;
255 return memcmp(x0, y0, m) == 0 ? 0 : -1;
261 const unsigned char *ys = memchr(y, *x, n);
268 else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
270 return rb_memsearch_ss(x0, m, y0, n);
272 else if (enc == rb_utf8_encoding()){
273 return rb_memsearch_qs_utf8(x0, m, y0, n);
276 else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
277 return rb_memsearch_wchar(x0, m, y0, n);
279 else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
280 return rb_memsearch_qchar(x0, m, y0, n);
282 return rb_memsearch_qs(x0, m, y0, n);
285#define REG_LITERAL FL_USER5
286#define REG_ENCODING_NONE FL_USER6
288#define KCODE_FIXED FL_USER4
290#define ARG_REG_OPTION_MASK \
291 (ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
292#define ARG_ENCODING_FIXED 16
293#define ARG_ENCODING_NONE 32
302 val = ONIG_OPTION_IGNORECASE;
305 val = ONIG_OPTION_EXTEND;
308 val = ONIG_OPTION_MULTILINE;
317enum { OPTBUF_SIZE = 4 };
320option_to_str(
char str[OPTBUF_SIZE],
int options)
323 if (options & ONIG_OPTION_MULTILINE) *p++ =
'm';
324 if (options & ONIG_OPTION_IGNORECASE) *p++ =
'i';
325 if (options & ONIG_OPTION_EXTEND) *p++ =
'x';
337 *kcode = rb_ascii8bit_encindex();
338 return (*option = ARG_ENCODING_NONE);
340 *kcode = ENCINDEX_EUC_JP;
343 *kcode = ENCINDEX_Windows_31J;
346 *kcode = rb_utf8_encindex();
350 return (*option = char_to_option(c));
352 *option = ARG_ENCODING_FIXED;
357rb_reg_check(
VALUE re)
359 if (!
RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
365rb_reg_expr_str(
VALUE str,
const char *s,
long len,
368 const char *p, *pend;
373 p = s; pend = p + len;
377 c = rb_enc_ascget(p, pend, &clen, enc);
380 p += mbclen(p, pend, enc);
387 else if (c != term && rb_enc_isprint(c, enc)) {
404 int unicode_p = rb_enc_unicode_p(enc);
407 c = rb_enc_ascget(p, pend, &clen, enc);
408 if (c ==
'\\' && p+clen < pend) {
409 int n = clen + mbclen(p+clen, pend, enc);
415 clen = rb_enc_precise_mbclen(p, pend, enc);
417 c = (
unsigned char)*p;
422 unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
423 rb_str_buf_cat_escaped_char(str, c, unicode_p);
430 else if (c == term) {
435 else if (rb_enc_isprint(c, enc)) {
438 else if (!rb_enc_isspace(c, enc)) {
442 snprintf(b,
sizeof(b),
"\\x%02X", c);
454rb_reg_desc(
const char *s,
long len,
VALUE re)
458 rb_encoding *resenc = rb_default_internal_encoding();
459 if (resenc == NULL) resenc = rb_default_external_encoding();
461 if (re && rb_enc_asciicompat(enc)) {
462 rb_enc_copy(str, re);
465 rb_enc_associate(str, rb_usascii_encoding());
467 rb_reg_expr_str(str, s, len, enc, resenc,
'/');
470 char opts[OPTBUF_SIZE];
472 if (*option_to_str(opts,
RREGEXP_PTR(re)->options))
474 if (
RBASIC(re)->flags & REG_ENCODING_NONE)
500rb_reg_source(
VALUE re)
521rb_reg_inspect(
VALUE re)
523 if (!
RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
526 return rb_reg_desc(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), re);
529static VALUE rb_reg_str_with_term(
VALUE re,
int term);
561 return rb_reg_str_with_term(re,
'/');
565rb_reg_str_with_term(
VALUE re,
int term)
568 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
572 char optbuf[OPTBUF_SIZE + 1];
577 rb_enc_copy(str, re);
579 ptr = (UChar*)RREGEXP_SRC_PTR(re);
580 len = RREGEXP_SRC_LEN(re);
582 if (len >= 4 && ptr[0] ==
'(' && ptr[1] ==
'?') {
585 if ((len -= 2) > 0) {
587 opt = char_to_option((
int )*ptr);
597 if (len > 1 && *ptr ==
'-') {
601 opt = char_to_option((
int )*ptr);
616 if (*ptr ==
':' && ptr[len-1] ==
')') {
623 err = onig_new(&rp, ptr, ptr + len, options,
624 enc, OnigDefaultSyntax, NULL);
630 ptr = (UChar*)RREGEXP_SRC_PTR(re);
631 len = RREGEXP_SRC_LEN(re);
637 if ((options & embeddable) != embeddable) {
639 option_to_str(optbuf + 1, ~options);
644 if (rb_enc_asciicompat(enc)) {
645 rb_reg_expr_str(str, (
char*)ptr, len, enc, NULL, term);
653 rb_enc_associate(str, rb_usascii_encoding());
657 s = RSTRING_PTR(str);
658 e = RSTRING_END(str);
659 s = rb_enc_left_char_head(s, e-1, e, enc);
665 rb_reg_expr_str(str, (
char*)ptr, len, enc, NULL, term);
668 rb_enc_copy(str, re);
673NORETURN(
static void rb_reg_raise(
const char *s,
long len,
const char *err,
VALUE re));
676rb_reg_raise(
const char *s,
long len,
const char *err,
VALUE re)
678 VALUE desc = rb_reg_desc(s, len, re);
684rb_enc_reg_error_desc(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
686 char opts[OPTBUF_SIZE + 1];
688 rb_encoding *resenc = rb_default_internal_encoding();
689 if (resenc == NULL) resenc = rb_default_external_encoding();
691 rb_enc_associate(desc, enc);
693 rb_reg_expr_str(desc, s, len, enc, resenc,
'/');
695 option_to_str(opts + 1, options);
700NORETURN(
static void rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err));
703rb_enc_reg_raise(
const char *s,
long len,
rb_encoding *enc,
int options,
const char *err)
705 rb_exc_raise(rb_enc_reg_error_desc(s, len, enc, options, err));
709rb_reg_error_desc(
VALUE str,
int options,
const char *err)
711 return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
712 rb_enc_get(str), options, err);
715NORETURN(
static void rb_reg_raise_str(
VALUE str,
int options,
const char *err));
718rb_reg_raise_str(
VALUE str,
int options,
const char *err)
738rb_reg_casefold_p(
VALUE re)
741 return RBOOL(
RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
783rb_reg_options_m(
VALUE re)
790reg_names_iter(
const OnigUChar *name,
const OnigUChar *name_end,
791 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
794 rb_ary_push(ary, rb_enc_str_new((
const char *)name, name_end-name, regex->enc));
812rb_reg_names(
VALUE re)
816 ary = rb_ary_new_capa(onig_number_of_names(
RREGEXP_PTR(re)));
817 onig_foreach_name(
RREGEXP_PTR(re), reg_names_iter, (
void*)ary);
822reg_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
823 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
829 for (i = 0; i < back_num; i++)
830 rb_ary_store(ary, i,
INT2NUM(back_refs[i]));
832 rb_hash_aset(hash,
rb_str_new((
const char*)name, name_end-name),ary);
856rb_reg_named_captures(
VALUE re)
859 VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
860 onig_foreach_name(reg, reg_named_captures_iter, (
void*)hash);
865onig_new_with_source(
regex_t** reg,
const UChar* pattern,
const UChar* pattern_end,
867 OnigErrorInfo* einfo,
const char *sourcefile,
int sourceline)
872 if (IS_NULL(*reg))
return ONIGERR_MEMORY;
874 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
877 r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
887make_regexp(
const char *s,
long len,
rb_encoding *enc,
int flags, onig_errmsg_buffer err,
888 const char *sourcefile,
int sourceline)
901 r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags,
902 enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
904 onig_error_code_to_str((UChar*)err, r, &einfo);
963match_alloc(
VALUE klass)
979 if (to->allocated)
return 0;
982 if (to->allocated)
return 0;
983 return ONIGERR_MEMORY;
992pair_byte_cmp(
const void *pair1,
const void *pair2)
994 long diff = ((
pair_t*)pair1)->byte_pos - ((
pair_t*)pair2)->byte_pos;
995#if SIZEOF_LONG > SIZEOF_INT
996 return diff ? diff > 0 ? 1 : -1 : 0;
1003update_char_offset(
VALUE match)
1007 int i, num_regs, num_pos;
1017 num_regs = rm->
regs.num_regs;
1024 enc = rb_enc_get(
RMATCH(match)->str);
1025 if (rb_enc_mbmaxlen(enc) == 1) {
1026 for (i = 0; i < num_regs; i++) {
1035 for (i = 0; i < num_regs; i++) {
1038 pairs[num_pos++].byte_pos = BEG(i);
1039 pairs[num_pos++].byte_pos = END(i);
1041 qsort(pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1043 s = p = RSTRING_PTR(
RMATCH(match)->str);
1045 for (i = 0; i < num_pos; i++) {
1046 q = s + pairs[i].byte_pos;
1048 pairs[i].char_pos = c;
1052 for (i = 0; i < num_regs; i++) {
1060 key.byte_pos = BEG(i);
1061 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1064 key.byte_pos = END(i);
1065 found = bsearch(&key, pairs, num_pos,
sizeof(
pair_t), pair_byte_cmp);
1071match_check(
VALUE match)
1073 if (!
RMATCH(match)->regexp) {
1090 rm =
RMATCH(obj)->rmatch;
1120match_regexp(
VALUE match)
1124 regexp =
RMATCH(match)->regexp;
1125 if (
NIL_P(regexp)) {
1128 RMATCH(match)->regexp = regexp;
1155match_names(
VALUE match)
1159 return rb_ary_new_capa(0);
1160 return rb_reg_names(
RMATCH(match)->regexp);
1178match_size(
VALUE match)
1184static int name_to_backref_number(
struct re_registers *,
VALUE,
const char*,
const char*);
1185NORETURN(
static void name_to_backref_error(
VALUE name));
1188name_to_backref_error(
VALUE name)
1197 if (i < 0 || regs->num_regs <= i)
1202match_backref_number(
VALUE match,
VALUE backref)
1214 else if (!RB_TYPE_P(backref,
T_STRING)) {
1219 num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
1222 name_to_backref_error(backref);
1231 return match_backref_number(match, backref);
1246 int i = match_backref_number(match, n);
1250 backref_number_check(regs, i);
1255 update_char_offset(match);
1281 int i = match_backref_number(match, n);
1285 backref_number_check(regs, i);
1305 int i = match_backref_number(match, n);
1309 backref_number_check(regs, i);
1314 update_char_offset(match);
1331 int i = match_backref_number(match, n);
1335 backref_number_check(regs, i);
1340 update_char_offset(match);
1373 int i = match_backref_number(match, n);
1376 backref_number_check(regs, i);
1378 long start = BEG(i), end = END(i);
1417 int i = match_backref_number(match, n);
1421 backref_number_check(regs, i);
1426 update_char_offset(match);
1428 &
RMATCH(match)->rmatch->char_offset[i];
1432#define MATCH_BUSY FL_USER2
1437 FL_SET(match, MATCH_BUSY);
1441rb_match_unbusy(
VALUE match)
1447rb_match_count(
VALUE match)
1450 if (
NIL_P(match))
return -1;
1452 if (!regs)
return -1;
1453 return regs->num_regs;
1457rb_match_nth_defined(
int nth,
VALUE match)
1460 if (
NIL_P(match))
return FALSE;
1462 if (!regs)
return FALSE;
1463 if (nth >= regs->num_regs) {
1467 nth += regs->num_regs;
1468 if (nth <= 0)
return FALSE;
1470 return (BEG(nth) != -1);
1474match_set_string(
VALUE m,
VALUE string,
long pos,
long len)
1479 match->
str = string;
1481 int err = onig_region_resize(&
rmatch->
regs, 1);
1482 if (err) rb_memerror();
1488rb_backref_set_string(
VALUE string,
long pos,
long len)
1494 match_set_string(match,
string, pos, len);
1528rb_reg_fixed_encoding_p(
VALUE re)
1530 return RBOOL(
FL_TEST(re, KCODE_FIXED));
1534rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
1535 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options);
1537NORETURN(
static void reg_enc_error(
VALUE re,
VALUE str));
1543 "incompatible encoding regexp match (%s regexp with %s string)",
1544 rb_enc_name(rb_enc_get(re)),
1545 rb_enc_name(rb_enc_get(str)));
1549str_coderange(
VALUE str)
1559rb_reg_prepare_enc(
VALUE re,
VALUE str,
int warn)
1562 int cr = str_coderange(str);
1566 "invalid byte sequence in %s",
1567 rb_enc_name(rb_enc_get(str)));
1571 enc = rb_enc_get(str);
1578 else if (!rb_enc_asciicompat(enc)) {
1579 reg_enc_error(re, str);
1581 else if (rb_reg_fixed_encoding_p(re)) {
1584 reg_enc_error(re, str);
1588 else if (warn && (
RBASIC(re)->flags & REG_ENCODING_NONE) &&
1589 enc != rb_ascii8bit_encoding() &&
1591 rb_warn(
"historical binary regexp match /.../n against %s string",
1598rb_reg_prepare_re0(
VALUE re,
VALUE str, onig_errmsg_buffer err)
1603 const char *pattern;
1606 rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
1608 if (reg->enc == enc)
return reg;
1612 pattern = RREGEXP_SRC_PTR(re);
1614 unescaped = rb_reg_preprocess(
1615 pattern, pattern + RREGEXP_SRC_LEN(re), enc,
1616 &fixed_enc, err, 0);
1618 if (
NIL_P(unescaped)) {
1623 rb_hrtime_t timelimit = reg->timelimit;
1628 r = onig_new(®, (UChar *)ptr, (UChar *)(ptr + len),
1630 OnigDefaultSyntax, &einfo);
1632 onig_error_code_to_str((UChar*)err, r, &einfo);
1633 rb_reg_raise(pattern, RREGEXP_SRC_LEN(re), err, re);
1636 reg->timelimit = timelimit;
1645 onig_errmsg_buffer err =
"";
1646 return rb_reg_prepare_re0(re, str, err);
1656 enc = rb_reg_prepare_enc(re, str, 0);
1662 range = RSTRING_LEN(str) - pos;
1665 if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(str)) {
1666 string = (UChar*)RSTRING_PTR(str);
1669 p = onigenc_get_right_adjust_char_head(enc,
string,
string + pos,
string + RSTRING_LEN(str));
1672 p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,
string,
string + pos,
string + RSTRING_LEN(str));
1682rb_reg_search_set_match(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str,
VALUE *set_match)
1687 char *start, *range;
1691 onig_errmsg_buffer err =
"";
1695 if (pos > len || pos < 0) {
1700 reg = rb_reg_prepare_re0(re, str, err);
1702 if (!tmpreg)
RREGEXP(re)->usecnt++;
1708 result = onig_search(reg,
1710 ((UChar*)(start + len)),
1711 ((UChar*)(start + pos)),
1713 regs, ONIG_OPTION_NONE);
1714 if (!tmpreg)
RREGEXP(re)->usecnt--;
1726 onig_region_free(regs, 0);
1727 if (result == ONIG_MISMATCH) {
1732 onig_error_code_to_str((UChar*)err, (
int)result);
1733 rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
1740 if (set_backref_str) {
1751 RMATCH(match)->regexp = re;
1753 if (set_match) *set_match = match;
1759rb_reg_search0(
VALUE re,
VALUE str,
long pos,
int reverse,
int set_backref_str)
1761 return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
1767 return rb_reg_search0(re, str, pos, reverse, 1);
1778 onig_errmsg_buffer err =
"";
1780 reg = rb_reg_prepare_re0(re, str, err);
1782 if (!tmpreg)
RREGEXP(re)->usecnt++;
1785 if (!
NIL_P(match)) {
1786 if (
FL_TEST(match, MATCH_BUSY)) {
1799 result = onig_match(reg,
1801 ((UChar*)(ptr + len)),
1803 regs, ONIG_OPTION_NONE);
1804 if (!tmpreg)
RREGEXP(re)->usecnt--;
1816 onig_region_free(regs, 0);
1817 if (result == ONIG_MISMATCH) {
1822 onig_error_code_to_str((UChar*)err, (
int)result);
1823 rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
1831 onig_region_free(regs, 0);
1832 if (err) rb_memerror();
1837 RMATCH(match)->regexp = re;
1850 if (nth >= regs->num_regs) {
1854 nth += regs->num_regs;
1855 if (nth <= 0)
return Qnil;
1857 return RBOOL(BEG(nth) != -1);
1864 long start, end, len;
1870 if (nth >= regs->num_regs) {
1874 nth += regs->num_regs;
1875 if (nth <= 0)
return Qnil;
1878 if (start == -1)
return Qnil;
1918 if (BEG(0) == -1)
return Qnil;
1952 if (BEG(0) == -1)
return Qnil;
1953 str =
RMATCH(match)->str;
1968 if (BEG(0) == -1)
return Qnil;
1970 for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
1972 if (i == 0)
return Qnil;
1977last_match_getter(
ID _x,
VALUE *_y)
1983prematch_getter(
ID _x,
VALUE *_y)
1989postmatch_getter(
ID _x,
VALUE *_y)
1995last_paren_match_getter(
ID _x,
VALUE *_y)
2001match_array(
VALUE match,
int start)
2011 target =
RMATCH(match)->str;
2013 for (i=start; i<regs->num_regs; i++) {
2014 if (regs->beg[i] == -1) {
2015 rb_ary_push(ary,
Qnil);
2019 rb_ary_push(ary, str);
2041match_to_a(
VALUE match)
2043 return match_array(match, 0);
2063match_captures(
VALUE match)
2065 return match_array(match, 1);
2069name_to_backref_number(
struct re_registers *regs,
VALUE regexp,
const char* name,
const char* name_end)
2071 if (
NIL_P(regexp))
return -1;
2072 return onig_name_to_backref_number(
RREGEXP_PTR(regexp),
2073 (
const unsigned char *)name, (
const unsigned char *)name_end, regs);
2076#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
2078 !rb_enc_compatible(RREGEXP_SRC(re), (name)) ? 0 : \
2079 name_to_backref_number((regs), (re), (name_ptr), (name_end)))
2089 else if (!RB_TYPE_P(name,
T_STRING)) {
2092 num = NAME_TO_NUMBER(regs, re, name,
2093 RSTRING_PTR(name), RSTRING_END(name));
2095 name_to_backref_error(name);
2101match_ary_subseq(
VALUE match,
long beg,
long len,
VALUE result)
2104 long j, end = olen < beg+len ? olen : beg+len;
2105 if (
NIL_P(result)) result = rb_ary_new_capa(len);
2106 if (len == 0)
return result;
2108 for (j = beg; j < end; j++) {
2111 if (beg + len > j) {
2112 rb_ary_resize(result,
RARRAY_LEN(result) + (beg + len) - j);
2132 return match_ary_subseq(match, beg, len, result);
2164match_aref(
int argc,
VALUE *argv,
VALUE match)
2169 rb_scan_args(argc, argv,
"11", &idx, &length);
2171 if (
NIL_P(length)) {
2176 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, idx);
2181 return match_ary_aref(match, idx,
Qnil);
2194 if (beg < 0)
return Qnil;
2196 else if (beg > num_regs) {
2199 if (beg+len > num_regs) {
2200 len = num_regs - beg;
2202 return match_ary_subseq(match, beg, len,
Qnil);
2233match_values_at(
int argc,
VALUE *argv,
VALUE match)
2241 for (i=0; i<argc; i++) {
2246 int num = namev_to_backref_number(
RMATCH_REGS(match),
RMATCH(match)->regexp, argv[i]);
2251 match_ary_aref(match, argv[i], result);
2278match_to_s(
VALUE match)
2287match_named_captures_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2288 int back_num,
int *back_refs,
OnigRegex regex,
void *arg)
2290 struct MEMO *memo = MEMO_CAST(arg);
2291 VALUE hash = memo->v1;
2292 VALUE match = memo->v2;
2293 long symbolize = memo->u3.state;
2295 VALUE key = rb_enc_str_new((
const char *)name, name_end-name, regex->enc);
2297 if (symbolize > 0) {
2306 for (i = 0; i < back_num; i++) {
2309 rb_hash_aset(hash, key, value);
2315 rb_hash_aset(hash, key,
Qnil);
2347match_named_captures(
VALUE match)
2354 return rb_hash_new();
2356 hash = rb_hash_new();
2357 memo = MEMO_NEW(hash, match, 0);
2359 onig_foreach_name(
RREGEXP(
RMATCH(match)->regexp)->ptr, match_named_captures_iter, (
void*)memo);
2381match_deconstruct_keys(
VALUE match,
VALUE keys)
2389 return rb_hash_new_with_size(0);
2393 h = rb_hash_new_with_size(onig_number_of_names(
RREGEXP_PTR(
RMATCH(match)->regexp)));
2396 memo = MEMO_NEW(h, match, 1);
2398 onig_foreach_name(
RREGEXP_PTR(
RMATCH(match)->regexp), match_named_captures_iter, (
void*)memo);
2406 return rb_hash_new_with_size(0);
2420 RSTRING_PTR(name), RSTRING_END(name));
2447match_string(
VALUE match)
2450 return RMATCH(match)->str;
2459match_inspect_name_iter(
const OnigUChar *name,
const OnigUChar *name_end,
2460 int back_num,
int *back_refs,
OnigRegex regex,
void *arg0)
2465 for (i = 0; i < back_num; i++) {
2466 arg[back_refs[i]].name = name;
2467 arg[back_refs[i]].len = name_end - name;
2495match_inspect(
VALUE match)
2501 int num_regs = regs->num_regs;
2506 return rb_sprintf(
"#<%"PRIsVALUE
":%p>", cname, (
void*)match);
2508 else if (
NIL_P(regexp)) {
2509 return rb_sprintf(
"#<%"PRIsVALUE
": %"PRIsVALUE
">",
2517 match_inspect_name_iter, names);
2522 for (i = 0; i < num_regs; i++) {
2547read_escaped_byte(
const char **pp,
const char *end, onig_errmsg_buffer err)
2549 const char *p = *pp;
2551 int meta_prefix = 0, ctrl_prefix = 0;
2554 if (p == end || *p++ !=
'\\') {
2555 errcpy(err,
"too short escaped multibyte character");
2561 errcpy(err,
"too short escape sequence");
2565 case '\\': code =
'\\';
break;
2566 case 'n': code =
'\n';
break;
2567 case 't': code =
'\t';
break;
2568 case 'r': code =
'\r';
break;
2569 case 'f': code =
'\f';
break;
2570 case 'v': code =
'\013';
break;
2571 case 'a': code =
'\007';
break;
2572 case 'e': code =
'\033';
break;
2575 case '0':
case '1':
case '2':
case '3':
2576 case '4':
case '5':
case '6':
case '7':
2578 code =
scan_oct(p, end < p+3 ? end-p : 3, &len);
2583 code =
scan_hex(p, end < p+2 ? end-p : 2, &len);
2585 errcpy(err,
"invalid hex escape");
2593 errcpy(err,
"duplicate meta escape");
2597 if (p+1 < end && *p++ ==
'-' && (*p & 0x80) == 0) {
2607 errcpy(err,
"too short meta escape");
2611 if (p == end || *p++ !=
'-') {
2612 errcpy(err,
"too short control escape");
2617 errcpy(err,
"duplicate control escape");
2621 if (p < end && (*p & 0x80) == 0) {
2631 errcpy(err,
"too short control escape");
2635 errcpy(err,
"unexpected escape sequence");
2638 if (code < 0 || 0xff < code) {
2639 errcpy(err,
"invalid escape code");
2653unescape_escaped_nonascii(
const char **pp,
const char *end,
rb_encoding *enc,
2656 const char *p = *pp;
2657 int chmaxlen = rb_enc_mbmaxlen(enc);
2658 unsigned char *area =
ALLOCA_N(
unsigned char, chmaxlen);
2659 char *chbuf = (
char *)area;
2664 memset(chbuf, 0, chmaxlen);
2666 byte = read_escaped_byte(&p, end, err);
2671 area[chlen++] = byte;
2672 while (chlen < chmaxlen &&
2674 byte = read_escaped_byte(&p, end, err);
2678 area[chlen++] = byte;
2681 l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
2683 errcpy(err,
"invalid multibyte escape");
2686 if (1 < chlen || (area[0] & 0x80)) {
2691 else if (*encp != enc) {
2692 errcpy(err,
"escaped non ASCII character in UTF-8 regexp");
2698 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", area[0]&0xff);
2706check_unicode_range(
unsigned long code, onig_errmsg_buffer err)
2708 if ((0xd800 <= code && code <= 0xdfff) ||
2710 errcpy(err,
"invalid Unicode range");
2717append_utf8(
unsigned long uv,
2720 if (check_unicode_range(uv, err) != 0)
2724 snprintf(escbuf,
sizeof(escbuf),
"\\x%02X", (
int)uv);
2734 *encp = rb_utf8_encoding();
2735 else if (*encp != rb_utf8_encoding()) {
2736 errcpy(err,
"UTF-8 character in non UTF-8 regexp");
2744unescape_unicode_list(
const char **pp,
const char *end,
2747 const char *p = *pp;
2748 int has_unicode = 0;
2752 while (p < end &&
ISSPACE(*p)) p++;
2759 errcpy(err,
"invalid Unicode range");
2763 if (append_utf8(code, buf, encp, err) != 0)
2767 while (p < end &&
ISSPACE(*p)) p++;
2770 if (has_unicode == 0) {
2771 errcpy(err,
"invalid Unicode list");
2781unescape_unicode_bmp(
const char **pp,
const char *end,
2784 const char *p = *pp;
2789 errcpy(err,
"invalid Unicode escape");
2794 errcpy(err,
"invalid Unicode escape");
2797 if (append_utf8(code, buf, encp, err) != 0)
2804unescape_nonascii0(
const char **pp,
const char *end,
rb_encoding *enc,
2806 onig_errmsg_buffer err,
int options,
int recurse)
2808 const char *p = *pp;
2811 int in_char_class = 0;
2813 int extended_mode = options & ONIG_OPTION_EXTEND;
2817 int chlen = rb_enc_precise_mbclen(p, end, enc);
2820 errcpy(err,
"invalid multibyte character");
2824 if (1 < chlen || (*p & 0x80)) {
2830 else if (*encp != enc) {
2831 errcpy(err,
"non ASCII character in UTF-8 regexp");
2840 errcpy(err,
"too short escape sequence");
2843 chlen = rb_enc_precise_mbclen(p, end, enc);
2845 goto invalid_multibyte;
2854 case '1':
case '2':
case '3':
2855 case '4':
case '5':
case '6':
case '7':
2857 size_t len = end-(p-1), octlen;
2858 if (
ruby_scan_oct(p-1, len < 3 ? len : 3, &octlen) <= 0177) {
2874 if (rb_is_usascii_enc(enc)) {
2875 const char *pbeg = p;
2876 int byte = read_escaped_byte(&p, end, err);
2877 if (
byte == -1)
return -1;
2882 if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
2889 errcpy(err,
"too short escape sequence");
2895 if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
2897 if (p == end || *p++ !=
'}') {
2898 errcpy(err,
"invalid Unicode list");
2905 if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
2927 if (extended_mode && !in_char_class) {
2929 while ((p < end) && ((c = *p++) !=
'\n'));
2939 if (in_char_class) {
2946 if (!in_char_class && recurse) {
2947 if (--parens == 0) {
2954 if (!in_char_class && p + 1 < end && *p ==
'?') {
2955 if (*(p+1) ==
'#') {
2957 const char *orig_p = p;
2960 while (cont && (p < end)) {
2963 if (!(c & 0x80))
break;
2967 chlen = rb_enc_precise_mbclen(p, end, enc);
2969 goto invalid_multibyte;
2989 int local_extend = 0;
2996 for(s = p+1; s < end; s++) {
2999 local_extend = invert ? -1 : 1;
3006 if (local_extend == 0 ||
3007 (local_extend == -1 && !extended_mode) ||
3008 (local_extend == 1 && extended_mode)) {
3015 int local_options = options;
3016 if (local_extend == 1) {
3017 local_options |= ONIG_OPTION_EXTEND;
3019 local_options &= ~ONIG_OPTION_EXTEND;
3023 int ret = unescape_nonascii0(&p, end, enc, buf, encp,
3026 if (ret < 0)
return ret;
3030 extended_mode = local_extend == 1;
3046 }
else if (!in_char_class && recurse) {
3064unescape_nonascii(
const char *p,
const char *end,
rb_encoding *enc,
3066 onig_errmsg_buffer err,
int options)
3068 return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
3073rb_reg_preprocess(
const char *p,
const char *end,
rb_encoding *enc,
3074 rb_encoding **fixed_enc, onig_errmsg_buffer err,
int options)
3077 int has_property = 0;
3081 if (rb_enc_asciicompat(enc))
3085 rb_enc_associate(buf, enc);
3088 if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
3091 if (has_property && !*fixed_enc) {
3096 rb_enc_associate(buf, *fixed_enc);
3103rb_reg_check_preprocess(
VALUE str)
3106 onig_errmsg_buffer err =
"";
3112 p = RSTRING_PTR(str);
3113 end = p + RSTRING_LEN(str);
3114 enc = rb_enc_get(str);
3116 buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
3120 return rb_reg_error_desc(str, 0, err);
3126rb_reg_preprocess_dregexp(
VALUE ary,
int options)
3130 onig_errmsg_buffer err =
"";
3145 src_enc = rb_enc_get(str);
3146 if (options & ARG_ENCODING_NONE &&
3147 src_enc != ascii8bit) {
3151 src_enc = ascii8bit;
3155 p = RSTRING_PTR(str);
3156 end = p + RSTRING_LEN(str);
3158 buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
3163 if (fixed_enc != 0) {
3164 if (regexp_enc != 0 && regexp_enc != fixed_enc) {
3166 rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
3168 regexp_enc = fixed_enc;
3177 rb_enc_associate(result, regexp_enc);
3185 int options, onig_errmsg_buffer err,
3186 const char *sourcefile,
int sourceline)
3194 if (
FL_TEST(obj, REG_LITERAL))
3200 if (rb_enc_dummy_p(enc)) {
3201 errcpy(err,
"can't make regexp with dummy encoding");
3205 unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err, options);
3206 if (
NIL_P(unescaped))
3210 if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
3211 (fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
3212 errcpy(err,
"incompatible character encoding");
3215 if (fixed_enc != a_enc) {
3216 options |= ARG_ENCODING_FIXED;
3220 else if (!(options & ARG_ENCODING_FIXED)) {
3221 enc = rb_usascii_encoding();
3224 rb_enc_associate((
VALUE)re, enc);
3225 if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
3228 if (options & ARG_ENCODING_NONE) {
3232 re->
ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
3233 options & ARG_REG_OPTION_MASK, err,
3234 sourcefile, sourceline);
3235 if (!re->
ptr)
return -1;
3244 if (regenc != enc) {
3245 str = rb_enc_associate(
rb_str_dup(str), enc = regenc);
3251rb_reg_initialize_str(
VALUE obj,
VALUE str,
int options, onig_errmsg_buffer err,
3252 const char *sourcefile,
int sourceline)
3255 rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
3256 if (options & ARG_ENCODING_NONE) {
3258 if (enc != ascii8bit) {
3260 errcpy(err,
"/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
3266 ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
3267 options, err, sourcefile, sourceline);
3268 if (ret == 0) reg_set_source(obj, str, str_enc);
3273rb_reg_s_alloc(
VALUE klass)
3293 return rb_reg_init_str(rb_reg_alloc(), s, options);
3297rb_reg_init_str(
VALUE re,
VALUE s,
int options)
3299 onig_errmsg_buffer err =
"";
3301 if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
3302 rb_reg_raise_str(s, options, err);
3311 onig_errmsg_buffer err =
"";
3313 if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
3314 enc, options, err, NULL, 0) != 0) {
3315 rb_reg_raise_str(s, options, err);
3317 reg_set_source(re, s, enc);
3322MJIT_FUNC_EXPORTED
VALUE
3323rb_reg_new_ary(
VALUE ary,
int opt)
3333 VALUE re = rb_reg_alloc();
3334 onig_errmsg_buffer err =
"";
3336 if (rb_reg_initialize(re, s, len, enc, options, err, NULL, 0) != 0) {
3337 rb_enc_reg_raise(s, len, enc, options, err);
3351rb_reg_compile(
VALUE str,
int options,
const char *sourcefile,
int sourceline)
3353 VALUE re = rb_reg_alloc();
3354 onig_errmsg_buffer err =
"";
3357 if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
3366static VALUE reg_cache;
3371 if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
3373 && memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
3379static st_index_t reg_hash(
VALUE re);
3391rb_reg_hash(
VALUE re)
3393 st_index_t hashval = reg_hash(re);
3428 if (re1 == re2)
return Qtrue;
3430 rb_reg_check(re1); rb_reg_check(re2);
3433 if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2))
return Qfalse;
3435 return RBOOL(memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0);
3450match_hash(
VALUE match)
3457 hashval =
rb_hash_uint(hashval, reg_hash(match_regexp(match)));
3483 if (match1 == match2)
return Qtrue;
3487 if (!rb_reg_equal(match_regexp(match1), match_regexp(match2)))
return Qfalse;
3490 if (regs1->num_regs != regs2->num_regs)
return Qfalse;
3491 if (memcmp(regs1->beg, regs2->beg, regs1->num_regs *
sizeof(*regs1->beg)))
return Qfalse;
3492 if (memcmp(regs1->end, regs2->end, regs1->num_regs *
sizeof(*regs1->end)))
return Qfalse;
3497reg_operand(
VALUE s,
int check)
3519 *strp = str = reg_operand(str, TRUE);
3530 return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
3592 long pos = reg_match_pos(re, &str, 0, NULL);
3593 if (pos < 0)
return Qnil;
3623 str = reg_operand(str, FALSE);
3629 return RBOOL(start >= 0);
3706rb_reg_match_m(
int argc,
VALUE *argv,
VALUE re)
3711 if (rb_scan_args(argc, argv,
"11", &str, &initpos) == 2) {
3718 pos = reg_match_pos(re, &str, pos, &result);
3747rb_reg_match_m_p(
int argc,
VALUE *argv,
VALUE re)
3749 long pos = rb_check_arity(argc, 1, 2) > 1 ?
NUM2LONG(argv[1]) : 0;
3750 return rb_reg_match_p(re, argv[0], pos);
3757 onig_errmsg_buffer err =
"";
3758 OnigPosition result;
3759 const UChar *start, *end;
3767 if (pos < 0)
return Qfalse;
3773 pos = beg - RSTRING_PTR(str);
3776 reg = rb_reg_prepare_re0(re, str, err);
3778 if (!tmpreg)
RREGEXP(re)->usecnt++;
3779 start = ((UChar*)RSTRING_PTR(str));
3780 end = start + RSTRING_LEN(str);
3781 result = onig_search(reg, start, end, start + pos, end,
3782 NULL, ONIG_OPTION_NONE);
3783 if (!tmpreg)
RREGEXP(re)->usecnt--;
3794 if (result == ONIG_MISMATCH) {
3798 onig_error_code_to_str((UChar*)err, (
int)result);
3799 rb_reg_raise(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re), err, re);
3812str_to_option(
VALUE str)
3818 if (
NIL_P(str))
return -1;
3820 for (
long i = 0; i < len; ++i) {
3821 int f = char_to_option(ptr[i]);
3831set_timeout(rb_hrtime_t *hrt,
VALUE timeout)
3833 double timeout_d =
NIL_P(timeout) ? 0.0 :
NUM2DBL(timeout);
3834 if (!
NIL_P(timeout) && timeout_d <= 0) {
3837 double2hrtime(hrt, timeout_d);
3849void rb_warn_deprecated_to_remove(
const char *removal,
const char *fmt,
const char *suggest, ...);
3906rb_reg_initialize_m(
int argc,
VALUE *argv,
VALUE self)
3910 reg_extract_args(argc, argv, &args);
3913 set_timeout(&
RREGEXP_PTR(self)->timelimit, args.timeout);
3926 argc = rb_scan_args(argc, argv,
"12:", &src, &opts, &n_flag, &kwargs);
3928 args->timeout =
Qnil;
3929 if (!
NIL_P(kwargs)) {
3930 static ID keywords[1];
3932 keywords[0] = rb_intern_const(
"timeout");
3938 rb_warn_deprecated_to_remove(
"3.3",
"3rd argument to Regexp.new",
"2nd argument");
3949 str = RREGEXP_SRC(re);
3952 if (!UNDEF_P(opts)) {
3955 else if ((f = str_to_option(opts)) >= 0) flags = f;
3956 else if (!
NIL_P(opts) && rb_bool_expected(opts,
"ignorecase", FALSE))
3957 flags = ONIG_OPTION_IGNORECASE;
3959 if (!NIL_OR_UNDEF_P(n_flag)) {
3961 if (kcode[0] ==
'n' || kcode[0] ==
'N') {
3962 enc = rb_ascii8bit_encoding();
3963 flags |= ARG_ENCODING_NONE;
3970 args->flags = flags;
3977 if (enc && rb_enc_get(str) != enc)
3978 rb_reg_init_str_enc(self, str, enc, flags);
3980 rb_reg_init_str(self, str, flags);
3993 s = RSTRING_PTR(str);
3994 send = s + RSTRING_LEN(str);
3996 c = rb_enc_ascget(s, send, &clen, enc);
3998 s += mbclen(s, send, enc);
4002 case '[':
case ']':
case '{':
case '}':
4003 case '(':
case ')':
case '|':
case '-':
4004 case '*':
case '.':
case '\\':
4005 case '?':
case '+':
case '^':
case '$':
4007 case '\t':
case '\f':
case '\v':
case '\n':
case '\r':
4014 rb_enc_associate(tmp, rb_usascii_encoding());
4021 rb_enc_associate(tmp, rb_usascii_encoding());
4024 rb_enc_copy(tmp, str);
4026 t = RSTRING_PTR(tmp);
4028 const char *p = RSTRING_PTR(str);
4029 memcpy(t, p, s - p);
4033 c = rb_enc_ascget(s, send, &clen, enc);
4035 int n = mbclen(s, send, enc);
4043 case '[':
case ']':
case '{':
case '}':
4044 case '(':
case ')':
case '|':
case '-':
4045 case '*':
case '.':
case '\\':
4046 case '?':
case '+':
case '^':
case '$':
4048 t += rb_enc_mbcput(
'\\', t, enc);
4051 t += rb_enc_mbcput(
'\\', t, enc);
4052 t += rb_enc_mbcput(
' ', t, enc);
4055 t += rb_enc_mbcput(
'\\', t, enc);
4056 t += rb_enc_mbcput(
't', t, enc);
4059 t += rb_enc_mbcput(
'\\', t, enc);
4060 t += rb_enc_mbcput(
'n', t, enc);
4063 t += rb_enc_mbcput(
'\\', t, enc);
4064 t += rb_enc_mbcput(
'r', t, enc);
4067 t += rb_enc_mbcput(
'\\', t, enc);
4068 t += rb_enc_mbcput(
'f', t, enc);
4071 t += rb_enc_mbcput(
'\\', t, enc);
4072 t += rb_enc_mbcput(
'v', t, enc);
4075 t += rb_enc_mbcput(c, t, enc);
4112 options =
RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
4113 if (
RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
4114 if (
RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
4119rb_check_regexp_type(
VALUE re)
4145 return rb_check_regexp_type(re);
4158 else if (argc == 1) {
4159 VALUE arg = rb_ary_entry(args0, 0);
4160 VALUE re = rb_check_regexp_type(arg);
4165 quoted = rb_reg_s_quote(
Qnil, arg);
4174 int has_asciionly = 0;
4178 for (i = 0; i < argc; i++) {
4180 VALUE e = rb_ary_entry(args0, i);
4185 v = rb_check_regexp_type(e);
4188 if (!rb_enc_asciicompat(enc)) {
4189 if (!has_ascii_incompat)
4190 has_ascii_incompat = enc;
4191 else if (has_ascii_incompat != enc)
4193 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4195 else if (rb_reg_fixed_encoding_p(v)) {
4196 if (!has_ascii_compat_fixed)
4197 has_ascii_compat_fixed = enc;
4198 else if (has_ascii_compat_fixed != enc)
4200 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4205 v = rb_reg_str_with_term(v, -1);
4210 enc = rb_enc_get(e);
4211 if (!rb_enc_asciicompat(enc)) {
4212 if (!has_ascii_incompat)
4213 has_ascii_incompat = enc;
4214 else if (has_ascii_incompat != enc)
4216 rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
4222 if (!has_ascii_compat_fixed)
4223 has_ascii_compat_fixed = enc;
4224 else if (has_ascii_compat_fixed != enc)
4226 rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
4228 v = rb_reg_s_quote(
Qnil, e);
4230 if (has_ascii_incompat) {
4231 if (has_asciionly) {
4233 rb_enc_name(has_ascii_incompat));
4235 if (has_ascii_compat_fixed) {
4237 rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
4242 rb_enc_copy(source, v);
4247 if (has_ascii_incompat) {
4248 result_enc = has_ascii_incompat;
4250 else if (has_ascii_compat_fixed) {
4251 result_enc = has_ascii_compat_fixed;
4254 result_enc = rb_ascii8bit_encoding();
4257 rb_enc_associate(source, result_enc);
4301 !
NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
4302 return rb_reg_s_union(self, v);
4304 return rb_reg_s_union(self, args);
4329rb_reg_s_linear_time_p(
int argc,
VALUE *argv,
VALUE self)
4332 VALUE re = reg_extract_args(argc, argv, &args);
4335 re =
reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
4338 return RBOOL(onig_check_linear_time(
RREGEXP_PTR(re)));
4347 return rb_reg_init_str(copy, RREGEXP_SRC(re),
rb_reg_options(re));
4358 int acompat = rb_enc_asciicompat(str_enc);
4360#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
4367 int c = ASCGET(s, e, &clen);
4371 s += mbclen(s, e, str_enc);
4377 if (c !=
'\\' || s == e)
continue;
4384 c = ASCGET(s, e, &clen);
4386 s += mbclen(s, e, str_enc);
4395 case '1':
case '2':
case '3':
case '4':
4396 case '5':
case '6':
case '7':
case '8':
case '9':
4397 if (!
NIL_P(regexp) && onig_noname_group_capture_is_active(
RREGEXP_PTR(regexp))) {
4406 if (s < e && ASCGET(s, e, &clen) ==
'<') {
4407 char *name, *name_end;
4409 name_end = name = s + clen;
4410 while (name_end < e) {
4411 c = ASCGET(name_end, e, &clen);
4412 if (c ==
'>')
break;
4413 name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
4417 (
long)(name_end - name));
4418 if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
4419 name_to_backref_error(n);
4421 p = s = name_end + clen;
4446 no = regs->num_regs-1;
4447 while (BEG(no) == -1 && no > 0) no--;
4448 if (no == 0)
continue;
4461 if (no >= regs->num_regs)
continue;
4462 if (BEG(no) == -1)
continue;
4467 if (!val)
return str;
4476ignorecase_getter(
ID _x,
VALUE *_y)
4499get_LAST_MATCH_INFO(
ID _x,
VALUE *_y)
4501 return match_getter();
4552rb_reg_s_last_match(
int argc,
VALUE *argv,
VALUE _)
4554 if (rb_check_arity(argc, 0, 1) == 1) {
4558 n = match_backref_number(match, argv[0]);
4561 return match_getter();
4565re_warn(
const char *s)
4571rb_hrtime_t rb_reg_match_time_limit = 0;
4575rb_reg_check_timeout(
regex_t *reg,
void *end_time_)
4577 rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
4579 if (*end_time == 0) {
4583 rb_hrtime_t timelimit = reg->timelimit;
4587 timelimit = rb_reg_match_time_limit;
4591 *end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
4595 *end_time = RB_HRTIME_MAX;
4599 if (*end_time < rb_hrtime_now()) {
4601 rb_raise(rb_eRegexpTimeoutError,
"regexp match timeout");
4615rb_reg_s_timeout_get(
VALUE dummy)
4617 double d = hrtime2double(rb_reg_match_time_limit);
4618 if (d == 0.0)
return Qnil;
4636rb_reg_s_timeout_set(
VALUE dummy,
VALUE timeout)
4638 rb_ractor_ensure_main_ractor(
"can not access Regexp.timeout from non-main Ractors");
4640 set_timeout(&rb_reg_match_time_limit, timeout);
4661rb_reg_timeout_get(
VALUE re)
4664 double d = hrtime2double(
RREGEXP_PTR(re)->timelimit);
4665 if (d == 0.0)
return Qnil;
4692 onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
4693 onig_set_warn_func(re_warn);
4694 onig_set_verb_warn_func(re_warn);
4702 rb_gvar_ractor_local(
"$~");
4703 rb_gvar_ractor_local(
"$&");
4704 rb_gvar_ractor_local(
"$`");
4705 rb_gvar_ractor_local(
"$'");
4706 rb_gvar_ractor_local(
"$+");
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Undefines a method of the given class (inserts an "undef" entry for it).
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define rb_str_new2
Old name of rb_str_new_cstr.
#define NEWOBJ_OF
Old name of RB_NEWOBJ_OF.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define REALLOC_N
Old name of RB_REALLOC_N.
#define OBJ_INIT_COPY(obj, orig)
Old name of RB_OBJ_INIT_COPY.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define rb_str_buf_new2
Old name of rb_str_buf_new_cstr.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define ZALLOC
Old name of RB_ZALLOC.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define FIX2INT
Old name of RB_FIX2INT.
#define NUM2DBL
Old name of rb_num2dbl.
#define rb_str_new3
Old name of rb_str_new_shared.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_SET
Old name of RB_FL_SET.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define rb_exc_new3
Old name of rb_exc_new_str.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define NUM2INT
Old name of RB_NUM2INT.
#define INT2NUM
Old name of RB_INT2NUM.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
#define DBL2NUM
Old name of rb_float_new.
#define T_MATCH
Old name of RUBY_T_MATCH.
#define FL_TEST
Old name of RB_FL_TEST.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define scan_oct(s, l, e)
Old name of ruby_scan_oct.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define rb_str_new4
Old name of rb_str_new_frozen.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports always regardless of runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eStandardError
StandardError exception.
void rb_set_errinfo(VALUE err)
Sets the current exception ($!) to the given value.
VALUE rb_eRegexpError
RegexpError exception.
#define ruby_verbose
This variable controls whether the interpreter is in verbose mode (the level of warnings it reports).
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it reports always regardless of runtime -W flag.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
VALUE rb_eSecurityError
SecurityError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_check_convert_type(VALUE val, int type, const char *name, const char *mid)
Identical to rb_convert_type(), except it returns RUBY_Qnil instead of raising exceptions,...
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_class_new_instance(int argc, const VALUE *argv, VALUE klass)
Allocates, then initialises an instance of the given class.
VALUE rb_cMatch
MatchData class.
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_cRegexp
Regexp class.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
int rb_char_to_option_kcode(int c, int *option, int *kcode)
Converts a character option to its encoding.
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts)
Identical to rb_reg_new(), except it additionally takes an encoding.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
#define rb_check_frozen
Just another name of rb_check_frozen.
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_lastline_get(void)
Queries the last line, or the $_.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_last_match(VALUE md)
This just returns the argument, stringified.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_reg_match_post(VALUE md)
The portion of the original string after the given match.
VALUE rb_reg_nth_defined(int n, VALUE md)
Identical to rb_reg_nth_match(), except it just returns Boolean.
VALUE rb_reg_match_pre(VALUE md)
The portion of the original string before the given match.
VALUE rb_reg_new_str(VALUE src, int opts)
Identical to rb_reg_new(), except it takes the expression in Ruby's string instead of C's.
VALUE rb_reg_match_last(VALUE md)
The portion of the original string that captured at the very last.
VALUE rb_reg_match2(VALUE re)
Identical to rb_reg_match(), except it matches against rb_lastline_get() (or, the $_).
VALUE rb_reg_new(const char *src, long len, int opts)
Creates a new Regular expression.
int rb_memcicmp(const void *s1, const void *s2, long n)
Identical to st_locale_insensitive_strcasecmp(), except it is timing safe and returns something diffe...
#define rb_hash_uint(h, i)
Just another name of st_hash_uint.
#define rb_hash_end(h)
Just another name of st_hash_end.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C's string instead of Ruby's.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
st_index_t rb_hash_start(st_index_t i)
Starts a series of hashing.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_class_path(VALUE mod)
Identical to rb_mod_name(), except it returns #<Class: ...> style inspection for anonymous modules.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
void rb_define_const(VALUE klass, const char *name, VALUE val)
Defines a Ruby level constant under a namespace.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
regex_t * rb_reg_prepare_re(VALUE re, VALUE str)
Exercises various checks and preprocesses so that the given regular expression can be applied to the ...
long rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int dir)
Tell us if this is a wrong idea, but it seems this function has no usage at all.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_quote(VALUE str)
Escapes any characters that would have special meaning in a regular expression.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
int rb_reg_region_copy(struct re_registers *dst, const struct re_registers *src)
Duplicates a match data.
unsigned long ruby_scan_hex(const char *str, size_t len, size_t *ret)
Interprets the passed string a hexadecimal unsigned integer.
unsigned long ruby_scan_oct(const char *str, size_t len, size_t *consumed)
Interprets the passed string as an octal unsigned integer.
VALUE rb_sprintf(const char *fmt,...)
Ruby's extended sprintf(3).
VALUE rb_str_catf(VALUE dst, const char *fmt,...)
Identical to rb_sprintf(), except it renders the output to the specified object rather than creating ...
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_virtual_variable(const char *q, type *w, void_type *e)
Defines a function-backed global variable.
#define RARRAY_LEN
Just another name of rb_array_len.
#define RARRAY_AREF(a, i)
#define RBASIC(obj)
Convenient casting macro.
#define RGENGC_WB_PROTECTED_REGEXP
This is a compile-time flag to enable/disable write barrier for struct RRegexp.
#define RMATCH(obj)
Convenient casting macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
#define RREGEXP(obj)
Convenient casting macro.
#define RREGEXP_PTR(obj)
Convenient accessor macro.
#define StringValue(v)
Ensures that the parameter object is a String.
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RMatch
Regular expression execution context.
VALUE regexp
The expression of this match.
struct rmatch * rmatch
The result of this match.
VALUE str
The target string that the match was made against.
struct RRegexp
Ruby's regular expression.
struct RBasic basic
Basic part, including flags and class.
const VALUE src
Source code of this expression.
struct re_pattern_buffer * ptr
The pattern buffer.
unsigned long usecnt
Reference count.
struct rmatch_offset
Represents the region of a capture group.
long beg
Beginning of a group.
int char_offset_num_allocated
Number of rmatch_offset that rmatch::char_offset holds.
struct rmatch_offset * char_offset
Capture group offsets, in C array.
struct re_registers regs
"Registers" of a match.
uintptr_t VALUE
Type that represents a Ruby object.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
#define SIZEOF_VALUE
Identical to sizeof(VALUE), except it is a macro that can also be used inside of preprocessor directi...