14#include "ruby/internal/config.h"
24#include "debug_counter.h"
29#include "internal/array.h"
30#include "internal/compar.h"
31#include "internal/compilers.h"
32#include "internal/encoding.h"
33#include "internal/error.h"
34#include "internal/gc.h"
35#include "internal/numeric.h"
36#include "internal/object.h"
37#include "internal/proc.h"
38#include "internal/re.h"
39#include "internal/sanitizers.h"
40#include "internal/string.h"
41#include "internal/transcode.h"
46#include "ruby_assert.h"
49#if defined HAVE_CRYPT_R
50# if defined HAVE_CRYPT_H
53#elif !defined HAVE_CRYPT
54# include "missing/crypt.h"
55# define HAVE_CRYPT_R 1
/*
 * BEG(no) / END(no): read entry `no` of the `beg` / `end` arrays reached
 * through a pointer named `regs`.  The macros take no `regs` argument, so
 * a variable with exactly that name must be in scope wherever they are used.
 * NOTE(review): presumably `regs` is a regexp match-register structure --
 * confirm against the callers, which are not visible in this excerpt.
 */
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_usascii_str_new_cstr
67#undef rb_utf8_str_new_cstr
68#undef rb_enc_str_new_cstr
69#undef rb_external_str_new_cstr
70#undef rb_locale_str_new_cstr
71#undef rb_str_dup_frozen
72#undef rb_str_buf_new_cstr
/* NOTE(review): presumably an upper bound on the byte length of a single
 * character; it is not referenced in the visible part of the file. */
103#define RUBY_MAX_CHAR_LEN 16
/*
 * String-specific flag bits, each aliased onto one of the generic FL_USERn
 * object flags.
 */
/* Set on the string whose buffer other strings point into (see STR_SET_SHARED). */
104#define STR_SHARED_ROOT FL_USER5
/* Set by STR_SET_SHARED on a shared root whose RBASIC_CLASS is 0. */
105#define STR_BORROWED FL_USER6
/* Tested by rb_check_lockedtmp(). */
106#define STR_TMPLOCK FL_USER7
/* The visible free paths only release a heap buffer when neither STR_SHARED
 * nor STR_NOFREE is set; str_new_static() sets this flag. */
107#define STR_NOFREE FL_USER18
/* Strings carrying this flag are exempted by the guard in STR_SET_SHARED. */
108#define STR_FAKESTR FL_USER19
110#define STR_SET_NOEMBED(str) do {\
111 FL_SET((str), STR_NOEMBED);\
113 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
116 STR_SET_EMBED_LEN((str), 0);\
119#define STR_SET_EMBED(str) FL_UNSET((str), (STR_NOEMBED|STR_NOFREE))
121# define STR_SET_EMBED_LEN(str, n) do { \
122 assert(str_embed_capa(str) > (n));\
123 RSTRING(str)->as.embed.len = (n);\
126# define STR_SET_EMBED_LEN(str, n) do { \
128 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
129 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
133#define STR_SET_LEN(str, n) do { \
134 if (STR_EMBED_P(str)) {\
135 STR_SET_EMBED_LEN((str), (n));\
138 RSTRING(str)->as.heap.len = (n);\
142#define STR_DEC_LEN(str) do {\
143 if (STR_EMBED_P(str)) {\
144 long n = RSTRING_LEN(str);\
146 STR_SET_EMBED_LEN((str), n);\
149 RSTRING(str)->as.heap.len--;\
154str_enc_fastpath(
VALUE str)
158 case ENCINDEX_ASCII_8BIT:
160 case ENCINDEX_US_ASCII:
/*
 * Width in bytes of the terminator for `str`: 1 when str_enc_fastpath(str)
 * is true, otherwise the minimum character length (rb_enc_mbminlen) of the
 * encoding looked up from the string's encoding index.
 * `str` appears twice in the expansion, so pass a side-effect-free expression.
 */
167#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
/*
 * TERM_FILL(ptr, termlen): write a zero terminator of `termlen` bytes at `ptr`.
 * Both arguments are captured into locals first, so each is evaluated once.
 * The first byte is always stored directly; memset() is used only when the
 * terminator is wider than one byte, a case marked UNLIKELY.
 * The closing line(s) of the macro are not visible in this excerpt.
 */
168#define TERM_FILL(ptr, termlen) do {\
169 char *const term_fill_ptr = (ptr);\
170 const int term_fill_len = (termlen);\
171 *term_fill_ptr = '\0';\
172 if (UNLIKELY(term_fill_len > 1))\
173 memset(term_fill_ptr, 0, term_fill_len);\
/*
 * RESIZE_CAPA(str, capacity): convenience form of RESIZE_CAPA_TERM that
 * derives the terminator length from TERM_LEN(str).
 * The closing line of the macro is not visible in this excerpt.
 */
176#define RESIZE_CAPA(str,capacity) do {\
177 const int termlen = TERM_LEN(str);\
178 RESIZE_CAPA_TERM(str,capacity,termlen);\
/*
 * RESIZE_CAPA_TERM(str, capacity, termlen): change the buffer capacity of
 * `str` to `capacity` content bytes plus `termlen` terminator bytes.
 *
 * Embedded string: when the embed capacity is smaller than
 * capacity + termlen, a heap buffer of that size is allocated, the current
 * RSTRING_LEN(str) bytes are copied into it, the string is switched to the
 * non-embedded representation and the new capacity is recorded.
 *
 * Remaining visible steps (presumably the non-embedded branch -- the lines
 * that introduce it are missing from this excerpt): assert the string is not
 * STR_SHARED, reallocate the heap buffer from STR_HEAP_SIZE(str) to
 * capacity + termlen bytes, and record the new capacity.
 *
 * NOTE(review): in the embed-capacity comparison `capacity` and `termlen`
 * are expanded without parentheses, unlike every other use in this macro;
 * an argument containing a low-precedence operator would be mis-parsed there.
 */
180#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
181 if (STR_EMBED_P(str)) {\
182 if (str_embed_capa(str) < capacity + termlen) {\
183 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
184 const long tlen = RSTRING_LEN(str);\
185 memcpy(tmp, RSTRING_PTR(str), tlen);\
186 RSTRING(str)->as.heap.ptr = tmp;\
187 RSTRING(str)->as.heap.len = tlen;\
188 STR_SET_NOEMBED(str);\
189 RSTRING(str)->as.heap.aux.capa = (capacity);\
193 assert(!FL_TEST((str), STR_SHARED)); \
194 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
195 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
196 RSTRING(str)->as.heap.aux.capa = (capacity);\
/*
 * STR_SET_SHARED(str, shared_str): record that `str` points into the buffer
 * owned by `shared_str`.
 *
 * Guarded by a test that `str` does not carry STR_FAKESTR.  The visible
 * steps are:
 *   - assert that str's pointer lies between the start of shared_str's
 *     buffer and its end (start + RSTRING_LEN), inclusive;
 *   - store shared_str into str's as.heap.aux.shared through RB_OBJ_WRITE;
 *   - set STR_SHARED on str and STR_SHARED_ROOT on shared_str;
 *   - when shared_str has RBASIC_CLASS 0, additionally set STR_BORROWED on it.
 * The closing braces are not visible in this excerpt, so the exact extent of
 * the STR_FAKESTR guard should be confirmed against the full file.
 */
200#define STR_SET_SHARED(str, shared_str) do { \
201 if (!FL_TEST(str, STR_FAKESTR)) { \
202 assert(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
203 assert(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
204 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
205 FL_SET((str), STR_SHARED); \
206 FL_SET((shared_str), STR_SHARED_ROOT); \
207 if (RBASIC_CLASS((shared_str)) == 0) \
208 FL_SET_RAW((shared_str), STR_BORROWED); \
/* Heap buffer pointer of a string; reads the as.heap member unconditionally,
 * so it is only meaningful for non-embedded strings. */
212#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
/* Recorded capacity plus terminator width; this is the value the visible
 * callers pass as the old allocation size to SIZED_REALLOC_N and
 * ruby_sized_xfree. */
213#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
/* Alias for get_encoding(str). */
216#define STR_ENC_GET(str) get_encoding(str)
/*
 * SHARABLE_MIDDLE_SUBSTRING defaults to 0 unless defined by the build.
 * With the value 0, SHARABLE_SUBSTRING_P(beg, len, end) is true only when
 * the substring ends exactly at `end` (beg + len == end); the alternative
 * definition is the constant 1.
 * The #else / #endif lines separating the two definitions are missing from
 * this excerpt.
 */
218#if !defined SHARABLE_MIDDLE_SUBSTRING
219# define SHARABLE_MIDDLE_SUBSTRING 0
221#if !SHARABLE_MIDDLE_SUBSTRING
222#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
224#define SHARABLE_SUBSTRING_P(beg, len, end) 1
/*
 * Number of bytes available for embedded data in `str`: the size of the GC
 * slot holding the object minus the offset of the as.embed.ary member within
 * struct RString.
 * (The return-type line and braces are not visible in this excerpt.)
 */
229str_embed_capa(
VALUE str)
232 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.
ary);
/*
 * True when `str` has none of STR_NOFREE, STR_SHARED_ROOT or STR_SHARED set.
 * (The return-type line and braces are not visible in this excerpt.)
 */
239rb_str_reembeddable_p(
VALUE str)
241 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
245rb_str_embed_size(
long capa)
251rb_str_size_as_embedded(
VALUE str)
255 if (STR_EMBED_P(str)) {
256 real_size = rb_str_embed_size(
RSTRING(str)->as.embed.len) + TERM_LEN(str);
260 else if (rb_str_reembeddable_p(str)) {
261 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
265 real_size =
sizeof(
struct RString);
/*
 * Reports whether a string of `len` content bytes plus `termlen` terminator
 * bytes can be stored embedded: computes rb_str_embed_size(len + termlen)
 * and asks rb_gc_size_allocatable_p() whether that size is allocatable.
 * (The return-type line and braces are not visible in this excerpt.)
 */
273STR_EMBEDDABLE_P(
long len,
long termlen)
276 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions appear later in the file.
 */
284static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
285static VALUE str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex);
287static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
288static inline void str_modifiable(
VALUE str);
/*
 * Thin wrapper around str_make_independent_expand(): passes the string's
 * current length, an expansion of 0 and the terminator width from
 * TERM_LEN(str).
 * (The return-type line and braces are not visible in this excerpt.)
 */
292str_make_independent(
VALUE str)
294 long len = RSTRING_LEN(str);
295 int termlen = TERM_LEN(str);
296 str_make_independent_expand((str),
len, 0L, termlen);
299static inline int str_dependent_p(
VALUE str);
/*
 * Calls str_make_independent(str) only when str_dependent_p(str) reports
 * that the string is dependent; otherwise the visible code does nothing.
 * (The return-type line and closing braces are not visible in this excerpt.)
 */
302rb_str_make_independent(
VALUE str)
304 if (str_dependent_p(str)) {
305 str_make_independent(str);
310rb_str_make_embedded(
VALUE str)
315 char *buf =
RSTRING(str)->as.heap.ptr;
319 STR_SET_EMBED_LEN(str,
len);
322 memcpy(RSTRING_PTR(str), buf,
len);
326 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
333 if (new_root == old_root) {
339 if (!STR_EMBED_P(new_root)) {
343 size_t offset = (size_t)((uintptr_t)
RSTRING(str)->as.heap.ptr - (uintptr_t)
RSTRING(old_root)->as.embed.ary);
346 RSTRING(str)->as.heap.ptr =
RSTRING(new_root)->as.embed.ary + offset;
350rb_debug_rstring_null_ptr(
const char *func)
352 fprintf(stderr,
"%s is returning NULL!! "
353 "SIGSEGV is highly expected to follow immediately.\n"
354 "If you could reproduce, attach your debugger here, "
355 "and look at the passed string.\n",
360static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
363get_encoding(
VALUE str)
369mustnot_broken(
VALUE str)
371 if (is_broken_string(str)) {
377mustnot_wchar(
VALUE str)
380 if (rb_enc_mbminlen(enc) > 1) {
387static VALUE register_fstring(
VALUE str,
bool copy);
/*
 * True when `str` has no FL_EXIVAR flag set and its class is exactly
 * rb_cString.  `str` is expanded twice.
 */
394#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
402fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
412 if (rb_objspace_garbage_object_p(str)) {
424 rb_enc_copy(new_str, str);
437 if (STR_SHARED_P(str)) {
439 str_make_independent(str);
442 if (!BARE_STRING_P(str)) {
446 RBASIC(str)->flags |= RSTRING_FSTR;
448 *key = *value = arg->fstr = str;
462 if (
FL_TEST(str, RSTRING_FSTR))
465 bare = BARE_STRING_P(str);
467 if (STR_EMBED_P(str)) {
471 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_SHARED_ROOT|STR_SHARED) == (STR_NOEMBED|STR_SHARED_ROOT)) {
480 fstr = register_fstring(str, FALSE);
483 str_replace_shared_without_enc(str, fstr);
491register_fstring(
VALUE str,
bool copy)
498 st_table *frozen_strings = rb_vm_fstring_table();
501 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
502 }
while (UNDEF_P(args.fstr));
509 assert(RBASIC_CLASS(args.fstr) ==
rb_cString);
514setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
530 return (
VALUE)fake_str;
/*
 * Encoding-pointer front end for setup_fake_str(): converts `enc` to its
 * index with rb_enc_to_index() and forwards the remaining arguments
 * unchanged, returning whatever setup_fake_str() returns.
 * (The return-type line and braces are not visible in this excerpt.)
 */
537rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
rb_encoding *enc)
539 return setup_fake_str(fake_str, name, len, rb_enc_to_index(enc));
547MJIT_FUNC_EXPORTED
VALUE
548rb_fstring_new(
const char *ptr,
long len)
551 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), FALSE);
558 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), FALSE);
/*
 * C-string front end for rb_fstring_new(): the length is obtained with
 * strlen(), so `ptr` must be a non-NULL, NUL-terminated string.
 * (The return-type line and braces are not visible in this excerpt.)
 */
562rb_fstring_cstr(
const char *
ptr)
564 return rb_fstring_new(
ptr, strlen(
ptr));
568fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
578 const char *aptr, *bptr;
581 return (alen != blen ||
583 memcmp(aptr, bptr, alen) != 0);
587single_byte_optimizable(
VALUE str)
595 enc = STR_ENC_GET(str);
596 if (rb_enc_mbmaxlen(enc) == 1)
606static inline const char *
607search_nonascii(
const char *p,
const char *e)
609 const uintptr_t *s, *t;
/*
 * NONASCII_MASK: a uintptr_t-sized constant with the top bit (0x80) of every
 * byte set, so that `word & NONASCII_MASK` is non-zero exactly when some
 * byte in the word has its high bit set.
 * With C99 or later the UINT64_C / UINT32_C macros are used; otherwise the
 * constant is assembled from unsigned long literals.  Pointer widths other
 * than 8 or 4 bytes are rejected with #error.
 * The #else / #endif lines of these conditionals are missing from this excerpt.
 */
611#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
612# if SIZEOF_UINTPTR_T == 8
613# define NONASCII_MASK UINT64_C(0x8080808080808080)
614# elif SIZEOF_UINTPTR_T == 4
615# define NONASCII_MASK UINT32_C(0x80808080)
617# error "don't know what to do."
620# if SIZEOF_UINTPTR_T == 8
621# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
622# elif SIZEOF_UINTPTR_T == 4
623# define NONASCII_MASK 0x80808080UL
625# error "don't know what to do."
629 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
630#if !UNALIGNED_WORD_ACCESS
631 if ((uintptr_t)p % SIZEOF_VOIDP) {
632 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
637 case 7:
if (p[-7]&0x80)
return p-7;
638 case 6:
if (p[-6]&0x80)
return p-6;
639 case 5:
if (p[-5]&0x80)
return p-5;
640 case 4:
if (p[-4]&0x80)
return p-4;
642 case 3:
if (p[-3]&0x80)
return p-3;
643 case 2:
if (p[-2]&0x80)
return p-2;
644 case 1:
if (p[-1]&0x80)
return p-1;
649#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
650#define aligned_ptr(value) \
651 __builtin_assume_aligned((value), sizeof(uintptr_t))
653#define aligned_ptr(value) (uintptr_t *)(value)
656 t = (uintptr_t *)(e - (SIZEOF_VOIDP-1));
659 if (*s & NONASCII_MASK) {
660#ifdef WORDS_BIGENDIAN
661 return (
const char *)s + (nlz_intptr(*s&NONASCII_MASK)>>3);
663 return (
const char *)s + (ntz_intptr(*s&NONASCII_MASK)>>3);
673 case 7:
if (e[-7]&0x80)
return e-7;
674 case 6:
if (e[-6]&0x80)
return e-6;
675 case 5:
if (e[-5]&0x80)
return e-5;
676 case 4:
if (e[-4]&0x80)
return e-4;
678 case 3:
if (e[-3]&0x80)
return e-3;
679 case 2:
if (e[-2]&0x80)
return e-2;
680 case 1:
if (e[-1]&0x80)
return e-1;
688 const char *e = p +
len;
690 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
692 p = search_nonascii(p, e);
696 if (rb_enc_asciicompat(enc)) {
697 p = search_nonascii(p, e);
700 int ret = rb_enc_precise_mbclen(p, e, enc);
704 p = search_nonascii(p, e);
710 int ret = rb_enc_precise_mbclen(p, e, enc);
726 if (rb_enc_to_index(enc) == rb_ascii8bit_encindex()) {
729 p = search_nonascii(p, e);
733 else if (rb_enc_asciicompat(enc)) {
734 p = search_nonascii(p, e);
740 int ret = rb_enc_precise_mbclen(p, e, enc);
747 p = search_nonascii(p, e);
753 int ret = rb_enc_precise_mbclen(p, e, enc);
772rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
777 str_enc_copy(dest, src);
778 if (RSTRING_LEN(dest) == 0) {
779 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
790 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
791 search_nonascii(RSTRING_PTR(dest), RSTRING_END(dest)))
802rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
804 str_enc_copy(dest, src);
811 return coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
817 return enc_coderange_scan(str, enc);
826 cr = enc_coderange_scan(str, get_encoding(str));
837 if (!rb_enc_asciicompat(enc))
839 else if (is_ascii_string(str))
845str_mod_check(
VALUE s,
const char *p,
long len)
847 if (RSTRING_PTR(s) != p || RSTRING_LEN(s) !=
len){
/*
 * Content capacity of `str`, excluding a terminator of `termlen` bytes.
 *   - embedded string: embed capacity minus termlen;
 *   - STR_SHARED or STR_NOFREE string: the current heap length;
 *   - final visible return (presumably the remaining case -- the line that
 *     introduces it is missing from this excerpt): the recorded
 *     as.heap.aux.capa.
 */
853str_capacity(
VALUE str,
const int termlen)
855 if (STR_EMBED_P(str)) {
857 return str_embed_capa(str) - termlen;
862 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
863 return RSTRING(str)->as.heap.len;
866 return RSTRING(str)->as.heap.aux.capa;
873 return str_capacity(str, TERM_LEN(str));
877must_not_null(
const char *
ptr)
887 size_t size = rb_str_embed_size(
capa);
889 assert(rb_gc_size_allocatable_p(size));
891 assert(size <=
sizeof(
struct RString));
894 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
901str_alloc_heap(
VALUE klass)
903 RVARGC_NEWOBJ_OF(str,
struct RString, klass,
910empty_str_alloc(
VALUE klass)
912 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
913 VALUE str = str_alloc_embed(klass, 0);
914 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
919str_new0(
VALUE klass,
const char *
ptr,
long len,
int termlen)
927 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
929 if (STR_EMBEDDABLE_P(
len, termlen)) {
930 str = str_alloc_embed(klass,
len + termlen);
936 str = str_alloc_heap(klass);
942 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
945 memcpy(RSTRING_PTR(str),
ptr,
len);
947 STR_SET_LEN(str,
len);
948 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
955 return str_new0(klass,
ptr,
len, 1);
976 rb_enc_associate_index(str, rb_utf8_encindex());
988 rb_enc_associate(str, enc);
1000 __msan_unpoison_string(
ptr);
1016 rb_enc_associate_index(str, rb_utf8_encindex());
1024 if (rb_enc_mbminlen(enc) != 1) {
1027 return rb_enc_str_new(
ptr, strlen(
ptr), enc);
1031str_new_static(
VALUE klass,
const char *
ptr,
long len,
int encindex)
1040 rb_encoding *enc = rb_enc_get_from_index(encindex);
1041 str = str_new0(klass,
ptr,
len, rb_enc_mbminlen(enc));
1044 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1045 str = str_alloc_heap(klass);
1049 RBASIC(str)->flags |= STR_NOFREE;
1051 rb_enc_associate_index(str, encindex);
1079static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1081 int ecflags,
VALUE ecopts);
1086 int encidx = rb_enc_to_index(enc);
1087 if (rb_enc_get_index(str) == encidx)
1088 return is_ascii_string(str);
1099 if (!to)
return str;
1100 if (!from) from = rb_enc_get(str);
1101 if (from == to)
return str;
1102 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1103 rb_is_ascii8bit_enc(to)) {
1104 if (STR_ENC_GET(str) != to) {
1106 rb_enc_associate(str, to);
1113 from, to, ecflags, ecopts);
1114 if (
NIL_P(newstr)) {
1122rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1127 olen = RSTRING_LEN(newstr);
1128 if (ofs < -olen || olen < ofs)
1130 if (ofs < 0) ofs += olen;
1132 STR_SET_LEN(newstr, ofs);
1137 return str_cat_conv_enc_opts(newstr, ofs,
ptr,
len, from,
1145 STR_SET_LEN(str, 0);
1146 rb_enc_associate(str, enc);
1152str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *
ptr,
long len,
1154 int ecflags,
VALUE ecopts)
1159 VALUE econv_wrapper;
1160 const unsigned char *start, *sp;
1161 unsigned char *dest, *dp;
1162 size_t converted_output = (size_t)ofs;
1167 RBASIC_CLEAR_CLASS(econv_wrapper);
1169 if (!ec)
return Qnil;
1172 sp = (
unsigned char*)
ptr;
1174 while ((dest = (
unsigned char*)RSTRING_PTR(newstr)),
1175 (dp = dest + converted_output),
1179 size_t converted_input = sp - start;
1180 size_t rest =
len - converted_input;
1181 converted_output = dp - dest;
1183 if (converted_input && converted_output &&
1184 rest < (LONG_MAX / converted_output)) {
1185 rest = (rest * converted_output) / converted_input;
1190 olen += rest < 2 ? 2 : rest;
1197 len = dp - (
unsigned char*)RSTRING_PTR(newstr);
1199 rb_enc_associate(newstr, to);
1218 const int eidx = rb_enc_to_index(eenc);
1221 return rb_enc_str_new(
ptr,
len, eenc);
1225 if ((eidx == rb_ascii8bit_encindex()) ||
1226 (eidx == rb_usascii_encindex() && search_nonascii(
ptr,
ptr +
len))) {
1230 ienc = rb_default_internal_encoding();
1231 if (!ienc || eenc == ienc) {
1232 return rb_enc_str_new(
ptr,
len, eenc);
1236 if ((eidx == rb_ascii8bit_encindex()) ||
1237 (eidx == rb_usascii_encindex()) ||
1238 (rb_enc_asciicompat(eenc) && !search_nonascii(
ptr,
ptr +
len))) {
1239 return rb_enc_str_new(
ptr,
len, ienc);
1242 str = rb_enc_str_new(NULL, 0, ienc);
1245 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0,
ptr,
len, eenc, 0,
Qnil))) {
1246 rb_str_initialize(str,
ptr,
len, eenc);
1254 int eidx = rb_enc_to_index(eenc);
1255 if (eidx == rb_usascii_encindex() &&
1256 !is_ascii_string(str)) {
1257 rb_enc_associate_index(str, rb_ascii8bit_encindex());
1260 rb_enc_associate_index(str, eidx);
1319str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1321 const int termlen = TERM_LEN(str);
1326 if (str_embed_capa(str2) >=
len + termlen) {
1327 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1328 STR_SET_EMBED(str2);
1329 memcpy(ptr2, RSTRING_PTR(str),
len);
1330 STR_SET_EMBED_LEN(str2,
len);
1331 TERM_FILL(ptr2+
len, termlen);
1335 if (STR_SHARED_P(str)) {
1336 root =
RSTRING(str)->as.heap.aux.shared;
1344 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1346 rb_fatal(
"about to free a possible shared root");
1348 char *ptr2 = STR_HEAP_PTR(str2);
1350 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1353 FL_SET(str2, STR_NOEMBED);
1356 STR_SET_SHARED(str2, root);
1364 str_replace_shared_without_enc(str2, str);
1365 rb_enc_cr_str_exact_copy(str2, str);
1372 return str_replace_shared(str_alloc_heap(klass), str);
1389rb_str_new_frozen_String(
VALUE orig)
1396rb_str_tmp_frozen_acquire(
VALUE orig)
1399 return str_new_frozen_buffer(0, orig, FALSE);
1403rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1405 if (RBASIC_CLASS(tmp) != 0)
1408 if (STR_EMBED_P(tmp)) {
1421 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1422 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1427 STR_SET_EMBED_LEN(tmp, 0);
1435 return str_new_frozen_buffer(klass, orig, TRUE);
1441 assert(!STR_EMBED_P(orig));
1442 assert(!STR_SHARED_P(orig));
1444 VALUE str = str_alloc_heap(klass);
1445 RSTRING(str)->as.heap.len = RSTRING_LEN(orig);
1446 RSTRING(str)->as.heap.ptr = RSTRING_PTR(orig);
1447 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1448 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1449 RBASIC(orig)->flags &= ~STR_NOFREE;
1450 STR_SET_SHARED(orig, str);
1457str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1461 long len = RSTRING_LEN(orig);
1462 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1464 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1465 str = str_new0(klass, RSTRING_PTR(orig),
len, termlen);
1466 assert(STR_EMBED_P(str));
1471 long ofs =
RSTRING(orig)->as.heap.ptr - RSTRING_PTR(
shared);
1472 long rest = RSTRING_LEN(
shared) - ofs -
RSTRING(orig)->as.heap.len;
1475 assert(ofs + rest <= RSTRING_LEN(
shared));
1477 assert(!STR_EMBED_P(
shared));
1481 if ((ofs > 0) || (rest > 0) ||
1484 str = str_new_shared(klass,
shared);
1485 assert(!STR_EMBED_P(str));
1486 RSTRING(str)->as.heap.ptr += ofs;
1487 RSTRING(str)->as.heap.len -= ofs + rest;
1490 if (RBASIC_CLASS(
shared) == 0)
1495 else if (STR_EMBEDDABLE_P(RSTRING_LEN(orig), TERM_LEN(orig))) {
1496 str = str_alloc_embed(klass, RSTRING_LEN(orig) + TERM_LEN(orig));
1498 memcpy(RSTRING_PTR(str), RSTRING_PTR(orig), RSTRING_LEN(orig));
1499 STR_SET_EMBED_LEN(str, RSTRING_LEN(orig));
1500 TERM_FILL(RSTRING_END(str), TERM_LEN(orig));
1503 str = heap_str_make_shared(klass, orig);
1507 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1519str_new_empty_String(
VALUE str)
1522 rb_enc_copy(v, str);
/* Lower bound for a requested buffer capacity: the visible users raise
 * `capa` to this value whenever it is smaller. */
1526#define STR_BUF_MIN_SIZE 63
1534 if (STR_EMBEDDABLE_P(
capa, 1)) {
1541 if (
capa < STR_BUF_MIN_SIZE) {
1542 capa = STR_BUF_MIN_SIZE;
1547 RSTRING(str)->as.heap.ptr[0] =
'\0';
1567 return str_new(0, 0,
len);
1573 if (
FL_TEST(str, RSTRING_FSTR)) {
1574 st_data_t fstr = (st_data_t)str;
1578 st_delete(rb_vm_fstring_table(), &fstr, NULL);
1579 RB_DEBUG_COUNTER_INC(obj_str_fstr);
1584 if (STR_EMBED_P(str)) {
1585 RB_DEBUG_COUNTER_INC(obj_str_embed);
1587 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1588 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1589 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1592 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1593 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1597RUBY_FUNC_EXPORTED
size_t
1598rb_str_memsize(
VALUE str)
1600 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1601 return STR_HEAP_SIZE(str);
1611 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1614static inline void str_discard(
VALUE str);
1615static void str_shared_replace(
VALUE str,
VALUE str2);
1620 if (str != str2) str_shared_replace(str, str2);
1631 enc = STR_ENC_GET(str2);
1634 termlen = rb_enc_mbminlen(enc);
1636 if (str_embed_capa(str) >= RSTRING_LEN(str2) + termlen) {
1638 memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), (
size_t)RSTRING_LEN(str2) + termlen);
1639 STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
1640 rb_enc_associate(str, enc);
1645 if (STR_EMBED_P(str2)) {
1646 assert(!
FL_TEST(str2, STR_SHARED));
1648 assert(
len + termlen <= str_embed_capa(str2));
1650 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1651 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1652 RSTRING(str2)->as.heap.ptr = new_ptr;
1655 STR_SET_NOEMBED(str2);
1659 STR_SET_NOEMBED(str);
1661 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1662 RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
1664 if (
FL_TEST(str2, STR_SHARED)) {
1666 STR_SET_SHARED(str,
shared);
1669 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1673 STR_SET_EMBED(str2);
1674 RSTRING_PTR(str2)[0] = 0;
1675 STR_SET_EMBED_LEN(str2, 0);
1676 rb_enc_associate(str, enc);
1689 str = rb_funcall(obj, idTo_s, 0);
1690 return rb_obj_as_string_result(str, obj);
1693MJIT_FUNC_EXPORTED
VALUE
1706 len = RSTRING_LEN(str2);
1707 if (STR_SHARED_P(str2)) {
1710 STR_SET_NOEMBED(str);
1712 RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
1713 STR_SET_SHARED(str,
shared);
1714 rb_enc_cr_str_exact_copy(str, str2);
1717 str_replace_shared(str, str2);
1726 size_t size = rb_str_embed_size(
capa);
1728 assert(rb_gc_size_allocatable_p(size));
1730 assert(size <=
sizeof(
struct RString));
1733 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1742 RB_RVARGC_EC_NEWOBJ_OF(ec, str,
struct RString, klass,
1751 const VALUE flag_mask =
1753 RSTRING_NOEMBED | RSTRING_EMBED_LEN_MASK |
1760 if (STR_EMBED_P(str)) {
1761 long len = RSTRING_EMBED_LEN(str);
1763 assert(STR_EMBED_P(dup));
1764 assert(str_embed_capa(dup) >=
len + 1);
1765 STR_SET_EMBED_LEN(dup,
len);
1771 root =
RSTRING(str)->as.heap.aux.shared;
1773 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1774 root = str = str_new_frozen(klass, str);
1777 assert(!STR_SHARED_P(root));
1778 assert(RB_OBJ_FROZEN_RAW(root));
1781 else if (STR_EMBED_P(root)) {
1788 RSTRING(dup)->as.heap.len = RSTRING_LEN(str);
1789 RSTRING(dup)->as.heap.ptr = RSTRING_PTR(str);
1790 FL_SET(root, STR_SHARED_ROOT);
1792 flags |= RSTRING_NOEMBED | STR_SHARED;
1797 encidx = rb_enc_get_index(str);
1798 flags &= ~ENCODING_MASK;
1801 if (encidx) rb_enc_associate_index(dup, encidx);
1809 if (
FL_TEST(str, STR_NOEMBED)) {
1810 dup = ec_str_alloc_heap(ec, klass);
1813 dup = ec_str_alloc_embed(ec, klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
1816 return str_duplicate_setup(klass, str, dup);
1823 if (
FL_TEST(str, STR_NOEMBED)) {
1824 dup = str_alloc_heap(klass);
1827 dup = str_alloc_embed(klass, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
1830 return str_duplicate_setup(klass, str, dup);
1842 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1849 RUBY_DTRACE_CREATE_HOOK(STRING, RSTRING_LEN(str));
1850 return ec_str_duplicate(ec,
rb_cString, str);
1865 static ID keyword_ids[2];
1866 VALUE orig, opt, venc, vcapa;
1871 if (!keyword_ids[0]) {
1872 keyword_ids[0] = rb_id_encoding();
1873 CONST_ID(keyword_ids[1],
"capacity");
1876 n = rb_scan_args(argc, argv,
"01:", &orig, &opt);
1881 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
1882 enc = rb_to_encoding(venc);
1884 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
1887 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
1889 if (
capa < STR_BUF_MIN_SIZE) {
1890 capa = STR_BUF_MIN_SIZE;
1894 len = RSTRING_LEN(orig);
1898 if (orig == str) n = 0;
1900 str_modifiable(str);
1901 if (STR_EMBED_P(str)) {
1902 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1904 assert(
RSTRING(str)->
as.embed.len + 1 <= str_embed_capa(str));
1909 RSTRING(str)->as.heap.ptr = new_ptr;
1911 else if (
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
1912 const size_t size = (size_t)
capa + termlen;
1913 const char *
const old_ptr = RSTRING_PTR(str);
1914 const size_t osize =
RSTRING(str)->as.heap.len + TERM_LEN(str);
1915 char *new_ptr =
ALLOC_N(
char, (
size_t)
capa + termlen);
1916 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
1918 RSTRING(str)->as.heap.ptr = new_ptr;
1920 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
1921 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
1922 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
1927 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
1928 rb_enc_cr_str_exact_copy(str, orig);
1930 FL_SET(str, STR_NOEMBED);
1937 rb_enc_associate(str, enc);
/*
 * True for every byte whose top two bits are not `10`, i.e. every byte that
 * is not a UTF-8 continuation byte (0x80..0xBF).  ASCII bytes therefore
 * count as lead bytes as well.
 */
1948#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1963static inline uintptr_t
1964count_utf8_lead_bytes_with_word(
const uintptr_t *s)
1969 d = (d>>6) | (~d>>7);
1970 d &= NONASCII_MASK >> 7;
1973#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
1975 return rb_popcount_intptr(d);
1979# if SIZEOF_VOIDP == 8
1988enc_strlen(
const char *p,
const char *e,
rb_encoding *enc,
int cr)
1993 if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1994 long diff = (long)(e - p);
1995 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2000 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2001 const uintptr_t *s, *t;
2002 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2003 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2004 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2005 while (p < (
const char *)s) {
2006 if (is_utf8_lead_byte(*p))
len++;
2010 len += count_utf8_lead_bytes_with_word(s);
2013 p = (
const char *)s;
2016 if (is_utf8_lead_byte(*p))
len++;
2022 else if (rb_enc_asciicompat(enc)) {
2027 q = search_nonascii(p, e);
2033 p += rb_enc_fast_mbclen(p, e, enc);
2040 q = search_nonascii(p, e);
2046 p += rb_enc_mbclen(p, e, enc);
2053 for (c=0; p<e; c++) {
2054 p += rb_enc_mbclen(p, e, enc);
2069rb_enc_strlen_cr(
const char *p,
const char *e,
rb_encoding *enc,
int *cr)
2076 if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
2077 long diff = (long)(e - p);
2078 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2080 else if (rb_enc_asciicompat(enc)) {
2084 q = search_nonascii(p, e);
2092 ret = rb_enc_precise_mbclen(p, e, enc);
2107 for (c=0; p<e; c++) {
2108 ret = rb_enc_precise_mbclen(p, e, enc);
2115 if (p + rb_enc_mbminlen(enc) <= e)
2116 p += rb_enc_mbminlen(enc);
2132 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2133 if (!enc) enc = STR_ENC_GET(str);
2134 p = RSTRING_PTR(str);
2135 e = RSTRING_END(str);
2139 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2144 return enc_strlen(p, e, enc, cr);
2151 return str_strlen(str, NULL);
2165 return LONG2NUM(str_strlen(str, NULL));
2177rb_str_bytesize(
VALUE str)
/*
 * Returns RBOOL(true) when the byte length of `str` is zero, RBOOL(false)
 * otherwise.
 * (The return-type line and braces are not visible in this excerpt.)
 */
2195rb_str_empty(
VALUE str)
2197 return RBOOL(RSTRING_LEN(str) == 0);
2215 char *ptr1, *ptr2, *ptr3;
2220 enc = rb_enc_check_str(str1, str2);
2223 termlen = rb_enc_mbminlen(enc);
2224 if (len1 > LONG_MAX - len2) {
2227 str3 = str_new0(
rb_cString, 0, len1+len2, termlen);
2228 ptr3 = RSTRING_PTR(str3);
2229 memcpy(ptr3, ptr1, len1);
2230 memcpy(ptr3+len1, ptr2, len2);
2231 TERM_FILL(&ptr3[len1+len2], termlen);
2241MJIT_FUNC_EXPORTED
VALUE
2247 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2250 int enc1 = rb_enc_get_index(str1);
2251 int enc2 = rb_enc_get_index(str2);
2256 else if (enc2 < 0) {
2259 else if (enc1 != enc2) {
2262 else if (len1 > LONG_MAX - len2) {
2295 rb_enc_copy(str2, str);
2302 if (RSTRING_LEN(str) == 1 && RSTRING_PTR(str)[0] == 0) {
2303 if (STR_EMBEDDABLE_P(
len, 1)) {
2305 memset(RSTRING_PTR(str2), 0,
len + 1);
2312 STR_SET_LEN(str2,
len);
2313 rb_enc_copy(str2, str);
2316 if (
len && LONG_MAX/
len < RSTRING_LEN(str)) {
2320 len *= RSTRING_LEN(str);
2321 termlen = TERM_LEN(str);
2323 ptr2 = RSTRING_PTR(str2);
2325 n = RSTRING_LEN(str);
2326 memcpy(ptr2, RSTRING_PTR(str), n);
2327 while (n <=
len/2) {
2328 memcpy(ptr2 + n, ptr2, n);
2331 memcpy(ptr2 + n, ptr2,
len-n);
2333 STR_SET_LEN(str2,
len);
2334 TERM_FILL(&ptr2[
len], termlen);
2335 rb_enc_cr_str_copy_for_substr(str2, str);
2361 VALUE tmp = rb_check_array_type(arg);
2370rb_check_lockedtmp(
VALUE str)
2372 if (
FL_TEST(str, STR_TMPLOCK)) {
/*
 * Pre-modification check: calls rb_check_lockedtmp(str), which tests the
 * STR_TMPLOCK flag.
 * NOTE(review): lines of this function are missing from the excerpt, so
 * further checks may follow -- confirm against the full file.
 */
2378str_modifiable(
VALUE str)
2380 rb_check_lockedtmp(str);
2385str_dependent_p(
VALUE str)
2387 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
/*
 * Runs the str_modifiable() checks first, then returns non-zero when the
 * string is NOT dependent according to str_dependent_p().
 * (The return-type line and braces are not visible in this excerpt.)
 */
2396str_independent(
VALUE str)
2398 str_modifiable(str);
2399 return !str_dependent_p(str);
2403str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2411 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2416 STR_SET_EMBED_LEN(str,
len);
2421 oldptr = RSTRING_PTR(str);
2423 memcpy(
ptr, oldptr,
len);
2425 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2428 STR_SET_NOEMBED(str);
2429 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2430 TERM_FILL(
ptr +
len, termlen);
2439 if (!str_independent(str))
2440 str_make_independent(str);
2447 int termlen = TERM_LEN(str);
2448 long len = RSTRING_LEN(str);
2453 if (expand >= LONG_MAX -
len) {
2457 if (!str_independent(str)) {
2458 str_make_independent_expand(str,
len, expand, termlen);
2460 else if (expand > 0) {
2461 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2468str_modify_keep_cr(
VALUE str)
2470 if (!str_independent(str))
2471 str_make_independent(str);
2478str_discard(
VALUE str)
2480 str_modifiable(str);
2481 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2482 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2483 RSTRING(str)->as.heap.ptr = 0;
2484 RSTRING(str)->as.heap.len = 0;
2495 if (!rb_enc_asciicompat(enc)) {
2515 return RSTRING_PTR(str);
2519zero_filled(
const char *s,
int n)
2521 for (; n > 0; --n) {
/*
 * Scans the `len` bytes starting at `s` character by character, advancing by
 * rb_enc_mbclen() each step, and returns the first position at which
 * zero_filled(s, minlen) holds.  The scan stops once fewer than `minlen`
 * bytes remain before the end.
 * The value returned when nothing is found is on a line missing from this
 * excerpt.
 */
2528str_null_char(
const char *s,
long len,
const int minlen,
rb_encoding *enc)
2530 const char *e = s +
len;
2532 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2533 if (zero_filled(s, minlen))
return s;
2539str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2544 if (str_dependent_p(str)) {
2545 if (!zero_filled(s +
len, termlen))
2546 str_make_independent_expand(str,
len, 0L, termlen);
2549 TERM_FILL(s +
len, termlen);
2552 return RSTRING_PTR(str);
2556rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2558 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2559 long len = RSTRING_LEN(str);
2563 rb_check_lockedtmp(str);
2564 str_make_independent_expand(str,
len, 0L, termlen);
2566 else if (str_dependent_p(str)) {
2567 if (termlen > oldtermlen)
2568 str_make_independent_expand(str,
len, 0L, termlen);
2571 if (!STR_EMBED_P(str)) {
2573 assert(!
FL_TEST((str), STR_SHARED));
2576 if (termlen > oldtermlen) {
2577 TERM_FILL(RSTRING_PTR(str) +
len, termlen);
2585str_null_check(
VALUE str,
int *w)
2587 char *s = RSTRING_PTR(str);
2588 long len = RSTRING_LEN(str);
2590 const int minlen = rb_enc_mbminlen(enc);
2594 if (str_null_char(s,
len, minlen, enc)) {
2597 return str_fill_term(str, s,
len, minlen);
2600 if (!s || memchr(s, 0,
len)) {
2604 s = str_fill_term(str, s,
len, minlen);
2610rb_str_to_cstr(
VALUE str)
2613 return str_null_check(str, &w);
2621 char *s = str_null_check(str, &w);
/*
 * Front end for str_fill_term(): passes the string's current pointer and
 * length together with `newminlen` as the terminator width, and returns
 * str_fill_term()'s result.
 * (The return-type line and braces are not visible in this excerpt.)
 */
2632rb_str_fill_terminator(
VALUE str,
const int newminlen)
2634 char *s = RSTRING_PTR(str);
2635 long len = RSTRING_LEN(str);
2636 return str_fill_term(str, s,
len, newminlen);
2642 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2666str_nth_len(
const char *p,
const char *e,
long *nthp,
rb_encoding *enc)
2669 if (rb_enc_mbmaxlen(enc) == 1) {
2672 else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
2673 p += nth * rb_enc_mbmaxlen(enc);
2675 else if (rb_enc_asciicompat(enc)) {
2676 const char *p2, *e2;
2679 while (p < e && 0 < nth) {
2686 p2 = search_nonascii(p, e2);
2695 n = rb_enc_mbclen(p, e, enc);
2706 while (p < e && nth--) {
2707 p += rb_enc_mbclen(p, e, enc);
2718 return str_nth_len(p, e, &nth, enc);
2722str_nth(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2727 p = str_nth_len(p, e, &nth, enc);
/*
 * Looks up the position of character `nth` in [p, e) with str_nth().
 * When str_nth() returns NULL, the full byte length e - p is returned.
 * The return for the non-NULL case is on a line missing from this excerpt.
 */
2736str_offset(
const char *p,
const char *e,
long nth,
rb_encoding *enc,
int singlebyte)
2738 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2739 if (!pp)
return e - p;
2746 return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2747 STR_ENC_GET(str), single_byte_optimizable(str));
2752str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2755 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2756 const uintptr_t *s, *t;
2757 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2758 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2759 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2760 while (p < (
const char *)s) {
2761 if (is_utf8_lead_byte(*p)) nth--;
2765 nth -= count_utf8_lead_bytes_with_word(s);
2767 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2771 if (is_utf8_lead_byte(*p)) {
2772 if (nth == 0)
break;
2782str_utf8_offset(
const char *p,
const char *e,
long nth)
2784 const char *pp = str_utf8_nth(p, e, &nth);
2793 if (single_byte_optimizable(str) || pos < 0)
2796 char *p = RSTRING_PTR(str);
2797 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
2802str_subseq(
VALUE str,
long beg,
long len)
2806 const long rstring_embed_capa_max = ((
sizeof(
struct RString) - offsetof(struct
RString,
as.
embed.
ary)) / sizeof(char)) - 1;
2808 if (!SHARABLE_SUBSTRING_P(beg,
len, RSTRING_LEN(str)) ||
2809 len <= rstring_embed_capa_max) {
2816 RSTRING(str2)->as.heap.ptr += beg;
2828 VALUE str2 = str_subseq(str, beg,
len);
2829 rb_enc_cr_str_copy_for_substr(str2, str);
2838 long blen = RSTRING_LEN(str);
2840 char *p, *s = RSTRING_PTR(str), *e = s + blen;
2842 if (
len < 0)
return 0;
2846 if (single_byte_optimizable(str)) {
2847 if (beg > blen)
return 0;
2850 if (beg < 0)
return 0;
2852 if (
len > blen - beg)
2854 if (
len < 0)
return 0;
2859 if (
len > -beg)
len = -beg;
2860 if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
2862 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
2865 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
2871 slen = str_strlen(str, enc);
2873 if (beg < 0)
return 0;
2875 if (
len == 0)
goto end;
2878 else if (beg > 0 && beg > RSTRING_LEN(str)) {
2882 if (beg > str_strlen(str, enc))
return 0;
2887 enc == rb_utf8_encoding()) {
2888 p = str_utf8_nth(s, e, &beg);
2889 if (beg > 0)
return 0;
2890 len = str_utf8_offset(p, e,
len);
2893 else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
2894 int char_sz = rb_enc_mbmaxlen(enc);
2896 p = s + beg * char_sz;
2900 else if (
len * char_sz > e - p)
2905 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
2906 if (beg > 0)
return 0;
2910 len = str_offset(p, e,
len, enc, 0);
/*
 * Forward declaration of str_substr(), which is called before its
 * definition appears further down in this file.
 *
 * str   - the string to take a substring of
 * beg   - start position
 * len   - requested length
 * empty - flag consulted when the resulting length is zero: the definition
 *         returns Qnil for a zero length when this flag is false.
 */
2918static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
2923 return str_substr(str, beg,
len, TRUE);
2927str_substr(
VALUE str,
long beg,
long len,
int empty)
2931 if (!p)
return Qnil;
2932 if (!
len && !empty)
return Qnil;
2934 beg = p - RSTRING_PTR(str);
2936 VALUE str2 = str_subseq(str, beg,
len);
2937 rb_enc_cr_str_copy_for_substr(str2, str);
2981str_uminus(
VALUE str)
2986 return rb_fstring(str);
/* Alias: from here on, rb_str_dup_frozen expands to rb_str_new_frozen. */
2990#define rb_str_dup_frozen rb_str_new_frozen
2995 if (
FL_TEST(str, STR_TMPLOCK)) {
2998 FL_SET(str, STR_TMPLOCK);
3005 if (!
FL_TEST(str, STR_TMPLOCK)) {
3012RUBY_FUNC_EXPORTED
VALUE
3016 return rb_ensure(func, arg, rb_str_unlocktmp, str);
3023 const int termlen = TERM_LEN(str);
3025 str_modifiable(str);
3026 if (STR_SHARED_P(str)) {
3029 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3032 STR_SET_LEN(str,
len);
3033 TERM_FILL(&RSTRING_PTR(str)[
len], termlen);
3043 int independent = str_independent(str);
3044 long slen = RSTRING_LEN(str);
3052 const int termlen = TERM_LEN(str);
3053 if (STR_EMBED_P(str)) {
3054 if (
len == slen)
return str;
3055 if (str_embed_capa(str) >=
len + termlen) {
3056 STR_SET_EMBED_LEN(str,
len);
3060 str_make_independent_expand(str, slen,
len - slen, termlen);
3062 else if (str_embed_capa(str) >=
len + termlen) {
3063 char *
ptr = STR_HEAP_PTR(str);
3065 if (slen >
len) slen =
len;
3068 STR_SET_EMBED_LEN(str,
len);
3069 if (independent) ruby_xfree(
ptr);
3072 else if (!independent) {
3073 if (
len == slen)
return str;
3074 str_make_independent_expand(str, slen,
len - slen, termlen);
3078 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3079 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3082 else if (
len == slen)
return str;
3090str_buf_cat4(
VALUE str,
const char *
ptr,
long len,
bool keep_cr)
3093 str_modify_keep_cr(str);
3098 if (
len == 0)
return 0;
3100 long capa, total, olen, off = -1;
3102 const int termlen = TERM_LEN(str);
3108 if (
ptr >= sptr &&
ptr <= sptr + olen) {
3112 if (STR_EMBED_P(str)) {
3113 capa = str_embed_capa(str) - termlen;
3114 sptr =
RSTRING(str)->as.embed.ary;
3115 olen = RSTRING_EMBED_LEN(str);
3119 sptr =
RSTRING(str)->as.heap.ptr;
3120 olen =
RSTRING(str)->as.heap.len;
3122 if (olen > LONG_MAX -
len) {
3127 if (total >= LONG_MAX / 2) {
3130 while (total >
capa) {
3133 RESIZE_CAPA_TERM(str,
capa, termlen);
3134 sptr = RSTRING_PTR(str);
3139 memcpy(sptr + olen,
ptr,
len);
3140 STR_SET_LEN(str, total);
3141 TERM_FILL(sptr + total, termlen);
/*
 * Shorthand for str_buf_cat4() with its last argument (keep_cr) fixed to
 * false: append `len` bytes starting at `ptr` to `str`.
 */
3146#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
/*
 * Shorthand for str_buf_cat4() with keep_cr fixed to false, where the length
 * is computed by rb_strlen_lit(ptr).
 * NOTE(review): presumably meant for string-literal arguments only -- confirm
 * against the definition of rb_strlen_lit.  `ptr` is expanded twice.
 */
3147#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3152 if (
len == 0)
return str;
3156 return str_buf_cat(str,
ptr,
len);
3171rb_enc_cr_str_buf_cat(
VALUE str,
const char *
ptr,
long len,
3172 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3181 if (str_encindex == ptr_encindex) {
3183 ptr_cr = coderange_scan(
ptr,
len, rb_enc_from_index(ptr_encindex));
3187 str_enc = rb_enc_from_index(str_encindex);
3188 ptr_enc = rb_enc_from_index(ptr_encindex);
3189 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3192 if (RSTRING_LEN(str) == 0) {
3195 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3201 ptr_cr = coderange_scan(
ptr,
len, ptr_enc);
3210 *ptr_cr_ret = ptr_cr;
3212 if (str_encindex != ptr_encindex &&
3215 str_enc = rb_enc_from_index(str_encindex);
3216 ptr_enc = rb_enc_from_index(ptr_encindex);
3221 res_encindex = str_encindex;
3226 res_encindex = str_encindex;
3230 res_encindex = ptr_encindex;
3235 res_encindex = str_encindex;
3242 res_encindex = str_encindex;
3250 str_buf_cat(str,
ptr,
len);
3256 rb_enc_name(str_enc), rb_enc_name(ptr_enc));
3263 return rb_enc_cr_str_buf_cat(str,
ptr,
len,
3273 if (rb_enc_asciicompat(enc)) {
3274 return rb_enc_cr_str_buf_cat(str,
ptr, strlen(
ptr),
3278 char *buf =
ALLOCA_N(
char, rb_enc_mbmaxlen(enc));
3280 unsigned int c = (
unsigned char)*
ptr;
3281 int len = rb_enc_codelen(c, enc);
3282 rb_enc_mbcput(c, buf, enc);
3283 rb_enc_cr_str_buf_cat(str, buf,
len,
3296 if (str_enc_fastpath(str)) {
3300 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3305 str_buf_cat4(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
true);
3315 rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
/*
 * Length threshold used by rb_str_concat_literals(): the summed length of
 * the literal strings is compared against this value.
 */
3330#define MIN_PRE_ALLOC_SIZE 48
3332MJIT_FUNC_EXPORTED
VALUE
3333rb_str_concat_literals(
size_t num,
const VALUE *strary)
3342 for (i = 0; i < num; ++i) {
len += RSTRING_LEN(strary[i]); }
3343 if (LIKELY(
len < MIN_PRE_ALLOC_SIZE)) {
3349 rb_enc_copy(str, strary[0]);
3353 for (i = s; i < num; ++i) {
3354 const VALUE v = strary[i];
3358 if (encidx != ENCINDEX_US_ASCII) {
3360 rb_enc_set_index(str, encidx);
3385rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3387 str_modifiable(str);
3392 else if (argc > 1) {
3395 rb_enc_copy(arg_str, str);
3396 for (i = 0; i < argc; i++) {
3431 if (rb_num_to_uint(str2, &code) == 0) {
3444 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3447 buf[0] = (char)code;
3449 if (encidx != rb_enc_to_index(enc)) {
3450 rb_enc_associate_index(str1, encidx);
3455 long pos = RSTRING_LEN(str1);
3460 switch (
len = rb_enc_codelen(code, enc)) {
3461 case ONIGERR_INVALID_CODE_POINT_VALUE:
3464 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3470 rb_enc_mbcput(code, buf, enc);
3471 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3475 memcpy(RSTRING_PTR(str1) + pos, buf,
len);
3484rb_ascii8bit_appendable_encoding_index(
rb_encoding *enc,
unsigned int code)
3486 int encidx = rb_enc_to_index(enc);
3488 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3493 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3494 return ENCINDEX_ASCII_8BIT;
3517rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
3519 str_modifiable(str);
3524 else if (argc > 1) {
3527 rb_enc_copy(arg_str, str);
3528 for (i = 0; i < argc; i++) {
3541 if (e && is_ascii_string(str)) {
3544 return rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
3551 const char *ptr1, *ptr2;
3554 return (len1 != len2 ||
3556 memcmp(ptr1, ptr2, len1) != 0);
3570rb_str_hash_m(
VALUE str)
/*
 * Yields the smaller of `a` and `b` (yields `a` when they compare equal).
 * Each argument appears twice in the expansion, so do not pass expressions
 * with side effects.
 */
3576#define lesser(a,b) (((a)>(b))?(b):(a))
3584 if (RSTRING_LEN(str1) == 0)
return TRUE;
3585 if (RSTRING_LEN(str2) == 0)
return TRUE;
3588 if (idx1 == idx2)
return TRUE;
3593 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
3597 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
3607 const char *ptr1, *ptr2;
3610 if (str1 == str2)
return 0;
3613 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
3622 if (len1 > len2)
return 1;
3625 if (retval > 0)
return 1;
3652 if (str1 == str2)
return Qtrue;
3659 return rb_str_eql_internal(str1, str2);
3680MJIT_FUNC_EXPORTED
VALUE
3683 if (str1 == str2)
return Qtrue;
3685 return rb_str_eql_internal(str1, str2);
3716 return rb_invcmp(str1, str2);
3758 return str_casecmp(str1, s);
3766 const char *p1, *p1end, *p2, *p2end;
3768 enc = rb_enc_compatible(str1, str2);
3773 p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
3774 p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
3775 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
3776 while (p1 < p1end && p2 < p2end) {
3778 unsigned int c1 =
TOLOWER(*p1 & 0xff);
3779 unsigned int c2 =
TOLOWER(*p2 & 0xff);
3781 return INT2FIX(c1 < c2 ? -1 : 1);
3788 while (p1 < p1end && p2 < p2end) {
3789 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
3790 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
3792 if (0 <= c1 && 0 <= c2) {
3796 return INT2FIX(c1 < c2 ? -1 : 1);
3800 l1 = rb_enc_mbclen(p1, p1end, enc);
3801 l2 = rb_enc_mbclen(p2, p2end, enc);
3802 len = l1 < l2 ? l1 : l2;
3803 r = memcmp(p1, p2,
len);
3805 return INT2FIX(r < 0 ? -1 : 1);
3807 return INT2FIX(l1 < l2 ? -1 : 1);
3813 if (RSTRING_LEN(str1) == RSTRING_LEN(str2))
return INT2FIX(0);
3814 if (RSTRING_LEN(str1) > RSTRING_LEN(str2))
return INT2FIX(1);
3848 return str_casecmp_p(str1, s);
3855 VALUE folded_str1, folded_str2;
3856 VALUE fold_opt = sym_fold;
3858 enc = rb_enc_compatible(str1, str2);
3863 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
3864 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
3866 return rb_str_eql(folded_str1, folded_str2);
3870strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
3871 const char *sub_ptr,
long sub_len,
long offset,
rb_encoding *enc)
3873 const char *search_start = str_ptr;
3874 long pos, search_len = str_len - offset;
3878 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
3879 if (pos < 0)
return pos;
3880 t = rb_enc_right_char_head(search_start, search_start+pos, str_ptr_end, enc);
3881 if (t == search_start + pos)
break;
3882 search_len -= t - search_start;
3883 if (search_len <= 0)
return -1;
3884 offset += t - search_start;
3887 return pos + offset;
/*
 * rb_str_index(): rb_strseq_index() with its last argument (in_byte) fixed
 * to 0.
 */
3890#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
3893rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
3895 const char *str_ptr, *str_ptr_end, *sub_ptr;
3896 long str_len, sub_len;
3899 enc = rb_enc_check(str, sub);
3900 if (is_broken_string(sub))
return -1;
3902 str_ptr = RSTRING_PTR(str);
3903 str_ptr_end = RSTRING_END(str);
3904 str_len = RSTRING_LEN(str);
3905 sub_ptr = RSTRING_PTR(sub);
3906 sub_len = RSTRING_LEN(sub);
3908 if (str_len < sub_len)
return -1;
3911 long str_len_char, sub_len_char;
3912 int single_byte = single_byte_optimizable(str);
3913 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
3914 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
3916 offset += str_len_char;
3917 if (offset < 0)
return -1;
3919 if (str_len_char - offset < sub_len_char)
return -1;
3920 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
3923 if (sub_len == 0)
return offset;
3926 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
3940rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
3946 if (rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
3953 pos += str_strlen(str, NULL);
3963 if (pos > str_strlen(str, NULL))
3965 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
3966 rb_enc_check(str, sub), single_byte_optimizable(str));
3980 pos = rb_str_index(str, sub, pos);
3984 if (pos == -1)
return Qnil;
3993str_check_byte_pos(
VALUE str,
long pos)
3995 const char *s = RSTRING_PTR(str);
3996 const char *e = RSTRING_END(str);
3997 const char *p = s + pos;
3998 const char *pp = rb_enc_left_char_head(s, p, e, rb_enc_get(str));
4044rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4050 if (rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4057 pos += RSTRING_LEN(str);
4066 if (!str_check_byte_pos(str, pos)) {
4068 "offset %ld does not land on character boundary", pos);
4072 if (pos > RSTRING_LEN(str))
4086 pos = rb_strseq_index(str, sub, pos, 1);
4089 if (pos == -1)
return Qnil;
4097 char *hit, *adjusted;
4099 long slen, searchlen;
4102 sbeg = RSTRING_PTR(str);
4103 slen = RSTRING_LEN(sub);
4104 if (slen == 0)
return s - sbeg;
4105 e = RSTRING_END(str);
4106 t = RSTRING_PTR(sub);
4108 searchlen = s - sbeg + 1;
4111 hit = memrchr(sbeg, c, searchlen);
4113 adjusted = rb_enc_left_char_head(sbeg, hit, e, enc);
4114 if (hit != adjusted) {
4115 searchlen = adjusted - sbeg;
4118 if (memcmp(hit, t, slen) == 0)
4120 searchlen = adjusted - sbeg;
4121 }
while (searchlen > 0);
4132 sbeg = RSTRING_PTR(str);
4133 e = RSTRING_END(str);
4134 t = RSTRING_PTR(sub);
4135 slen = RSTRING_LEN(sub);
4138 if (memcmp(s, t, slen) == 0) {
4141 if (s <= sbeg)
break;
4142 s = rb_enc_prev_char(sbeg, s, e, enc);
4157 enc = rb_enc_check(str, sub);
4158 if (is_broken_string(sub))
return -1;
4159 singlebyte = single_byte_optimizable(str);
4160 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4161 slen = str_strlen(sub, enc);
4164 if (len < slen)
return -1;
4165 if (len - pos < slen) pos = len - slen;
4166 if (len == 0)
return pos;
4168 sbeg = RSTRING_PTR(str);
4171 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4177 s = str_nth(sbeg, RSTRING_END(str), pos, enc, singlebyte);
4239rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4244 long pos, len = str_strlen(str, enc);
4246 if (rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4257 if (pos > len) pos = len;
4265 pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
4266 enc, single_byte_optimizable(str));
4277 pos = rb_str_rindex(str, sub, pos);
4278 if (pos >= 0)
return LONG2NUM(pos);
4284rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4290 enc = rb_enc_check(str, sub);
4291 if (is_broken_string(sub))
return -1;
4292 len = RSTRING_LEN(str);
4293 slen = RSTRING_LEN(sub);
4296 if (len < slen)
return -1;
4297 if (len - pos < slen) pos = len - slen;
4298 if (len == 0)
return pos;
4300 sbeg = RSTRING_PTR(str);
4303 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4310 return str_rindex(str, sub, s, enc);
4375rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4379 long pos, len = RSTRING_LEN(str);
4381 if (rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
4392 if (pos > len) pos = len;
4398 if (!str_check_byte_pos(str, pos)) {
4400 "offset %ld does not land on character boundary", pos);
4413 pos = rb_str_byterindex(str, sub, pos);
4414 if (pos >= 0)
return LONG2NUM(pos);
4450 switch (OBJ_BUILTIN_TYPE(y)) {
4458 return rb_funcall(y, idEqTilde, 1, x);
4502rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4506 rb_check_arity(argc, 1, 2);
4509 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
4541rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
4544 rb_check_arity(argc, 1, 2);
4545 re = get_pat(argv[0]);
4546 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
4555static enum neighbor_char
4561 if (rb_enc_mbminlen(enc) > 1) {
4563 int r = rb_enc_precise_mbclen(p, p + len, enc), c;
4565 return NEIGHBOR_NOT_CHAR;
4567 c = rb_enc_mbc_to_codepoint(p, p + len, enc) + 1;
4568 l = rb_enc_code_to_mbclen(c, enc);
4569 if (!l)
return NEIGHBOR_NOT_CHAR;
4570 if (l != len)
return NEIGHBOR_WRAPPED;
4571 rb_enc_mbcput(c, p, enc);
4572 r = rb_enc_precise_mbclen(p, p + len, enc);
4574 return NEIGHBOR_NOT_CHAR;
4576 return NEIGHBOR_FOUND;
4579 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
4582 return NEIGHBOR_WRAPPED;
4583 ++((
unsigned char*)p)[i];
4584 l = rb_enc_precise_mbclen(p, p+len, enc);
4588 return NEIGHBOR_FOUND;
4591 memset(p+l, 0xff, len-l);
4597 for (len2 = len-1; 0 < len2; len2--) {
4598 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4602 memset(p+len2+1, 0xff, len-(len2+1));
4607static enum neighbor_char
4612 if (rb_enc_mbminlen(enc) > 1) {
4614 int r = rb_enc_precise_mbclen(p, p + len, enc), c;
4616 return NEIGHBOR_NOT_CHAR;
4618 c = rb_enc_mbc_to_codepoint(p, p + len, enc);
4619 if (!c)
return NEIGHBOR_NOT_CHAR;
4621 l = rb_enc_code_to_mbclen(c, enc);
4622 if (!l)
return NEIGHBOR_NOT_CHAR;
4623 if (l != len)
return NEIGHBOR_WRAPPED;
4624 rb_enc_mbcput(c, p, enc);
4625 r = rb_enc_precise_mbclen(p, p + len, enc);
4627 return NEIGHBOR_NOT_CHAR;
4629 return NEIGHBOR_FOUND;
4632 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
4635 return NEIGHBOR_WRAPPED;
4636 --((
unsigned char*)p)[i];
4637 l = rb_enc_precise_mbclen(p, p+len, enc);
4641 return NEIGHBOR_FOUND;
4644 memset(p+l, 0, len-l);
4650 for (len2 = len-1; 0 < len2; len2--) {
4651 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
4655 memset(p+len2+1, 0, len-(len2+1));
4669static enum neighbor_char
4670enc_succ_alnum_char(
char *p,
long len,
rb_encoding *enc,
char *carry)
4672 enum neighbor_char ret;
4676 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
4680 const int max_gaps = 1;
4682 c = rb_enc_mbc_to_codepoint(p, p+len, enc);
4683 if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
4684 ctype = ONIGENC_CTYPE_DIGIT;
4685 else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
4686 ctype = ONIGENC_CTYPE_ALPHA;
4688 return NEIGHBOR_NOT_CHAR;
4690 MEMCPY(save, p,
char, len);
4691 for (
try = 0;
try <= max_gaps; ++
try) {
4692 ret = enc_succ_char(p, len, enc);
4693 if (ret == NEIGHBOR_FOUND) {
4694 c = rb_enc_mbc_to_codepoint(p, p+len, enc);
4695 if (rb_enc_isctype(c, ctype, enc))
4696 return NEIGHBOR_FOUND;
4699 MEMCPY(p, save,
char, len);
4702 MEMCPY(save, p,
char, len);
4703 ret = enc_pred_char(p, len, enc);
4704 if (ret == NEIGHBOR_FOUND) {
4705 c = rb_enc_mbc_to_codepoint(p, p+len, enc);
4706 if (!rb_enc_isctype(c, ctype, enc)) {
4707 MEMCPY(p, save,
char, len);
4712 MEMCPY(p, save,
char, len);
4718 return NEIGHBOR_NOT_CHAR;
4721 if (ctype != ONIGENC_CTYPE_DIGIT) {
4722 MEMCPY(carry, p,
char, len);
4723 return NEIGHBOR_WRAPPED;
4726 MEMCPY(carry, p,
char, len);
4727 enc_succ_char(carry, len, enc);
4728 return NEIGHBOR_WRAPPED;
4797 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
4798 rb_enc_cr_str_copy_for_substr(str, orig);
4799 return str_succ(str);
4806 char *sbeg, *s, *e, *last_alnum = 0;
4807 int found_alnum = 0;
4809 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
4810 long carry_pos = 0, carry_len = 1;
4811 enum neighbor_char neighbor = NEIGHBOR_FOUND;
4813 slen = RSTRING_LEN(str);
4814 if (slen == 0)
return str;
4816 enc = STR_ENC_GET(str);
4817 sbeg = RSTRING_PTR(str);
4818 s = e = sbeg + slen;
4820 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4821 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
4827 l = rb_enc_precise_mbclen(s, e, enc);
4828 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4829 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4830 neighbor = enc_succ_alnum_char(s, l, enc, carry);
4832 case NEIGHBOR_NOT_CHAR:
4834 case NEIGHBOR_FOUND:
4836 case NEIGHBOR_WRAPPED:
4841 carry_pos = s - sbeg;
4846 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
4847 enum neighbor_char neighbor;
4848 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
4849 l = rb_enc_precise_mbclen(s, e, enc);
4850 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
4851 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
4853 neighbor = enc_succ_char(tmp, l, enc);
4855 case NEIGHBOR_FOUND:
4859 case NEIGHBOR_WRAPPED:
4862 case NEIGHBOR_NOT_CHAR:
4865 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
4867 enc_succ_char(s, l, enc);
4869 if (!rb_enc_asciicompat(enc)) {
4870 MEMCPY(carry, s,
char, l);
4873 carry_pos = s - sbeg;
4877 RESIZE_CAPA(str, slen + carry_len);
4878 sbeg = RSTRING_PTR(str);
4879 s = sbeg + carry_pos;
4880 memmove(s + carry_len, s, slen - carry_pos);
4881 memmove(s, carry, carry_len);
4883 STR_SET_LEN(str, slen);
4884 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
4900rb_str_succ_bang(
VALUE str)
4908all_digits_p(
const char *s,
long len)
4962 VALUE end, exclusive;
4964 rb_scan_args(argc, argv,
"11", &end, &exclusive);
4966 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
4972 VALUE current, after_end;
4979 enc = rb_enc_check(beg, end);
4980 ascii = (is_ascii_string(beg) && is_ascii_string(end));
4982 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
4983 char c = RSTRING_PTR(beg)[0];
4984 char e = RSTRING_PTR(end)[0];
4986 if (c > e || (excl && c == e))
return beg;
4988 if ((*each)(rb_enc_str_new(&c, 1, enc), arg))
break;
4989 if (!excl && c == e)
break;
4991 if (excl && c == e)
break;
4996 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
4997 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
4998 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5002 width = RSTRING_LENINT(beg);
5003 b = rb_str_to_inum(beg, 10, FALSE);
5004 e = rb_str_to_inum(end, 10, FALSE);
5011 if (excl && bi == ei)
break;
5012 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5017 ID op = excl ?
'<' : idLE;
5018 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5021 while (rb_funcall(b, op, 1, e)) {
5023 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5024 b = rb_funcallv(b, succ, 0, 0);
5031 if (n > 0 || (excl && n == 0))
return beg;
5033 after_end = rb_funcallv(end, succ, 0, 0);
5038 next = rb_funcallv(current, succ, 0, 0);
5039 if ((*each)(current, arg))
break;
5040 if (
NIL_P(next))
break;
5044 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5059 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5060 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5061 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5062 int width = RSTRING_LENINT(beg);
5063 b = rb_str_to_inum(beg, 10, FALSE);
5069 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5077 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5078 b = rb_funcallv(b, succ, 0, 0);
5084 VALUE next = rb_funcallv(current, succ, 0, 0);
5085 if ((*each)(current, arg))
break;
5088 if (RSTRING_LEN(current) == 0)
5099 if (!
rb_equal(str, *argp))
return 0;
5113 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5114 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5115 rb_enc_asciicompat(STR_ENC_GET(val))) {
5116 const char *bp = RSTRING_PTR(beg);
5117 const char *ep = RSTRING_PTR(end);
5118 const char *vp = RSTRING_PTR(val);
5119 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5120 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5128 if (b <= v && v < e)
return Qtrue;
5129 return RBOOL(!
RTEST(exclusive) && v == e);
5136 all_digits_p(bp, RSTRING_LEN(beg)) &&
5137 all_digits_p(ep, RSTRING_LEN(end))) {
5142 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5144 return RBOOL(
NIL_P(val));
5166 else if (RB_TYPE_P(indx,
T_REGEXP)) {
5167 return rb_str_subpat(str, indx,
INT2FIX(0));
5169 else if (RB_TYPE_P(indx,
T_STRING)) {
5170 if (rb_str_index(str, indx, 0) != -1)
5176 long beg, len = str_strlen(str, NULL);
5188 return str_substr(str, idx, 1, FALSE);
5207rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5210 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5211 return rb_str_subpat(str, argv[0], argv[1]);
5219 rb_check_arity(argc, 1, 2);
5220 return rb_str_aref(str, argv[0]);
5226 char *ptr = RSTRING_PTR(str);
5227 long olen = RSTRING_LEN(str), nlen;
5229 str_modifiable(str);
5230 if (len > olen) len = olen;
5232 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5234 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5236 STR_SET_EMBED_LEN(str, nlen);
5237 ptr =
RSTRING(str)->as.embed.ary;
5238 memmove(ptr, oldptr + len, nlen);
5239 if (fl == STR_NOEMBED)
xfree(oldptr);
5242 if (!STR_SHARED_P(str)) {
5244 rb_enc_cr_str_exact_copy(shared, str);
5247 ptr =
RSTRING(str)->as.heap.ptr += len;
5248 RSTRING(str)->as.heap.len = nlen;
5256rb_str_splice_0(
VALUE str,
long beg,
long len,
VALUE val)
5259 long slen, vlen = RSTRING_LEN(val);
5262 if (beg == 0 && vlen == 0) {
5267 str_modify_keep_cr(str);
5271 RESIZE_CAPA(str, slen + vlen - len);
5272 sptr = RSTRING_PTR(str);
5281 memmove(sptr + beg + vlen,
5283 slen - (beg + len));
5285 if (vlen < beg && len < 0) {
5286 MEMZERO(sptr + slen,
char, -len);
5289 memmove(sptr + beg, RSTRING_PTR(val), vlen);
5292 STR_SET_LEN(str, slen);
5293 TERM_FILL(&sptr[slen], TERM_LEN(str));
5303 int singlebyte = single_byte_optimizable(str);
5309 enc = rb_enc_check(str, val);
5310 slen = str_strlen(str, enc);
5312 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5319 assert(beg <= slen);
5320 if (len > slen - beg) {
5323 p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
5324 if (!p) p = RSTRING_END(str);
5325 e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
5326 if (!e) e = RSTRING_END(str);
5328 beg = p - RSTRING_PTR(str);
5330 rb_str_splice_0(str, beg, len, val);
5331 rb_enc_associate(str, enc);
/* Alias: rb_str_splice() forwards all four arguments unchanged to rb_str_update(). */
5337#define rb_str_splice(str, beg, len, val) rb_str_update(str, beg, len, val)
5344 long start, end, len;
5354 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5358 nth += regs->num_regs;
5368 enc = rb_enc_check_str(str, val);
5369 rb_str_splice_0(str, start, len, val);
5370 rb_enc_associate(str, enc);
5378 switch (
TYPE(indx)) {
5380 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5384 beg = rb_str_index(str, indx, 0);
5389 rb_str_splice(str, beg, str_strlen(indx, NULL), val);
5397 rb_str_splice(str, beg, len, val);
5405 rb_str_splice(str, idx, 1, val);
5440rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5443 if (RB_TYPE_P(argv[0],
T_REGEXP)) {
5444 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5451 rb_check_arity(argc, 2, 3);
5452 return rb_str_aset(str, argv[0], argv[1]);
5484 rb_str_splice(str, pos, 0, str2);
5512rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
5519 rb_check_arity(argc, 1, 2);
5520 str_modify_keep_cr(str);
5528 if ((nth += regs->num_regs) <= 0)
return Qnil;
5530 else if (nth >= regs->num_regs)
return Qnil;
5532 len = END(nth) - beg;
5535 else if (argc == 2) {
5543 if (!len)
return Qnil;
5544 beg = p - RSTRING_PTR(str);
5547 else if (RB_TYPE_P(indx,
T_STRING)) {
5548 beg = rb_str_index(str, indx, 0);
5549 if (beg == -1)
return Qnil;
5550 len = RSTRING_LEN(indx);
5561 if (!len)
return Qnil;
5562 beg = p - RSTRING_PTR(str);
5571 beg = p - RSTRING_PTR(str);
5574 result =
rb_str_new(RSTRING_PTR(str)+beg, len);
5575 rb_enc_cr_str_copy_for_substr(result, str);
5583 char *sptr = RSTRING_PTR(str);
5584 long slen = RSTRING_LEN(str);
5585 if (beg + len > slen)
5589 slen - (beg + len));
5591 STR_SET_LEN(str, slen);
5592 TERM_FILL(&sptr[slen], TERM_LEN(str));
5603 switch (OBJ_BUILTIN_TYPE(pat)) {
5622get_pat_quoted(
VALUE pat,
int check)
5626 switch (OBJ_BUILTIN_TYPE(pat)) {
5640 if (check && is_broken_string(pat)) {
5647rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
5650 pos = rb_strseq_index(str, pat, pos, 1);
5651 if (set_backref_str) {
5653 str = rb_str_new_frozen_String(str);
5654 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
5663 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
5683rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
5691 rb_check_arity(argc, min_arity, 2);
5697 hash = rb_check_hash_type(argv[1]);
5703 pat = get_pat_quoted(argv[0], 1);
5705 str_modifiable(str);
5706 beg = rb_pat_search(pat, str, 0, 1);
5720 end0 = beg0 + RSTRING_LEN(pat);
5729 if (iter || !
NIL_P(hash)) {
5730 p = RSTRING_PTR(str); len = RSTRING_LEN(str);
5736 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
5739 str_mod_check(str, p, len);
5746 enc = rb_enc_compatible(str, repl);
5749 p = RSTRING_PTR(str); len = RSTRING_LEN(str);
5753 rb_enc_name(str_enc),
5754 rb_enc_name(STR_ENC_GET(repl)));
5756 enc = STR_ENC_GET(repl);
5759 rb_enc_associate(str, enc);
5769 rlen = RSTRING_LEN(repl);
5770 len = RSTRING_LEN(str);
5772 RESIZE_CAPA(str, len + rlen - plen);
5774 p = RSTRING_PTR(str);
5776 memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
5778 rp = RSTRING_PTR(repl);
5779 memmove(p + beg0, rp, rlen);
5781 STR_SET_LEN(str, len);
5782 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
5809 rb_str_sub_bang(argc, argv, str);
5814str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
5818 long beg, beg0, end0;
5819 long offset, blen, slen, len, last;
5820 enum {STR, ITER, MAP} mode = STR;
5822 int need_backref = -1;
5832 hash = rb_check_hash_type(argv[1]);
5841 rb_error_arity(argc, 1, 2);
5844 pat = get_pat_quoted(argv[0], 1);
5845 beg = rb_pat_search(pat, str, 0, need_backref);
5847 if (bang)
return Qnil;
5852 blen = RSTRING_LEN(str) + 30;
5854 sp = RSTRING_PTR(str);
5855 slen = RSTRING_LEN(str);
5857 str_enc = STR_ENC_GET(str);
5858 rb_enc_associate(dest, str_enc);
5866 end0 = beg0 + RSTRING_LEN(pat);
5880 val = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
5883 str_mod_check(str, sp, slen);
5888 else if (need_backref) {
5890 if (need_backref < 0) {
5891 need_backref = val != repl;
5898 len = beg0 - offset;
5912 if (RSTRING_LEN(str) <= end0)
break;
5913 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
5915 offset = end0 + len;
5917 cp = RSTRING_PTR(str) + offset;
5918 if (offset > RSTRING_LEN(str))
break;
5919 beg = rb_pat_search(pat, str, offset, need_backref);
5921 if (RSTRING_LEN(str) > offset) {
5924 rb_pat_search(pat, str, last, 1);
5926 str_shared_replace(str, dest);
5954rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
5956 str_modify_keep_cr(str);
5957 return str_gsub(argc, argv, str, 1);
5980 return str_gsub(argc, argv, str, 0);
5998 str_modifiable(str);
5999 if (str == str2)
return str;
6003 return str_replace(str, str2);
6018rb_str_clear(
VALUE str)
6022 STR_SET_EMBED_LEN(str, 0);
6023 RSTRING_PTR(str)[0] = 0;
6024 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6043rb_str_chr(
VALUE str)
6067 pos += RSTRING_LEN(str);
6068 if (pos < 0 || RSTRING_LEN(str) <= pos)
6071 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6090 long len = RSTRING_LEN(str);
6091 char *ptr, *head, *left = 0;
6095 if (pos < -len || len <= pos)
6102 char byte = (char)(
NUM2INT(w) & 0xFF);
6104 if (!str_independent(str))
6105 str_make_independent(str);
6106 enc = STR_ENC_GET(str);
6107 head = RSTRING_PTR(str);
6109 if (!STR_EMBED_P(str)) {
6116 nlen = rb_enc_precise_mbclen(left, head+len, enc);
6123 left = rb_enc_left_char_head(head, ptr, head+len, enc);
6124 width = rb_enc_precise_mbclen(left, head+len, enc);
6126 nlen = rb_enc_precise_mbclen(left, head+len, enc);
6142str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6144 long n = RSTRING_LEN(str);
6146 if (beg > n || len < 0)
return Qnil;
6149 if (beg < 0)
return Qnil;
6154 if (!empty)
return Qnil;
6158 VALUE str2 = str_subseq(str, beg, len);
6160 str_enc_copy(str2, str);
6162 if (RSTRING_LEN(str2) == 0) {
6163 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6191 long beg, len = RSTRING_LEN(str);
6199 return str_byte_substr(str, beg, len, TRUE);
6204 return str_byte_substr(str, idx, 1, FALSE);
/*
 * Byte-slice entry point taking one or two arguments.
 *
 * One visible path returns str_byte_substr(str, beg, len, TRUE) directly;
 * the other validates the argument count (1..2) and forwards the first
 * argument to str_byte_aref().
 * NOTE(review): the condition choosing between the two paths is not shown.
 */
6251rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6256 return str_byte_substr(str, beg, len, TRUE);
6258 rb_check_arity(argc, 1, 2);
6259 return str_byte_aref(str, argv[0]);
/*
 * Replaces a byte range of the receiver with another string.
 *
 * Accepts 2 or 3 arguments. Visible validation:
 *   - beg must lie within [-slen, slen] (the failing branch is not shown),
 *   - len is clamped when it exceeds the bytes remaining after beg,
 *   - both beg and end must pass str_check_byte_pos(), otherwise an error
 *     "offset %ld does not land on character boundary" is produced.
 * The splice itself is rb_str_splice_0(), after which the encoding
 * returned by rb_enc_check(str, val) is associated with the receiver.
 */
6279rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6281 long beg, end, len, slen;
6286 rb_check_arity(argc, 2, 3);
6290 rb_builtin_class_name(argv[0]));
6300 slen = RSTRING_LEN(str);
/* Reject offsets beyond the end, or negative offsets reaching before the start. */
6301 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
6308 assert(beg <= slen);
6309 if (len > slen - beg) {
6313 if (!str_check_byte_pos(str, beg)) {
6315 "offset %ld does not land on character boundary", beg);
6317 if (!str_check_byte_pos(str, end)) {
6319 "offset %ld does not land on character boundary", end);
/* Resolve a common encoding before mutating the receiver. */
6322 enc = rb_enc_check(str, val);
6323 str_modify_keep_cr(str);
6324 rb_str_splice_0(str, beg, len, val);
6325 rb_enc_associate(str, enc);
/*
 * Returns a reversed copy of str.
 *
 * Strings of length 0 or 1 are simply duplicated. For longer strings the
 * result buffer `rev` is filled starting from its end pointer (p); a
 * single-byte-optimizable source takes its own branch, and the other
 * branches measure characters with rb_enc_fast_mbclen() or
 * rb_enc_mbclen(). Finally the result gets the source's length and
 * encoding.
 */
6343rb_str_reverse(
VALUE str)
6350 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6351 enc = STR_ENC_GET(str);
6353 s = RSTRING_PTR(str); e = RSTRING_END(str);
/* p starts at the end of the destination buffer. */
6354 p = RSTRING_END(rev);
6357 if (RSTRING_LEN(str) > 1) {
6358 if (single_byte_optimizable(str)) {
6365 int clen = rb_enc_fast_mbclen(s, e, enc);
6373 cr = rb_enc_asciicompat(enc) ?
6376 int clen = rb_enc_mbclen(s, e, enc);
6385 STR_SET_LEN(rev, RSTRING_LEN(str));
6386 str_enc_copy(rev, str);
/*
 * Reverses the receiver in place.
 *
 * Only strings longer than one byte take the reversing branches. When the
 * string is single-byte optimizable it is made modifiable and walked with
 * two pointers (s at the first byte, e at the last byte); otherwise the
 * contents are replaced by the result of rb_str_reverse() via
 * str_shared_replace(). A separate path still calls str_modify_keep_cr().
 */
6406rb_str_reverse_bang(
VALUE str)
6408 if (RSTRING_LEN(str) > 1) {
6409 if (single_byte_optimizable(str)) {
6412 str_modify_keep_cr(str);
6413 s = RSTRING_PTR(str);
/* e addresses the last byte, not one past the end. */
6414 e = RSTRING_END(str) - 1;
6422 str_shared_replace(str, rb_str_reverse(str));
6426 str_modify_keep_cr(str);
6451 i = rb_str_index(str, arg, 0);
6453 return RBOOL(i != -1);
6494 if (rb_check_arity(argc, 0, 1) && (base =
NUM2INT(argv[0])) < 0) {
6497 return rb_str_to_inum(str, base, FALSE);
6521rb_str_to_f(
VALUE str)
6539rb_str_to_s(
VALUE str)
6551 char s[RUBY_MAX_CHAR_LEN];
6552 int n = rb_enc_codelen(c, enc);
6554 rb_enc_mbcput(c, s, enc);
6559#define CHAR_ESC_LEN 13
/*
 * Formats code point c as an escape sequence into a local buffer.
 *
 * result    - string the escape is meant for (the append call itself is
 *             not visible here)
 * c         - code point to escape
 * unicode_p - selects between the "\u" forms and the "\x" forms
 *
 * Visible formats: "%c" (character as-is), "\uXXXX" for c < 0x10000,
 * "\u{X}" otherwise, and the "\xXX" / "\x{X}" forms. The buffer holds
 * CHAR_ESC_LEN + 1 bytes while every snprintf() is bounded by
 * CHAR_ESC_LEN. The length of the formatted text is taken with strlen().
 */
6562rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
6564 char buf[CHAR_ESC_LEN + 1];
6572 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
6574 else if (c < 0x10000) {
6575 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
6578 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
6583 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
6586 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
6589 l = (int)strlen(buf);
/*
 * Maps a control character to its backslash-escaped spelling.
 *
 * Returns a string literal for NUL, newline, carriage return, tab, form
 * feed, vertical tab (013), backspace (010), bell (007), escape (033) and
 * DEL (0x7f, spelled "\c?").
 * NOTE(review): the result for any other value is not visible here.
 */
6595ruby_escaped_char(
int c)
6598 case '\0':
return "\\0";
6599 case '\n':
return "\\n";
6600 case '\r':
return "\\r";
6601 case '\t':
return "\\t";
6602 case '\f':
return "\\f";
6603 case '\013':
return "\\v";
6604 case '\010':
return "\\b";
6605 case '\007':
return "\\a";
6606 case '\033':
return "\\e";
6607 case '\x7f':
return "\\c?";
/*
 * Builds an escaped rendering of str.
 *
 * The scan keeps three pointers: p (current position), pend (end of the
 * string) and prev (start of the pending run that has not been copied to
 * the result yet). Pending runs are flushed with str_buf_cat() before an
 * escape is emitted and once more at the end.
 *
 * Visible cases:
 *   - a character that rb_enc_precise_mbclen() cannot size is written
 *     byte-wise as "\xXX";
 *   - a code point with a ruby_escaped_char() spelling uses that spelling;
 *   - printable ASCII in an ASCII-compatible encoding takes its own branch;
 *   - other code points go through rb_str_buf_cat_escaped_char().
 */
6613rb_str_escape(
VALUE str)
6617 const char *p = RSTRING_PTR(str);
6618 const char *pend = RSTRING_END(str);
6619 const char *prev = p;
6620 char buf[CHAR_ESC_LEN + 1];
6622 int unicode_p = rb_enc_unicode_p(enc);
6623 int asciicompat = rb_enc_asciicompat(enc);
6628 int n = rb_enc_precise_mbclen(p, pend, enc);
/* Flush the unescaped run preceding the problematic bytes. */
6630 if (p > prev) str_buf_cat(result, prev, p - prev);
6631 n = rb_enc_mbminlen(enc);
6633 n = (int)(pend - p);
6635 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6636 str_buf_cat(result, buf, strlen(buf));
6642 c = rb_enc_mbc_to_codepoint(p, pend, enc);
6644 cc = ruby_escaped_char(c);
/* p has already advanced past the character, hence the "p - n". */
6646 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6647 str_buf_cat(result, cc, strlen(cc));
6650 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
6653 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6654 rb_str_buf_cat_escaped_char(result, c, unicode_p);
/* Copy whatever unescaped tail remains. */
6658 if (p > prev) str_buf_cat(result, prev, p - prev);
6682 const char *p, *pend, *prev;
6683 char buf[CHAR_ESC_LEN + 1];
6685 rb_encoding *resenc = rb_default_internal_encoding();
6686 int unicode_p = rb_enc_unicode_p(enc);
6687 int asciicompat = rb_enc_asciicompat(enc);
6689 if (resenc == NULL) resenc = rb_default_external_encoding();
6690 if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
6691 rb_enc_associate(result, resenc);
6692 str_buf_cat2(result,
"\"");
6694 p = RSTRING_PTR(str); pend = RSTRING_END(str);
6700 n = rb_enc_precise_mbclen(p, pend, enc);
6702 if (p > prev) str_buf_cat(result, prev, p - prev);
6703 n = rb_enc_mbminlen(enc);
6705 n = (int)(pend - p);
6707 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
6708 str_buf_cat(result, buf, strlen(buf));
6714 c = rb_enc_mbc_to_codepoint(p, pend, enc);
6716 if ((asciicompat || unicode_p) &&
6717 (c ==
'"'|| c ==
'\\' ||
6721 (cc = rb_enc_codepoint(p,pend,enc),
6722 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
6723 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6724 str_buf_cat2(result,
"\\");
6725 if (asciicompat || enc == resenc) {
6731 case '\n': cc =
'n';
break;
6732 case '\r': cc =
'r';
break;
6733 case '\t': cc =
't';
break;
6734 case '\f': cc =
'f';
break;
6735 case '\013': cc =
'v';
break;
6736 case '\010': cc =
'b';
break;
6737 case '\007': cc =
'a';
break;
6738 case 033: cc =
'e';
break;
6739 default: cc = 0;
break;
6742 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6745 str_buf_cat(result, buf, 2);
6757 if ((enc == resenc && rb_enc_isprint(c, enc) && c != 0x85) ||
6758 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
6762 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
6763 rb_str_buf_cat_escaped_char(result, c, unicode_p);
6768 if (p > prev) str_buf_cat(result, prev, p - prev);
6769 str_buf_cat2(result,
"\"");
6774#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
6794 int encidx = rb_enc_get_index(str);
6797 const char *p, *pend;
6800 int u8 = (encidx == rb_utf8_encindex());
6801 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
6804 if (!rb_enc_asciicompat(enc)) {
6806 len += strlen(enc->name);
6809 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6812 unsigned char c = *p++;
6815 case '"':
case '\\':
6816 case '\n':
case '\r':
6817 case '\t':
case '\f':
6818 case '\013':
case '\010':
case '\007':
case '\033':
6823 clen = IS_EVSTR(p, pend) ? 2 : 1;
6831 if (u8 && c > 0x7F) {
6832 int n = rb_enc_precise_mbclen(p-1, pend, enc);
6834 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
6837 else if (cc <= 0xFFFFF)
6850 if (clen > LONG_MAX - len) {
6857 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
6858 q = RSTRING_PTR(result); qend = q + len + 1;
6862 unsigned char c = *p++;
6864 if (c ==
'"' || c ==
'\\') {
6868 else if (c ==
'#') {
6869 if (IS_EVSTR(p, pend)) *q++ =
'\\';
6872 else if (c ==
'\n') {
6876 else if (c ==
'\r') {
6880 else if (c ==
'\t') {
6884 else if (c ==
'\f') {
6888 else if (c ==
'\013') {
6892 else if (c ==
'\010') {
6896 else if (c ==
'\007') {
6900 else if (c ==
'\033') {
6910 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
6912 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
6915 snprintf(q, qend-q,
"u%04X", cc);
6917 snprintf(q, qend-q,
"u{%X}", cc);
6922 snprintf(q, qend-q,
"x%02X", c);
6928 if (!rb_enc_asciicompat(enc)) {
6929 snprintf(q, qend-q, nonascii_suffix, enc->name);
6930 encidx = rb_ascii8bit_encindex();
6933 rb_enc_associate_index(result, encidx);
6939unescape_ascii(
unsigned int c)
/*
 * Decodes one escape sequence while undumping a string.
 *
 * undumped - string being rebuilt
 * ss       - in/out cursor into the dumped text
 * s_end    - end of the dumped text
 * penc     - in/out current encoding of the rebuilt string
 * utf8     - in/out flag (NOTE(review): presumably "a \u escape was seen")
 * binary   - in/out flag (NOTE(review): presumably "a \x escape was seen")
 *
 * Visible behaviour: simple escapes are translated with unescape_ascii();
 * the Unicode path switches the rebuilt string to UTF-8 when *penc differs,
 * rejects hex runs that are empty or longer than 6 digits, tests for code
 * points in the surrogate range 0xd800..0xdfff, and encodes the code point
 * into a 6-byte scratch buffer with rb_enc_mbcput().
 */
6963undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end,
rb_encoding **penc,
bool *utf8,
bool *binary)
6965 const char *s = *ss;
6969 unsigned char buf[6];
6987 *buf = unescape_ascii(*s);
6999 if (enc_utf8 == NULL) enc_utf8 = rb_utf8_encoding();
7000 if (*penc != enc_utf8) {
7002 rb_enc_associate(undumped, enc_utf8);
7019 if (hexlen == 0 || hexlen > 6) {
/* Surrogate code points are singled out in both Unicode escape forms. */
7025 if (0xd800 <= c && c <= 0xdfff) {
7028 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7038 if (0xd800 <= c && c <= 0xdfff) {
7041 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7069static VALUE rb_str_is_ascii_only_p(
VALUE str);
/*
 * Reverses a dumped string literal back into a string.
 *
 * Visible checks on the input: it is tested for being ASCII-only and for
 * embedded NULs, must be at least 2 bytes long and must start with a
 * double quote. After the quoted part an optional ".dup" and a
 * ".force_encoding(\"NAME\")" suffix are recognised: the text must end
 * exactly with the two bytes `")`, and NAME is resolved with
 * rb_enc_find_index2() and applied to the result.
 * Escape sequences are delegated to undump_after_backslash().
 * Malformed input jumps to invalid_format, which raises RuntimeError.
 */
7087str_undump(
VALUE str)
7089 const char *s = RSTRING_PTR(str);
7090 const char *s_end = RSTRING_END(str);
/* Start from an empty string in the source encoding. */
7092 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7094 bool binary =
false;
7098 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7101 if (!str_null_check(str, &w)) {
7104 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7105 if (*s !=
'"')
goto invalid_format;
7123 static const char force_encoding_suffix[] =
".force_encoding(\"";
7124 static const char dup_suffix[] =
".dup";
7125 const char *encname;
/* sizeof - 1 excludes the literal's terminating NUL. */
7130 size =
sizeof(dup_suffix) - 1;
7131 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7133 size =
sizeof(force_encoding_suffix) - 1;
7134 if (s_end - s <= size)
goto invalid_format;
7135 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
/* Locate the quote that closes the encoding name. */
7143 s = memchr(s,
'"', s_end-s);
7145 if (!s)
goto invalid_format;
7146 if (s_end - s != 2)
goto invalid_format;
7147 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7149 encidx = rb_enc_find_index2(encname, (
long)size);
7153 rb_enc_associate_index(undumped, encidx);
7163 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7172 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7178 if (rb_enc_dummy_p(enc)) {
7185str_true_enc(
VALUE str)
7188 rb_str_check_dummy_enc(enc);
/*
 * Translates case-mapping option symbols into OnigCaseFoldType bits.
 *
 * argc/argv - the option symbols supplied by the caller
 * flags     - base flags; returned with the option bits merged in
 *
 * Visible mappings:
 *   :turkic     -> ONIGENC_CASE_FOLD_TURKISH_AZERI (a following
 *                  :lithuanian adds ONIGENC_CASE_FOLD_LITHUANIAN)
 *   :lithuanian -> ONIGENC_CASE_FOLD_LITHUANIAN (a following :turkic adds
 *                  ONIGENC_CASE_FOLD_TURKISH_AZERI)
 *   :ascii      -> ONIGENC_CASE_ASCII_ONLY
 *   :fold       -> only when the flags request downcasing without
 *                  upcasing: the XOR toggles ONIGENC_CASE_FOLD and
 *                  ONIGENC_CASE_DOWNCASE together
 */
7192static OnigCaseFoldType
7193check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7199 if (argv[0]==sym_turkic) {
7200 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7202 if (argv[1]==sym_lithuanian)
7203 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7208 else if (argv[0]==sym_lithuanian) {
7209 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7211 if (argv[1]==sym_turkic)
7212 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7219 else if (argv[0]==sym_ascii)
7220 flags |= ONIGENC_CASE_ASCII_ONLY;
7221 else if (argv[0]==sym_fold) {
7222 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7223 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7235 if ((flags & ONIGENC_CASE_ASCII_ONLY) && (enc==rb_utf8_encoding() || rb_enc_mbmaxlen(enc) == 1))
7241#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7242#ifndef CASEMAP_DEBUG
7243# define CASEMAP_DEBUG 0
7251 OnigUChar space[FLEX_ARY_LEN];
/*
 * Releases a chain of mapping buffers.
 *
 * Walks the list through each node's `next` pointer; the pointer is
 * advanced before the node is released, and each node is passed to
 * ruby_sized_xfree() together with its recorded `capa`.
 */
7255mapping_buffer_free(
void *p)
7259 while (current_buffer) {
7260 previous_buffer = current_buffer;
7261 current_buffer = current_buffer->next;
7262 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7268 {0, mapping_buffer_free,}
/*
 * Body of the multi-buffer case-mapping routine (its header is not visible
 * in this listing; callers invoke it as rb_str_casemap(str, &flags, enc)).
 *
 * An empty source is simply duplicated. Otherwise the source is converted
 * chunk by chunk with enc->case_map() into a linked list of buffers whose
 * head is reachable through DATA_PTR(buffer_anchor):
 *   - each new buffer's capacity is (remaining source bytes) * (buffer
 *     ordinal) + CASE_MAPPING_ADDITIONAL_LENGTH, so later buffers grow;
 *   - pre_buffer always points at the link that must receive the next
 *     buffer, which is why it is advanced to &current_buffer->next;
 *   - a negative return from case_map() frees the whole chain.
 * With a single buffer the result is created straight from it; with
 * several, their used bytes are concatenated with memcpy(). The chain is
 * then freed and the source's encoding is copied onto the target.
 */
7276 const OnigUChar *source_current, *source_end;
7277 int target_length = 0;
7278 VALUE buffer_anchor;
7281 size_t buffer_count = 0;
7282 int buffer_length_or_invalid;
7284 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7286 source_current = (OnigUChar*)RSTRING_PTR(source);
7287 source_end = (OnigUChar*)RSTRING_END(source);
7291 while (source_current < source_end) {
7293 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7294 if (CASEMAP_DEBUG) {
7295 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n", capa);
/* Link the new buffer into the chain and move the link cursor past it. */
7298 *pre_buffer = current_buffer;
7299 pre_buffer = &current_buffer->next;
7300 current_buffer->next = NULL;
7301 current_buffer->capa = capa;
7302 buffer_length_or_invalid = enc->case_map(flags,
7303 &source_current, source_end,
7304 current_buffer->space,
7305 current_buffer->space+current_buffer->capa,
/* Negative result: release every buffer allocated so far. */
7307 if (buffer_length_or_invalid < 0) {
7308 current_buffer =
DATA_PTR(buffer_anchor);
7310 mapping_buffer_free(current_buffer);
7313 target_length += current_buffer->used = buffer_length_or_invalid;
7315 if (CASEMAP_DEBUG) {
7316 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
/* Common case: everything fit into the first buffer. */
7319 if (buffer_count==1) {
7320 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7323 char *target_current;
7326 target_current = RSTRING_PTR(target);
7327 current_buffer =
DATA_PTR(buffer_anchor);
7328 while (current_buffer) {
7329 memcpy(target_current, current_buffer->space, current_buffer->used);
7330 target_current += current_buffer->used;
7331 current_buffer = current_buffer->next;
7334 current_buffer =
DATA_PTR(buffer_anchor);
7336 mapping_buffer_free(current_buffer);
7341 str_enc_copy(target, source);
7350 const OnigUChar *source_current, *source_end;
7351 OnigUChar *target_current, *target_end;
7352 long old_length = RSTRING_LEN(source);
7353 int length_or_invalid;
7355 if (old_length == 0)
return Qnil;
7357 source_current = (OnigUChar*)RSTRING_PTR(source);
7358 source_end = (OnigUChar*)RSTRING_END(source);
7359 if (source == target) {
7360 target_current = (OnigUChar*)source_current;
7361 target_end = (OnigUChar*)source_end;
7364 target_current = (OnigUChar*)RSTRING_PTR(target);
7365 target_end = (OnigUChar*)RSTRING_END(target);
7368 length_or_invalid = onigenc_ascii_only_case_map(flags,
7369 &source_current, source_end,
7370 target_current, target_end, enc);
7371 if (length_or_invalid < 0)
7373 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7374 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7375 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7377 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7380 str_enc_copy(target, source);
/*
 * Byte-wise ASCII upcasing of str in place.
 *
 * Scans from the first byte towards the end pointer; a byte in 'a'..'z'
 * is overwritten with the matching letter in 'A'..'Z'. `modified` starts
 * out false (NOTE(review): presumably set on a change and returned --
 * callers test the result as a boolean).
 */
7386upcase_single(
VALUE str)
7388 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7389 bool modified =
false;
/* Read through unsigned char so bytes >= 0x80 never compare as negative. */
7392 unsigned int c = *(
unsigned char*)s;
7394 if (
'a' <= c && c <=
'z') {
7395 *s =
'A' + (c -
'a');
/*
 * Upcases the receiver in place.
 *
 * Starts from ONIGENC_CASE_UPCASE, merges caller options through
 * check_case_options() and makes the string modifiable. One of three
 * strategies follows:
 *   1. case_option_single_p() holds: byte-wise upcase_single(); a true
 *      result sets ONIGENC_CASE_MODIFIED;
 *   2. ASCII-only requested: rb_str_ascii_casemap() with the receiver as
 *      both source and target;
 *   3. otherwise: the contents are replaced by rb_str_casemap()'s result.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set (the other return
 * value is not visible here).
 */
7423rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7426 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7428 flags = check_case_options(argc, argv, flags);
7429 str_modify_keep_cr(str);
7430 enc = str_true_enc(str);
7431 if (case_option_single_p(flags, enc, str)) {
7432 if (upcase_single(str))
7433 flags |= ONIGENC_CASE_MODIFIED;
7435 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7436 rb_str_ascii_casemap(str, str, &flags, enc);
7438 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7440 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns an upcased copy of str.
 *
 * Uses the same option handling as the in-place variant but works on a
 * separate result `ret`:
 *   - single-byte path: `ret` is a fresh byte copy of str carrying the
 *     same encoding;
 *   - ASCII-only path: rb_str_ascii_casemap() writes from str into `ret`;
 *   - general path: `ret` is whatever rb_str_casemap() produces.
 */
7462rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7465 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7468 flags = check_case_options(argc, argv, flags);
7469 enc = str_true_enc(str);
7470 if (case_option_single_p(flags, enc, str)) {
7471 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7472 str_enc_copy(ret, str);
7475 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7477 rb_str_ascii_casemap(str, ret, &flags, enc);
7480 ret = rb_str_casemap(str, &flags, enc);
/*
 * Byte-wise ASCII downcasing of str in place.
 *
 * Scans from the first byte towards the end pointer; a byte in 'A'..'Z'
 * is overwritten with the matching letter in 'a'..'z'. `modified` starts
 * out false (NOTE(review): presumably set on a change and returned --
 * callers test the result as a boolean).
 */
7487downcase_single(
VALUE str)
7489 char *s = RSTRING_PTR(str), *send = RSTRING_END(str);
7490 bool modified =
false;
/* Read through unsigned char so bytes >= 0x80 never compare as negative. */
7493 unsigned int c = *(
unsigned char*)s;
7495 if (
'A' <= c && c <=
'Z') {
7496 *s =
'a' + (c -
'A');
/*
 * Downcases the receiver in place.
 *
 * Starts from ONIGENC_CASE_DOWNCASE, merges caller options through
 * check_case_options() and makes the string modifiable. One of three
 * strategies follows:
 *   1. case_option_single_p() holds: byte-wise downcase_single(); a true
 *      result sets ONIGENC_CASE_MODIFIED;
 *   2. ASCII-only requested: rb_str_ascii_casemap() with the receiver as
 *      both source and target;
 *   3. otherwise: the contents are replaced by rb_str_casemap()'s result.
 * Returns str when ONIGENC_CASE_MODIFIED ended up set (the other return
 * value is not visible here).
 */
7525rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
7528 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7530 flags = check_case_options(argc, argv, flags);
7531 str_modify_keep_cr(str);
7532 enc = str_true_enc(str);
7533 if (case_option_single_p(flags, enc, str)) {
7534 if (downcase_single(str))
7535 flags |= ONIGENC_CASE_MODIFIED;
7537 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7538 rb_str_ascii_casemap(str, str, &flags, enc);
7540 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7542 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns a downcased copy of str.
 *
 * Uses the same option handling as the in-place variant but works on a
 * separate result `ret`:
 *   - single-byte path: `ret` is a fresh byte copy of str carrying the
 *     same encoding, then downcase_single() is applied to the copy;
 *   - ASCII-only path: rb_str_ascii_casemap() writes from str into `ret`;
 *   - general path: `ret` is whatever rb_str_casemap() produces.
 */
7564rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
7567 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
7570 flags = check_case_options(argc, argv, flags);
7571 enc = str_true_enc(str);
7572 if (case_option_single_p(flags, enc, str)) {
7573 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
7574 str_enc_copy(ret, str);
7575 downcase_single(ret);
7577 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
7579 rb_str_ascii_casemap(str, ret, &flags, enc);
7582 ret = rb_str_casemap(str, &flags, enc);
/*
 * Capitalizes the receiver in place.
 *
 * The base flags combine ONIGENC_CASE_UPCASE with ONIGENC_CASE_TITLECASE.
 * An empty string (or one without a buffer) yields Qnil right after the
 * receiver has been made modifiable. ASCII-only requests go through
 * rb_str_ascii_casemap() in place; everything else replaces the contents
 * with rb_str_casemap()'s result. Returns str when
 * ONIGENC_CASE_MODIFIED ended up set (the other return value is not
 * visible here).
 */
7610rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
7613 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7615 flags = check_case_options(argc, argv, flags);
7616 str_modify_keep_cr(str);
7617 enc = str_true_enc(str);
7618 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7619 if (flags&ONIGENC_CASE_ASCII_ONLY)
7620 rb_str_ascii_casemap(str, str, &flags, enc);
7622 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7624 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns a capitalized version of str.
 *
 * The base flags combine ONIGENC_CASE_UPCASE with ONIGENC_CASE_TITLECASE.
 * For an empty string (or one without a buffer) the receiver itself is
 * returned. ASCII-only requests map from str into a separate `ret` with
 * rb_str_ascii_casemap(); otherwise `ret` comes from rb_str_casemap().
 */
7648rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
7651 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
7654 flags = check_case_options(argc, argv, flags);
7655 enc = str_true_enc(str);
7656 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
7657 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7659 rb_str_ascii_casemap(str, ret, &flags, enc);
7662 ret = rb_str_casemap(str, &flags, enc);
/*
 * Swaps the case of the receiver in place.
 *
 * The base flags request both ONIGENC_CASE_UPCASE and
 * ONIGENC_CASE_DOWNCASE. ASCII-only requests go through
 * rb_str_ascii_casemap() in place; everything else replaces the contents
 * with rb_str_casemap()'s result. Returns str when
 * ONIGENC_CASE_MODIFIED ended up set (the other return value is not
 * visible here).
 */
7689rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
7692 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7694 flags = check_case_options(argc, argv, flags);
7695 str_modify_keep_cr(str);
7696 enc = str_true_enc(str);
7697 if (flags&ONIGENC_CASE_ASCII_ONLY)
7698 rb_str_ascii_casemap(str, str, &flags, enc);
7700 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7702 if (ONIGENC_CASE_MODIFIED&flags)
return str;
/*
 * Returns a case-swapped version of str.
 *
 * The base flags request both ONIGENC_CASE_UPCASE and
 * ONIGENC_CASE_DOWNCASE. An empty string (or one without a buffer) is
 * answered with a duplicate of the receiver. ASCII-only requests map from
 * str into a separate `ret` with rb_str_ascii_casemap(); otherwise `ret`
 * comes from rb_str_casemap().
 */
7726rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
7729 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
7732 flags = check_case_options(argc, argv, flags);
7733 enc = str_true_enc(str);
7734 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
7735 if (flags&ONIGENC_CASE_ASCII_ONLY) {
7737 rb_str_ascii_casemap(str, ret, &flags, enc);
7740 ret = rb_str_casemap(str, &flags, enc);
7745typedef unsigned char *USTR;
7749 unsigned int now, max;
7761 if (t->p == t->pend)
return -1;
7762 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
7765 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7767 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
7769 if (t->p < t->pend) {
7770 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
7773 if (t->now < 0x80 && c < 0x80) {
7775 "invalid range \"%c-%c\" in string transliteration",
7790 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
7791 if (t->now == t->max) {
7796 if (t->now < t->max) {
7812 const unsigned int errc = -1;
7813 unsigned int trans[256];
7815 struct tr trsrc, trrepl;
7817 unsigned int c, c0, last = 0;
7818 int modify = 0, i, l;
7819 unsigned char *s, *send;
7821 int singlebyte = single_byte_optimizable(str);
7825#define CHECK_IF_ASCII(c) \
7826 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
7827 (cr = ENC_CODERANGE_VALID) : 0)
7831 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
7832 if (RSTRING_LEN(repl) == 0) {
7833 return rb_str_delete_bang(1, &src, str);
7837 e1 = rb_enc_check(str, src);
7838 e2 = rb_enc_check(str, repl);
7843 enc = rb_enc_check(src, repl);
7845 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
7846 if (RSTRING_LEN(src) > 1 &&
7847 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
7848 trsrc.p + l < trsrc.pend) {
7852 trrepl.p = RSTRING_PTR(repl);
7853 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
7854 trsrc.gen = trrepl.gen = 0;
7855 trsrc.now = trrepl.now = 0;
7856 trsrc.max = trrepl.max = 0;
7859 for (i=0; i<256; i++) {
7862 while ((c = trnext(&trsrc, enc)) != errc) {
7867 if (!hash) hash = rb_hash_new();
7871 while ((c = trnext(&trrepl, enc)) != errc)
7874 for (i=0; i<256; i++) {
7875 if (trans[i] != errc) {
7883 for (i=0; i<256; i++) {
7886 while ((c = trnext(&trsrc, enc)) != errc) {
7887 r = trnext(&trrepl, enc);
7888 if (r == errc) r = trrepl.now;
7891 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
7894 if (!hash) hash = rb_hash_new();
7902 str_modify_keep_cr(str);
7903 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)RSTRING_END(str);
7904 termlen = rb_enc_mbminlen(enc);
7907 long offset, max = RSTRING_LEN(str);
7908 unsigned int save = -1;
7909 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7914 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
7915 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
7924 if (cflag) c = last;
7927 else if (cflag) c = errc;
7933 if (c != (
unsigned int)-1) {
7939 tlen = rb_enc_codelen(c, enc);
7945 if (enc != e1) may_modify = 1;
7947 if ((offset = t - buf) + tlen > max) {
7948 size_t MAYBE_UNUSED(old) = max + termlen;
7949 max = offset + tlen + (send - s);
7950 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
7953 rb_enc_mbcput(c, t, enc);
7954 if (may_modify && memcmp(s, t, tlen) != 0) {
7960 if (!STR_EMBED_P(str)) {
7961 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
7963 TERM_FILL((
char *)t, termlen);
7964 RSTRING(str)->as.heap.ptr = (
char *)buf;
7965 RSTRING(str)->as.heap.len = t - buf;
7966 STR_SET_NOEMBED(str);
7967 RSTRING(str)->as.heap.aux.capa = max;
7969 else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
7971 c = (
unsigned char)*s;
7972 if (trans[c] != errc) {
7989 long offset, max = (long)((send - s) * 1.2);
7990 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
7994 c0 = c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, e1);
7995 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8003 if (cflag) c = last;
8006 else if (cflag) c = errc;
8010 c = cflag ? last : errc;
8013 tlen = rb_enc_codelen(c, enc);
8018 if (enc != e1) may_modify = 1;
8020 if ((offset = t - buf) + tlen > max) {
8021 size_t MAYBE_UNUSED(old) = max + termlen;
8022 max = offset + tlen + (long)((send - s) * 1.2);
8023 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8027 rb_enc_mbcput(c, t, enc);
8028 if (may_modify && memcmp(s, t, tlen) != 0) {
8036 if (!STR_EMBED_P(str)) {
8037 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8039 TERM_FILL((
char *)t, termlen);
8040 RSTRING(str)->as.heap.ptr = (
char *)buf;
8041 RSTRING(str)->as.heap.len = t - buf;
8042 STR_SET_NOEMBED(str);
8043 RSTRING(str)->as.heap.aux.capa = max;
8049 rb_enc_associate(str, enc);
8068 return tr_trans(str, src, repl, 0);
8115 tr_trans(str, src, repl, 0);
8119#define TR_TABLE_MAX (UCHAR_MAX+1)
8120#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
/*
 * Folds one character-set specification into a lookup table.
 *
 * str    - the specification string
 * stable - TR_TABLE_SIZE bytes: one entry per byte value plus a final
 *          slot at index TR_TABLE_MAX
 * first  - non-zero for the first specification of a call
 *
 * A specification longer than one byte that starts with '^' is tested
 * specially (NOTE(review): presumably this sets cflag, i.e. negation).
 * Code points below TR_TABLE_MAX are recorded in a local byte table `buf`
 * as !cflag; larger ones are kept in hash tables. The final loop ANDs
 * `buf` into `stable`, so successive specifications intersect.
 * The slot stable[TR_TABLE_MAX] stores cflag on one path and is cleared
 * when it was set but the current specification is not negated.
 */
8122tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8125 const unsigned int errc = -1;
8126 char buf[TR_TABLE_MAX];
8129 VALUE table = 0, ptable = 0;
8130 int i, l, cflag = 0;
8132 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8133 tr.gen =
tr.now =
tr.max = 0;
8135 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8140 for (i=0; i<TR_TABLE_MAX; i++) {
8143 stable[TR_TABLE_MAX] = cflag;
8145 else if (stable[TR_TABLE_MAX] && !cflag) {
8146 stable[TR_TABLE_MAX] = 0;
8148 for (i=0; i<TR_TABLE_MAX; i++) {
/* trnext() yields errc once the specification is exhausted. */
8152 while ((c = trnext(&
tr, enc)) != errc) {
8153 if (c < TR_TABLE_MAX) {
8154 buf[(
unsigned char)c] = !cflag;
8159 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8162 table = ptable ? ptable : rb_hash_new();
8166 table = rb_hash_new();
8171 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8172 rb_hash_aset(table, key,
Qtrue);
/* Intersect this specification with what earlier ones allowed. */
8176 for (i=0; i<TR_TABLE_MAX; i++) {
8177 stable[i] = stable[i] && buf[i];
8179 if (!table && !cflag) {
/*
 * Tests whether code point c is selected by a table built with
 * tr_setup_table().
 *
 * c     - code point to test
 * table - byte table; index TR_TABLE_MAX holds the fallback answer
 * del   - hash consulted for code points >= TR_TABLE_MAX
 * nodel - optional second hash (0 when absent) consulted alongside del
 *
 * Code points below TR_TABLE_MAX are answered from the byte table alone.
 * Larger ones are looked up in `del` and `nodel`; when neither visible
 * hash condition applies, table[TR_TABLE_MAX] decides.
 */
8186tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8188 if (c < TR_TABLE_MAX) {
8189 return table[c] != 0;
8195 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8196 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8200 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8203 return table[TR_TABLE_MAX] ? TRUE : FALSE;
/*
 * Deletes, in place, the characters selected by the given specifications.
 *
 * Returns Qnil immediately for an empty string (or one without a buffer).
 * Every argument is checked for encoding compatibility and merged into
 * the selection table by tr_setup_table(); only the first one is passed
 * with `first` set. The string is then compacted with a read pointer `s`
 * and a write pointer `t`: ASCII bytes take a fast path in
 * ASCII-compatible encodings, other characters are decoded with
 * rb_enc_codepoint_len(), tested with tr_find(), and re-encoded at `t`
 * when the two pointers have diverged. Afterwards the terminator is
 * rewritten and the length updated. Returns str when `modify` is set (the
 * other return value is not visible here).
 */
8217rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8219 char squeez[TR_TABLE_SIZE];
8222 VALUE del = 0, nodel = 0;
8224 int i, ascompat, cr;
8226 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8228 for (i=0; i<argc; i++) {
8232 enc = rb_enc_check(str, s);
8233 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8236 str_modify_keep_cr(str);
8237 ascompat = rb_enc_asciicompat(enc);
/* s reads, t writes; both start at the beginning of the buffer. */
8238 s = t = RSTRING_PTR(str);
8239 send = RSTRING_END(str);
8245 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8256 c = rb_enc_codepoint_len(s, send, &clen, enc);
8258 if (tr_find(c, squeez, del, nodel)) {
/* No copy is needed while nothing has been removed yet. */
8262 if (t != s) rb_enc_mbcput(c, t, enc);
8269 TERM_FILL(t, TERM_LEN(str));
8270 STR_SET_LEN(str, t - RSTRING_PTR(str));
8273 if (modify)
return str;
/*
 * Non-destructive counterpart of rb_str_delete_bang().
 *
 * The visible body forwards all arguments to rb_str_delete_bang().
 * NOTE(review): `str` is presumably rebound to a duplicate on a line not
 * shown here before this call -- confirm.
 */
8293rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8296 rb_str_delete_bang(argc, argv, str);
/*
 * Collapses runs of identical characters in place.
 *
 * With arguments, only characters selected by the specifications (merged
 * via tr_setup_table()) are squeezed; the `argc > 0 && !squeez[c]` tests
 * keep unselected characters untouched. `singlebyte` starts from the
 * receiver's single-byte property and is re-examined for each
 * specification.
 *
 * Returns Qnil for an empty string (checked after the receiver has been
 * made modifiable). A character is kept when it differs from the
 * previously kept one (`save`) or is not selected. Three scanning
 * variants are visible: plain bytes, ASCII bytes inside a multibyte
 * encoding, and full code points tested with tr_find(). The length is
 * rewritten only if the write pointer ended before the old end. Returns
 * str when `modify` is set (the other return value is not visible here).
 */
8310rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8312 char squeez[TR_TABLE_SIZE];
8314 VALUE del = 0, nodel = 0;
8315 unsigned char *s, *send, *t;
8317 int ascompat, singlebyte = single_byte_optimizable(str);
8321 enc = STR_ENC_GET(str);
8324 for (i=0; i<argc; i++) {
8328 enc = rb_enc_check(str, s);
8329 if (singlebyte && !single_byte_optimizable(s))
8331 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8335 str_modify_keep_cr(str);
8336 s = t = (
unsigned char *)RSTRING_PTR(str);
8337 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8338 send = (
unsigned char *)RSTRING_END(str);
8340 ascompat = rb_enc_asciicompat(enc);
8344 unsigned int c = *s++;
8345 if (c != save || (argc > 0 && !squeez[c])) {
8355 if (ascompat && (c = *s) < 0x80) {
8356 if (c != save || (argc > 0 && !squeez[c])) {
8362 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8364 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8365 if (t != s) rb_enc_mbcput(c, t, enc);
8374 TERM_FILL((
char *)t, TERM_LEN(str));
8375 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8376 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8380 if (modify)
return str;
/*
 * Non-destructive counterpart of rb_str_squeeze_bang().
 *
 * The visible body forwards all arguments to rb_str_squeeze_bang().
 * NOTE(review): `str` is presumably rebound to a duplicate on a line not
 * shown here before this call -- confirm.
 */
8403rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8406 rb_str_squeeze_bang(argc, argv, str);
8424 return tr_trans(str, src, repl, 1);
8447 tr_trans(str, src, repl, 1);
/*
 * Counts the characters of str selected by the given specifications.
 *
 * Fast path: when the specification is exactly one byte, the encoding is
 * ASCII-compatible, ONIGENC_IS_ALLOWED_REVERSE_MATCH accepts that byte
 * and the receiver is not a broken string, the bytes of str are compared
 * against it directly.
 *
 * General path: the first specification initialises the table
 * (first = TRUE) and the remaining ones refine it (first = FALSE). The
 * receiver is then scanned, taking ASCII bytes directly in
 * ASCII-compatible encodings and decoding everything else with
 * rb_enc_codepoint_len() before asking tr_find().
 *
 * Both paths return INT2FIX(0) for an empty receiver.
 */
8476rb_str_count(
int argc,
VALUE *argv,
VALUE str)
8478 char table[TR_TABLE_SIZE];
8480 VALUE del = 0, nodel = 0, tstr;
8490 enc = rb_enc_check(str, tstr);
8493 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
8494 (ptstr = RSTRING_PTR(tstr),
8495 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
8496 !is_broken_string(str)) {
8498 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
8500 s = RSTRING_PTR(str);
8501 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8502 send = RSTRING_END(str);
8504 if (*(
unsigned char*)s++ == c) n++;
8510 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
8511 for (i=1; i<argc; i++) {
8514 enc = rb_enc_check(str, tstr);
8515 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
8518 s = RSTRING_PTR(str);
8519 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
8520 send = RSTRING_END(str);
8521 ascompat = rb_enc_asciicompat(enc);
8525 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8533 c = rb_enc_codepoint_len(s, send, &clen, enc);
8534 if (tr_find(c, table, del, nodel)) {
/*
 * Validates a field-separator value.
 *
 * Returns 0 when val is nil; the handling of non-nil values is not
 * visible here. The split code treats a zero result as a failed check.
 */
8545rb_fs_check(
VALUE val)
8549 if (
NIL_P(val))
return 0;
/*
 * Lookup table for ASCII whitespace, indexed by byte value.
 *
 * Non-zero entries: 9..13 (tab, line feed, vertical tab, form feed,
 * carriage return) and 32 (space). Every other byte, including all values
 * >= 0x80, maps to 0.
 */
8554static const char isspacetable[256] = {
8555 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
8556 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8557 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8558 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8562 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8564 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8565 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8566 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8567 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8568 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8569 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
8570 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* The cast to unsigned char keeps the index in 0..255 even for negative chars. */
8573#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
/*
 * Emits one field of a split and maintains the pending-empty-field count.
 *
 * result      - destination array for the visible rb_ary_push() calls
 * str         - source string
 * beg, len    - byte range of the field
 * empty_count - number of empty fields held back so far; a negative value
 *               disables the hold-back logic
 *
 * An empty field is not emitted while hold-back is active: the count is
 * simply returned incremented. When a non-empty field arrives and empty
 * fields are pending, the pending ones are flushed first as fresh empty
 * strings, either pushed onto `result` or yielded.
 */
8576split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
8578 if (empty_count >= 0 && len == 0) {
8579 return empty_count + 1;
8581 if (empty_count > 0) {
8585 rb_ary_push(result, str_new_empty_String(str));
8586 }
while (--empty_count > 0);
8590 rb_yield(str_new_empty_String(str));
8591 }
while (--empty_count > 0);
8596 rb_ary_push(result, str);
8605 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
/*
 * Classifies a literal separator string.
 *
 * spat         - the separator
 * default_type - result when no special case applies
 *
 * Visible results: SPLIT_TYPE_CHARS on the first branch (its condition is
 * not shown here); SPLIT_TYPE_AWK when the separator is exactly one space
 * -- a direct byte comparison in ASCII-compatible encodings, a decoded
 * comparison via rb_enc_ascget() otherwise; default_type in all other
 * cases.
 */
8609literal_split_pattern(
VALUE spat, split_type_t default_type)
8617 return SPLIT_TYPE_CHARS;
8619 else if (rb_enc_asciicompat(enc)) {
8620 if (len == 1 && ptr[0] ==
' ') {
8621 return SPLIT_TYPE_AWK;
8626 if (rb_enc_ascget(ptr, ptr + len, &l, enc) ==
' ' && len == l) {
8627 return SPLIT_TYPE_AWK;
8630 return default_type;
/*
 * Splits str into fields.
 *
 * Accepts up to two optional arguments (separator, limit). A limit <= 0
 * is treated like no limit; when no limit is in effect and `lim` is zero,
 * empty_count is set to 0, which turns on the hold-back of empty fields
 * implemented by split_string().
 *
 * The separator selects one of four strategies (split_type):
 *   SPLIT_TYPE_AWK    - whitespace splitting, with a byte-wise loop for
 *                       ASCII strings and a code-point loop otherwise;
 *   SPLIT_TYPE_STRING - literal search with rb_memsearch(), skipping hits
 *                       that do not start on a character boundary;
 *   SPLIT_TYPE_CHARS  - one field per character;
 *   SPLIT_TYPE_REGEXP - regexp matching, also emitting captured groups.
 * A regexp whose source is a plain literal is downgraded to one of the
 * literal strategies through literal_split_pattern().
 *
 * Fields are emitted through the SPLIT_STR() macro. The return value is
 * `result` when it is non-zero and the receiver itself otherwise.
 */
8643rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
8648 split_type_t split_type;
8649 long beg, end, i = 0, empty_count = -1;
8654 if (rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
8656 if (lim <= 0) limit =
Qnil;
8657 else if (lim == 1) {
8658 if (RSTRING_LEN(str) == 0)
8669 if (
NIL_P(limit) && !lim) empty_count = 0;
8671 enc = STR_ENC_GET(str);
8672 split_type = SPLIT_TYPE_REGEXP;
8674 spat = get_pat_quoted(spat, 0);
/* No explicit separator: fall back to the global rb_fs. */
8676 else if (
NIL_P(spat = rb_fs)) {
8677 split_type = SPLIT_TYPE_AWK;
8679 else if (!(spat = rb_fs_check(spat))) {
8685 if (split_type != SPLIT_TYPE_AWK) {
8689 tmp = RREGEXP_SRC(spat);
8690 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
8691 if (split_type == SPLIT_TYPE_AWK) {
8693 split_type = SPLIT_TYPE_STRING;
8698 mustnot_broken(spat);
8699 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
/* SPLIT_STR emits one field and threads empty_count through split_string(). */
8707#define SPLIT_STR(beg, len) (empty_count = split_string(result, str, beg, len, empty_count))
8709 if (result) result = rb_ary_new();
8711 char *ptr = RSTRING_PTR(str);
8712 char *eptr = RSTRING_END(str);
8713 if (split_type == SPLIT_TYPE_AWK) {
8719 if (is_ascii_string(str)) {
8720 while (ptr < eptr) {
8721 c = (
unsigned char)*ptr++;
8723 if (ascii_isspace(c)) {
8729 if (!
NIL_P(limit) && lim <= i)
break;
8732 else if (ascii_isspace(c)) {
8733 SPLIT_STR(beg, end-beg);
8736 if (!
NIL_P(limit)) ++i;
8744 while (ptr < eptr) {
8747 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
8756 if (!
NIL_P(limit) && lim <= i)
break;
8760 SPLIT_STR(beg, end-beg);
8763 if (!
NIL_P(limit)) ++i;
8771 else if (split_type == SPLIT_TYPE_STRING) {
8772 char *str_start = ptr;
8773 char *substr_start = ptr;
8774 char *sptr = RSTRING_PTR(spat);
8775 long slen = RSTRING_LEN(spat);
8777 mustnot_broken(str);
8778 enc = rb_enc_check(str, spat);
8779 while (ptr < eptr &&
8780 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
/* A hit in the middle of a multibyte character is not a real match. */
8782 char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
8783 if (t != ptr + end) {
8787 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
8790 if (!
NIL_P(limit) && lim <= ++i)
break;
8792 beg = ptr - str_start;
8794 else if (split_type == SPLIT_TYPE_CHARS) {
8795 char *str_start = ptr;
8798 mustnot_broken(str);
8799 enc = rb_enc_get(str);
8800 while (ptr < eptr &&
8801 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
8802 SPLIT_STR(ptr - str_start, n);
8804 if (!
NIL_P(limit) && lim <= ++i)
break;
8806 beg = ptr - str_start;
8809 long len = RSTRING_LEN(str);
8817 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
/* Zero-width match at the current start position. */
8822 if (start == end && BEG(0) == END(0)) {
8827 else if (last_null == 1) {
8828 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
8835 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
8841 SPLIT_STR(beg, end-beg);
8842 beg = start = END(0);
/* Captured groups become fields too; unmatched groups (-1) are skipped. */
8846 for (idx=1; idx < regs->num_regs; idx++) {
8847 if (BEG(idx) == -1)
continue;
8848 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
8850 if (!
NIL_P(limit) && lim <= ++i)
break;
8852 if (match) rb_match_unbusy(match);
/* Emit the remainder after the last separator. */
8854 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
8855 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
8858 return result ? result : str;
8868 return rb_str_split_m(1, &sep, str);
8871#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
8877 rb_ary_push(ary, e);
8886#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
/*
 * Examines the end of the range [p, e) for a line terminator.
 *
 * p   - start of the range
 * e   - end of the range (one past the last byte)
 * enc - encoding of the range
 *
 * The last character is located with rb_enc_prev_char() and tested with
 * rb_enc_is_newline(). If it is a newline, the character before it is
 * located the same way and compared with '\r', so that a CR+LF pair can
 * be recognised as one terminator.
 * NOTE(review): the adjustment of `e` and the return value are not
 * visible here; one caller assigns the result to its end pointer.
 */
8889chomp_newline(
const char *p,
const char *e,
rb_encoding *enc)
8891 const char *prev = rb_enc_prev_char(p, e, e, enc);
8892 if (rb_enc_is_newline(prev, e, enc)) {
8894 prev = rb_enc_prev_char(p, e, e, enc);
8895 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
8907 RSTRING_LEN(rs) != 1 ||
8908 RSTRING_PTR(rs)[0] !=
'\n')) {
8914#define rb_rs get_rs()
8921 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
8922 long pos, len, rslen;
8925 if (rb_scan_args(argc, argv,
"01:", &rs, &opts) == 0)
8928 static ID keywords[1];
8930 keywords[0] = rb_intern_const(
"chomp");
8933 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
8937 if (!ENUM_ELEM(ary, str)) {
8945 if (!RSTRING_LEN(str))
goto end;
8947 ptr = subptr = RSTRING_PTR(str);
8948 pend = RSTRING_END(str);
8949 len = RSTRING_LEN(str);
8951 rslen = RSTRING_LEN(rs);
8954 enc = rb_enc_get(str);
8956 enc = rb_enc_check(str, rs);
8961 const char *eol = NULL;
8963 while (subend < pend) {
8964 long chomp_rslen = 0;
8966 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
8968 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
8969 if (rb_enc_is_newline(subend + n, pend, enc)) {
8970 if (eol == subend)
break;
8974 chomp_rslen = -rslen;
8978 if (!subptr) subptr = subend;
8982 }
while (subend < pend);
8984 if (rslen == 0) chomp_rslen = 0;
8986 subend - subptr + (chomp ? chomp_rslen : rslen));
8987 if (ENUM_ELEM(ary, line)) {
8988 str_mod_check(str, ptr, len);
8990 subptr = eol = NULL;
8995 rsptr = RSTRING_PTR(rs);
8996 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
8997 rb_enc_is_newline(rsptr, rsptr + RSTRING_LEN(rs), enc)) {
9005 rsptr = RSTRING_PTR(rs);
9006 rslen = RSTRING_LEN(rs);
9009 while (subptr < pend) {
9010 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9013 adjusted = rb_enc_right_char_head(subptr, hit, pend, enc);
9014 if (hit != adjusted) {
9018 subend = hit += rslen;
9021 subend = chomp_newline(subptr, subend, enc);
9028 if (ENUM_ELEM(ary, line)) {
9029 str_mod_check(str, ptr, len);
9034 if (subptr != pend) {
9037 pend = chomp_newline(subptr, pend, enc);
9039 else if (pend - subptr >= rslen &&
9040 memcmp(pend - rslen, rsptr, rslen) == 0) {
9045 ENUM_ELEM(ary, line);
9066rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9069 return rb_str_enumerate_lines(argc, argv, str, 0);
9082rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9084 VALUE ary = WANTARRAY(
"lines", 0);
9085 return rb_str_enumerate_lines(argc, argv, str, ary);
9099 for (i=0; i<RSTRING_LEN(str); i++) {
9100 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9118rb_str_each_byte(
VALUE str)
9121 return rb_str_enumerate_bytes(str, 0);
9133rb_str_bytes(
VALUE str)
9135 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9136 return rb_str_enumerate_bytes(str, ary);
9154 ptr = RSTRING_PTR(str);
9155 len = RSTRING_LEN(str);
9156 enc = rb_enc_get(str);
9159 for (i = 0; i < len; i += n) {
9160 n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
9165 for (i = 0; i < len; i += n) {
9166 n = rb_enc_mbclen(ptr + i, ptr + len, enc);
9187rb_str_each_char(
VALUE str)
9190 return rb_str_enumerate_chars(str, 0);
9202rb_str_chars(
VALUE str)
9205 return rb_str_enumerate_chars(str, ary);
9209rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9214 const char *ptr, *end;
9217 if (single_byte_optimizable(str))
9218 return rb_str_enumerate_bytes(str, ary);
9221 ptr = RSTRING_PTR(str);
9222 end = RSTRING_END(str);
9223 enc = STR_ENC_GET(str);
9226 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9247rb_str_each_codepoint(
VALUE str)
9250 return rb_str_enumerate_codepoints(str, 0);
9262rb_str_codepoints(
VALUE str)
9265 return rb_str_enumerate_codepoints(str, ary);
9271 int encidx = rb_enc_to_index(enc);
9272 regex_t *reg_grapheme_cluster = NULL;
9273 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9276 if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
9277 reg_grapheme_cluster = reg_grapheme_cluster_utf8;
9279 if (!reg_grapheme_cluster) {
9280 const OnigUChar source_ascii[] =
"\\X";
9282 const OnigUChar *source = source_ascii;
9283 size_t source_len =
sizeof(source_ascii) - 1;
9285#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9286#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9287#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9288#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9289#define CASE_UTF(e) \
9290 case ENCINDEX_UTF_##e: { \
9291 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9292 source = source_UTF_##e; \
9293 source_len = sizeof(source_UTF_##e); \
9296 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9303 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9304 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9306 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9307 onig_error_code_to_str(message, r, &einfo);
9308 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9310 if (encidx == rb_utf8_encindex()) {
9311 reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
9314 return reg_grapheme_cluster;
9320 size_t grapheme_cluster_count = 0;
9321 regex_t *reg_grapheme_cluster = NULL;
9323 const char *ptr, *end;
9325 if (!rb_enc_unicode_p(enc)) {
9329 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9330 ptr = RSTRING_PTR(str);
9331 end = RSTRING_END(str);
9334 OnigPosition len = onig_match(reg_grapheme_cluster,
9335 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9336 (
const OnigUChar *)ptr, NULL, 0);
9337 if (len <= 0)
break;
9338 grapheme_cluster_count++;
9342 return SIZET2NUM(grapheme_cluster_count);
9346rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9349 regex_t *reg_grapheme_cluster = NULL;
9351 const char *ptr0, *ptr, *end;
9353 if (!rb_enc_unicode_p(enc)) {
9354 return rb_str_enumerate_chars(str, ary);
9358 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9359 ptr0 = ptr = RSTRING_PTR(str);
9360 end = RSTRING_END(str);
9363 OnigPosition len = onig_match(reg_grapheme_cluster,
9364 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9365 (
const OnigUChar *)ptr, NULL, 0);
9366 if (len <= 0)
break;
9387rb_str_each_grapheme_cluster(
VALUE str)
9390 return rb_str_enumerate_grapheme_clusters(str, 0);
9402rb_str_grapheme_clusters(
VALUE str)
9405 return rb_str_enumerate_grapheme_clusters(str, ary);
9409chopped_length(
VALUE str)
9412 const char *p, *p2, *beg, *end;
9414 beg = RSTRING_PTR(str);
9415 end = beg + RSTRING_LEN(str);
9416 if (beg >= end)
return 0;
9417 p = rb_enc_prev_char(beg, end, end, enc);
9419 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
9420 p2 = rb_enc_prev_char(beg, p, end, enc);
9421 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
9437rb_str_chop_bang(
VALUE str)
9439 str_modify_keep_cr(str);
9440 if (RSTRING_LEN(str) > 0) {
9442 len = chopped_length(str);
9443 STR_SET_LEN(str, len);
9444 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
9463rb_str_chop(
VALUE str)
9469smart_chomp(
VALUE str,
const char *e,
const char *p)
9472 if (rb_enc_mbminlen(enc) > 1) {
9473 const char *pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
9474 if (rb_enc_is_newline(pp, e, enc)) {
9477 pp = e - rb_enc_mbminlen(enc);
9479 pp = rb_enc_left_char_head(p, pp, e, enc);
9480 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9488 if (--e > p && *(e-1) ==
'\r') {
9505 char *pp, *e, *rsptr;
9507 char *
const p = RSTRING_PTR(str);
9508 long len = RSTRING_LEN(str);
9510 if (len == 0)
return 0;
9513 return smart_chomp(str, e, p);
9516 enc = rb_enc_get(str);
9519 if (rb_enc_mbminlen(enc) > 1) {
9521 pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
9522 if (!rb_enc_is_newline(pp, e, enc))
break;
9524 pp -= rb_enc_mbminlen(enc);
9526 pp = rb_enc_left_char_head(p, pp, e, enc);
9527 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
9534 while (e > p && *(e-1) ==
'\n') {
9536 if (e > p && *(e-1) ==
'\r')
9542 if (rslen > len)
return len;
9544 enc = rb_enc_get(rs);
9545 newline = rsptr[rslen-1];
9546 if (rslen == rb_enc_mbminlen(enc)) {
9548 if (newline ==
'\n')
9549 return smart_chomp(str, e, p);
9552 if (rb_enc_is_newline(rsptr, rsptr+rslen, enc))
9553 return smart_chomp(str, e, p);
9557 enc = rb_enc_check(str, rs);
9558 if (is_broken_string(rs)) {
9562 if (p[len-1] == newline &&
9564 memcmp(rsptr, pp, rslen) == 0)) {
9565 if (rb_enc_left_char_head(p, pp, e, enc) == pp)
9578chomp_rs(
int argc,
const VALUE *argv)
9580 rb_check_arity(argc, 0, 1);
9594 long olen = RSTRING_LEN(str);
9595 long len = chompped_length(str, rs);
9596 if (len >= olen)
return Qnil;
9597 str_modify_keep_cr(str);
9598 STR_SET_LEN(str, len);
9599 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
9616rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
9619 str_modifiable(str);
9620 if (RSTRING_LEN(str) == 0)
return Qnil;
9621 rs = chomp_rs(argc, argv);
9623 return rb_str_chomp_string(str, rs);
9636rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
9638 VALUE rs = chomp_rs(argc, argv);
9646 const char *
const start = s;
9648 if (!s || s >= e)
return 0;
9651 if (single_byte_optimizable(str)) {
9652 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
9657 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
9677rb_str_lstrip_bang(
VALUE str)
9683 str_modify_keep_cr(str);
9684 enc = STR_ENC_GET(str);
9686 loffset = lstrip_offset(str, start, start+olen, enc);
9688 long len = olen-loffset;
9689 s = start + loffset;
9690 memmove(start, s, len);
9691 STR_SET_LEN(str, len);
9692 TERM_FILL(start+len, rb_enc_mbminlen(enc));
9715rb_str_lstrip(
VALUE str)
9720 loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str));
9721 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
9730 rb_str_check_dummy_enc(enc);
9734 if (!s || s >= e)
return 0;
9738 if (single_byte_optimizable(str)) {
9740 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
9745 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
9746 unsigned int c = rb_enc_codepoint(tp, e, enc);
9765rb_str_rstrip_bang(
VALUE str)
9771 str_modify_keep_cr(str);
9772 enc = STR_ENC_GET(str);
9774 roffset = rstrip_offset(str, start, start+olen, enc);
9776 long len = olen - roffset;
9778 STR_SET_LEN(str, len);
9779 TERM_FILL(start+len, rb_enc_mbminlen(enc));
9802rb_str_rstrip(
VALUE str)
9808 enc = STR_ENC_GET(str);
9810 roffset = rstrip_offset(str, start, start+olen, enc);
9812 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
9828rb_str_strip_bang(
VALUE str)
9831 long olen, loffset, roffset;
9834 str_modify_keep_cr(str);
9835 enc = STR_ENC_GET(str);
9837 loffset = lstrip_offset(str, start, start+olen, enc);
9838 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9840 if (loffset > 0 || roffset > 0) {
9841 long len = olen-roffset;
9844 memmove(start, start + loffset, len);
9846 STR_SET_LEN(str, len);
9847 TERM_FILL(start+len, rb_enc_mbminlen(enc));
9870rb_str_strip(
VALUE str)
9873 long olen, loffset, roffset;
9877 loffset = lstrip_offset(str, start, start+olen, enc);
9878 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
9880 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
9885scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
9887 VALUE result, match;
9890 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
9894 end = pos + RSTRING_LEN(pat);
9907 if (RSTRING_LEN(str) > end)
9908 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
9909 RSTRING_END(str), enc);
9916 if (!regs || regs->num_regs == 1) {
9921 for (i=1; i < regs->num_regs; i++) {
9926 rb_ary_push(result, s);
9979 long last = -1, prev = 0;
9980 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
9982 pat = get_pat_quoted(pat, 1);
9983 mustnot_broken(str);
9985 VALUE ary = rb_ary_new();
9987 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
9990 rb_ary_push(ary, result);
9992 if (last >= 0) rb_pat_search(pat, str, last, 1);
9997 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10001 str_mod_check(str, p, len);
10003 if (last >= 0) rb_pat_search(pat, str, last, 1);
10027rb_str_hex(
VALUE str)
10029 return rb_str_to_inum(str, 16, FALSE);
10054rb_str_oct(
VALUE str)
10056 return rb_str_to_inum(str, -8, FALSE);
10059#ifndef HAVE_CRYPT_R
10065} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10068crypt_mutex_initialize(
void)
10139# define CRYPT_END() ALLOCV_END(databuf)
10141 extern char *crypt(
const char *,
const char *);
10142# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10145 const char *s, *saltp;
10148 char salt_8bit_clean[3];
10152 mustnot_wchar(str);
10153 mustnot_wchar(salt);
10155 saltp = RSTRING_PTR(salt);
10156 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10161 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
10162 salt_8bit_clean[0] = saltp[0] & 0x7f;
10163 salt_8bit_clean[1] = saltp[1] & 0x7f;
10164 salt_8bit_clean[2] =
'\0';
10165 saltp = salt_8bit_clean;
10170# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10171 data->initialized = 0;
10173 res = crypt_r(s, saltp, data);
10175 crypt_mutex_initialize();
10177 res = crypt(s, saltp);
10203 c = rb_enc_codepoint(RSTRING_PTR(s), RSTRING_END(s), STR_ENC_GET(s));
10218 char *ptr, *p, *pend;
10221 unsigned long sum0 = 0;
10223 if (rb_check_arity(argc, 0, 1) && (bits =
NUM2INT(argv[0])) < 0) {
10226 ptr = p = RSTRING_PTR(str);
10227 len = RSTRING_LEN(str);
10232 sum = rb_funcall(sum,
'+', 1,
LONG2FIX(sum0));
10233 str_mod_check(str, ptr, len);
10236 sum0 += (
unsigned char)*p;
10242 sum = rb_funcall(sum,
'+', 1,
LONG2FIX(sum0));
10247 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10248 sum0 &= (((
unsigned long)1)<<bits)-1;
10256 sum = rb_funcall(sum,
'+', 1,
LONG2FIX(sum0));
10260 mod = rb_funcall(mod,
'-', 1,
INT2FIX(1));
10261 sum = rb_funcall(sum,
'&', 1, mod);
10268rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10272 long width, len, flen = 1, fclen = 1;
10275 const char *f =
" ";
10276 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10278 int singlebyte = 1, cr;
10281 rb_scan_args(argc, argv,
"11", &w, &pad);
10282 enc = STR_ENC_GET(str);
10283 termlen = rb_enc_mbminlen(enc);
10287 enc = rb_enc_check(str, pad);
10288 f = RSTRING_PTR(pad);
10289 flen = RSTRING_LEN(pad);
10290 fclen = str_strlen(pad, enc);
10291 singlebyte = single_byte_optimizable(pad);
10292 if (flen == 0 || fclen == 0) {
10296 len = str_strlen(str, enc);
10297 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10299 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10303 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10304 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10306 size = RSTRING_LEN(str);
10307 if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10308 (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10309 (len += llen2 + rlen2) >= LONG_MAX - size) {
10313 res = str_new0(
rb_cString, 0, len, termlen);
10314 p = RSTRING_PTR(res);
10316 memset(p, *f, llen);
10320 while (llen >= fclen) {
10326 memcpy(p, f, llen2);
10330 memcpy(p, RSTRING_PTR(str), size);
10333 memset(p, *f, rlen);
10337 while (rlen >= fclen) {
10343 memcpy(p, f, rlen2);
10347 TERM_FILL(p, termlen);
10348 STR_SET_LEN(res, p-RSTRING_PTR(res));
10349 rb_enc_associate(res, enc);
10371rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10373 return rb_str_justify(argc, argv, str,
'l');
10387rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10389 return rb_str_justify(argc, argv, str,
'r');
10404rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10406 return rb_str_justify(argc, argv, str,
'c');
10422 sep = get_pat_quoted(sep, 0);
10434 pos = rb_str_index(str, sep, 0);
10435 if (pos < 0)
goto failed;
10440 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10443 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
10457 long pos = RSTRING_LEN(str);
10459 sep = get_pat_quoted(sep, 0);
10472 pos = rb_str_rindex(str, sep, pos);
10482 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
10484 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
10496rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
10500 for (i=0; i<argc; i++) {
10501 VALUE tmp = argv[i];
10503 if (rb_reg_start_with_p(tmp, str))
10508 rb_enc_check(str, tmp);
10509 if (RSTRING_LEN(str) < RSTRING_LEN(tmp))
continue;
10510 if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
10526rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
10532 for (i=0; i<argc; i++) {
10533 VALUE tmp = argv[i];
10536 enc = rb_enc_check(str, tmp);
10537 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
10538 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
10539 p = RSTRING_PTR(str);
10542 if (rb_enc_left_char_head(p, s, e, enc) != s)
10544 if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
10560deleted_prefix_length(
VALUE str,
VALUE prefix)
10562 char *strptr, *prefixptr;
10563 long olen, prefixlen;
10566 if (is_broken_string(prefix))
return 0;
10567 rb_enc_check(str, prefix);
10570 prefixlen = RSTRING_LEN(prefix);
10571 if (prefixlen <= 0)
return 0;
10572 olen = RSTRING_LEN(str);
10573 if (olen < prefixlen)
return 0;
10574 strptr = RSTRING_PTR(str);
10575 prefixptr = RSTRING_PTR(prefix);
10576 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
10591rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
10594 str_modify_keep_cr(str);
10596 prefixlen = deleted_prefix_length(str, prefix);
10597 if (prefixlen <= 0)
return Qnil;
10611rb_str_delete_prefix(
VALUE str,
VALUE prefix)
10615 prefixlen = deleted_prefix_length(str, prefix);
10616 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
10618 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
10631deleted_suffix_length(
VALUE str,
VALUE suffix)
10633 char *strptr, *suffixptr, *s;
10634 long olen, suffixlen;
10638 if (is_broken_string(suffix))
return 0;
10639 enc = rb_enc_check(str, suffix);
10642 suffixlen = RSTRING_LEN(suffix);
10643 if (suffixlen <= 0)
return 0;
10644 olen = RSTRING_LEN(str);
10645 if (olen < suffixlen)
return 0;
10646 strptr = RSTRING_PTR(str);
10647 suffixptr = RSTRING_PTR(suffix);
10648 s = strptr + olen - suffixlen;
10649 if (memcmp(s, suffixptr, suffixlen) != 0)
return 0;
10650 if (rb_enc_left_char_head(strptr, s, strptr + olen, enc) != s)
return 0;
10665rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
10667 long olen, suffixlen, len;
10668 str_modifiable(str);
10670 suffixlen = deleted_suffix_length(str, suffix);
10671 if (suffixlen <= 0)
return Qnil;
10673 olen = RSTRING_LEN(str);
10674 str_modify_keep_cr(str);
10675 len = olen - suffixlen;
10676 STR_SET_LEN(str, len);
10677 TERM_FILL(&RSTRING_PTR(str)[len], TERM_LEN(str));
10693rb_str_delete_suffix(
VALUE str,
VALUE suffix)
10697 suffixlen = deleted_suffix_length(str, suffix);
10698 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
10700 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
10715 val = rb_fs_check(val);
10718 "value of %"PRIsVALUE
" must be String or Regexp",
10722 rb_warn_deprecated(
"`$;'", NULL);
10739 str_modifiable(str);
10740 rb_enc_associate(str, rb_to_encoding(enc));
10757 if (
FL_TEST(str, STR_NOEMBED)) {
10761 str2 = str_alloc_embed(
rb_cString, RSTRING_EMBED_LEN(str) + TERM_LEN(str));
10763 str_replace_shared_without_enc(str2, str);
10765 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
10798rb_str_valid_encoding_p(
VALUE str)
10818rb_str_is_ascii_only_p(
VALUE str)
10828 static const char ellipsis[] =
"...";
10829 const long ellipsislen =
sizeof(ellipsis) - 1;
10831 const long blen = RSTRING_LEN(str);
10832 const char *
const p = RSTRING_PTR(str), *e = p + blen;
10833 VALUE estr, ret = 0;
10836 if (len * rb_enc_mbminlen(enc) >= blen ||
10837 (e =
rb_enc_nth(p, e, len, enc)) - p == blen) {
10840 else if (len <= ellipsislen ||
10841 !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
10842 if (rb_enc_asciicompat(enc)) {
10844 rb_enc_associate(ret, enc);
10851 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
10856 rb_enc_from_encoding(enc), 0,
Qnil);
10875 rb_enc_name(enc), rb_enc_name(e));
10894 if (enc == STR_ENC_GET(str)) {
10899 return enc_str_scrub(enc, str, repl, cr);
10907 const char *rep, *p, *e, *p1, *sp;
10920 if (!
NIL_P(repl)) {
10921 repl = str_compat_and_valid(repl, enc);
10924 if (rb_enc_dummy_p(enc)) {
10927 encidx = rb_enc_to_index(enc);
10929#define DEFAULT_REPLACE_CHAR(str) do { \
10930 static const char replace[sizeof(str)-1] = str; \
10931 rep = replace; replen = (int)sizeof(replace); \
10934 slen = RSTRING_LEN(str);
10935 p = RSTRING_PTR(str);
10936 e = RSTRING_END(str);
10940 if (rb_enc_asciicompat(enc)) {
10946 else if (!
NIL_P(repl)) {
10947 rep = RSTRING_PTR(repl);
10948 replen = RSTRING_LEN(repl);
10951 else if (encidx == rb_utf8_encindex()) {
10952 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
10956 DEFAULT_REPLACE_CHAR(
"?");
10961 p = search_nonascii(p, e);
10966 int ret = rb_enc_precise_mbclen(p, e, enc);
10979 long clen = rb_enc_mbmaxlen(enc);
10985 if (e - p < clen) clen = e - p;
10992 for (; clen > 1; clen--) {
10993 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11004 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11005 str_mod_check(str, sp, slen);
11006 repl = str_compat_and_valid(repl, enc);
11013 p = search_nonascii(p, e);
11039 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11040 str_mod_check(str, sp, slen);
11041 repl = str_compat_and_valid(repl, enc);
11050 long mbminlen = rb_enc_mbminlen(enc);
11054 else if (!
NIL_P(repl)) {
11055 rep = RSTRING_PTR(repl);
11056 replen = RSTRING_LEN(repl);
11058 else if (encidx == ENCINDEX_UTF_16BE) {
11059 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11061 else if (encidx == ENCINDEX_UTF_16LE) {
11062 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11064 else if (encidx == ENCINDEX_UTF_32BE) {
11065 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11067 else if (encidx == ENCINDEX_UTF_32LE) {
11068 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11071 DEFAULT_REPLACE_CHAR(
"?");
11075 int ret = rb_enc_precise_mbclen(p, e, enc);
11084 long clen = rb_enc_mbmaxlen(enc);
11088 if (e - p < clen) clen = e - p;
11089 if (clen <= mbminlen * 2) {
11094 for (; clen > mbminlen; clen-=mbminlen) {
11095 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11105 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11106 str_mod_check(str, sp, slen);
11107 repl = str_compat_and_valid(repl, enc);
11132 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11133 str_mod_check(str, sp, slen);
11134 repl = str_compat_and_valid(repl, enc);
11155 VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) :
Qnil;
11170str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11172 VALUE repl = argc ? (rb_check_arity(argc, 0, 1), argv[0]) :
Qnil;
11178static ID id_normalize;
11179static ID id_normalized_p;
11180static VALUE mUnicodeNormalize;
11183unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11185 static int UnicodeNormalizeRequired = 0;
11188 if (!UnicodeNormalizeRequired) {
11189 rb_require(
"unicode_normalize/normalize.rb");
11190 UnicodeNormalizeRequired = 1;
11193 if (rb_check_arity(argc, 0, 1)) argv2[1] = argv[0];
11194 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11231rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11233 return unicode_normalize_common(argc, argv, str, id_normalize);
11247rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11249 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11276rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11278 return unicode_normalize_common(argc, argv, str, id_normalized_p);
11413#define sym_equal rb_obj_equal
11416sym_printable(
const char *s,
const char *send,
rb_encoding *enc)
11420 int c = rb_enc_precise_mbclen(s, send, enc);
11424 c = rb_enc_mbc_to_codepoint(s, send, enc);
11425 if (!rb_enc_isprint(c, enc))
return FALSE;
11432rb_str_symname_p(
VALUE sym)
11437 rb_encoding *resenc = rb_default_internal_encoding();
11439 if (resenc == NULL) resenc = rb_default_external_encoding();
11440 enc = STR_ENC_GET(sym);
11441 ptr = RSTRING_PTR(sym);
11442 len = RSTRING_LEN(sym);
11443 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (
long)strlen(ptr) ||
11451rb_str_quote_unprintable(
VALUE str)
11459 resenc = rb_default_internal_encoding();
11460 if (resenc == NULL) resenc = rb_default_external_encoding();
11461 enc = STR_ENC_GET(str);
11462 ptr = RSTRING_PTR(str);
11463 len = RSTRING_LEN(str);
11464 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
11465 !sym_printable(ptr, ptr + len, enc)) {
11466 return rb_str_escape(str);
11471MJIT_FUNC_EXPORTED
VALUE
11472rb_id_quote_unprintable(
ID id)
11474 VALUE str = rb_id2str(
id);
11475 if (!rb_str_symname_p(str)) {
11476 return rb_str_escape(str);
11494sym_inspect(
VALUE sym)
11501 if (!rb_str_symname_p(str)) {
11503 len = RSTRING_LEN(str);
11505 dest = RSTRING_PTR(str);
11506 memmove(dest + 1, dest, len);
11511 str = rb_enc_str_new(0, len + 1, enc);
11512 dest = RSTRING_PTR(str);
11513 memcpy(dest + 1, ptr, len);
11538MJIT_FUNC_EXPORTED
VALUE
11539rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
11643 return rb_str_match(
rb_sym2str(sym), other);
11658sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
11660 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
11673sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
11675 return rb_str_match_m_p(argc, argv, sym);
11693 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
11707sym_length(
VALUE sym)
11721sym_empty(
VALUE sym)
11755sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
11771sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
11787sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
11801sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
11803 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
11816sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
11818 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
11830sym_encoding(
VALUE sym)
11836string_for_symbol(
VALUE name)
11855 name = string_for_symbol(name);
11865 name = string_for_symbol(name);
11889 return rb_fstring(str);
11896 return register_fstring(setup_fake_str(&fake_str,
ptr,
len, ENCINDEX_US_ASCII), TRUE);
11908 if (UNLIKELY(rb_enc_autoload_p(enc))) {
11909 rb_enc_autoload(enc);
11913 return register_fstring(rb_setup_fake_str(&fake_str,
ptr,
len, enc), TRUE);
11926 assert(rb_vm_fstring_table());
11927 st_foreach(rb_vm_fstring_table(), fstring_set_class_i,
rb_cString);
11984 sym_ascii =
ID2SYM(rb_intern_const(
"ascii"));
11985 sym_turkic =
ID2SYM(rb_intern_const(
"turkic"));
11986 sym_lithuanian =
ID2SYM(rb_intern_const(
"lithuanian"));
11987 sym_fold =
ID2SYM(rb_intern_const(
"fold"));
12080 id_normalize = rb_intern_const(
"normalize");
12081 id_normalized_p = rb_intern_const(
"normalized?");
12090 rb_gc_register_address(&rb_fs);
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy; the check is performed only when RUBY_DEBUG is truthy.
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that is always checked, regardless of RUBY_DEBUG.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module into a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define ISSPACE
Old name of rb_isspace.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define OBJ_FREEZE_RAW
Old name of RB_OBJ_FREEZE_RAW.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ISDIGIT
Old name of rb_isdigit.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define ISALPHA
Old name of rb_isalpha.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define ISASCII
Old name of rb_isascii.
#define TOLOWER
Old name of rb_tolower.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define ISPRINT
Old name of rb_isprint.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it always reports, regardless of the runtime -W flag.
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
void rb_bug(const char *fmt,...)
Interpreter panic switch.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eArgError
ArgumentError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_cObject
Documented in include/ruby/internal/globals.h.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns an "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns an "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no room left in the destination buffer.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
#define rb_check_frozen
Just another name of rb_check_frozen_inline.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single bit of the symbols that have ever been interned in the entire history of the current pr...
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
VALUE rb_str_to_interned_str(VALUE str)
Identical to rb_interned_str(), except it takes a Ruby string instead of a C string.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character...
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string...
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes a Ruby string instead of a C string.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string to be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "...
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary...
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "bin...
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
ID rb_to_id(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
@ RSTRING_EMBED_LEN_MAX
Max possible number of characters that can be embedded.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a...
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encodi...
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes a C string instead of a Ruby string.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
union RString::@50 as
String's specific fields.
struct RString::@50::@51 heap
Strings that use a separate memory region for their contents use this pattern.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
struct RString::@50::@52 embed
Embedded contents.
char ary[RSTRING_EMBED_LEN_MAX+1]
When a string is short enough, it uses this area to store the contents themselves.
long len
Length of the string, not including terminating NUL character.
union RString::@50::@51::@53 aux
Auxiliary info.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t VALUE
Type that represents a Ruby object.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.