Ruby 3.2.2p53 (2023-03-30 revision e51014f9c05aa65cbf203442d37fef7c12390015)
regexec.c
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
33#ifdef RUBY
34# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
35#else
36# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
37#endif
38
39#ifndef USE_TOKEN_THREADED_VM
40# ifdef __GNUC__
41# define USE_TOKEN_THREADED_VM 1
42# else
43# define USE_TOKEN_THREADED_VM 0
44# endif
45#endif
46
47#ifdef RUBY
48# define ENC_DUMMY_FLAG (1<<24)
49static inline int
50rb_enc_asciicompat(OnigEncoding enc)
51{
52 return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
53}
/*
 * Replace the stock ASCII word test.  For encodings accepted by
 * rb_enc_asciicompat() the first byte is tested directly (alphanumeric or
 * '_'); otherwise the character is decoded and checked against
 * ONIGENC_CTYPE_WORD with onigenc_ascii_is_code_ctype().
 */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
     onigenc_ascii_is_code_ctype( \
       ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
59#endif /* RUBY */
60
61#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to 13 (CR) and the character that
 * follows it decodes to 10 (LF). */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
    (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
     ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
/* Newline test that takes the match options into account; forwards every
 * argument to is_mbc_newline_ex(). */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
    is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
67static int
68is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
69 const UChar *end, OnigOptionType option, int check_prev)
70{
71 if (IS_NEWLINE_CRLF(option)) {
72 if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
73 if (check_prev) {
74 const UChar *prev = onigenc_get_prev_char_head(enc, start, p, end);
75 if ((prev != NULL) && ONIGENC_MBC_TO_CODE(enc, prev, end) == 0x0d)
76 return 0;
77 else
78 return 1;
79 }
80 else
81 return 1;
82 }
83 else {
84 const UChar *pnext = p + enclen(enc, p, end);
85 if (pnext < end &&
86 ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
87 ONIGENC_MBC_TO_CODE(enc, pnext, end) == 0x0a)
88 return 1;
89 if (ONIGENC_IS_MBC_NEWLINE(enc, p, end))
90 return 1;
91 return 0;
92 }
93 }
94 else {
95 return ONIGENC_IS_MBC_NEWLINE(enc, p, end);
96 }
97}
98#else /* USE_CRNL_AS_LINE_TERMINATOR */
/* CRLF handling compiled out: the extended test is the plain encoding
 * newline test; start, option and check_prev are not used. */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
    ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
101#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234#ifdef USE_CACHE_MATCH_OPT
235
236/* count number of jump-like opcodes for allocation of cache memory. */
237static OnigPosition
238count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
239{
240 UChar* p = reg->p;
241 UChar* pend = p + reg->used;
242 LengthType len;
243 MemNumType mem;
244 MemNumType current_mem = -1;
245 long current_mem_num = 0;
246 OnigEncoding enc = reg->enc;
247
248 *num = 0;
249 *table_size = 0;
250
251 while (p < pend) {
252 switch (*p++) {
253 case OP_FINISH:
254 case OP_END:
255 break;
256
257 case OP_EXACT1: p++; break;
258 case OP_EXACT2: p += 2; break;
259 case OP_EXACT3: p += 3; break;
260 case OP_EXACT4: p += 4; break;
261 case OP_EXACT5: p += 5; break;
262 case OP_EXACTN:
263 GET_LENGTH_INC(len, p); p += len; break;
264 case OP_EXACTMB2N1: p += 2; break;
265 case OP_EXACTMB2N2: p += 4; break;
266 case OP_EXACTMB2N3: p += 6; break;
267 case OP_EXACTMB2N:
268 GET_LENGTH_INC(len, p); p += len * 2; break;
269 case OP_EXACTMB3N:
270 GET_LENGTH_INC(len, p); p += len * 3; break;
271 case OP_EXACTMBN:
272 {
273 int mb_len;
274 GET_LENGTH_INC(mb_len, p);
275 GET_LENGTH_INC(len, p);
276 p += mb_len * len;
277 }
278 break;
279
280 case OP_EXACT1_IC:
281 len = enclen(enc, p, pend); p += len; break;
282 case OP_EXACTN_IC:
283 GET_LENGTH_INC(len, p); p += len; break;
284
285 case OP_CCLASS:
286 case OP_CCLASS_NOT:
287 p += SIZE_BITSET; break;
288 case OP_CCLASS_MB:
289 case OP_CCLASS_MB_NOT:
290 GET_LENGTH_INC(len, p); p += len; break;
291 case OP_CCLASS_MIX:
292 case OP_CCLASS_MIX_NOT:
293 p += SIZE_BITSET;
294 GET_LENGTH_INC(len, p);
295 p += len;
296 break;
297
298 case OP_ANYCHAR:
299 case OP_ANYCHAR_ML:
300 break;
301 case OP_ANYCHAR_STAR:
302 case OP_ANYCHAR_ML_STAR:
303 *num += 1; *table_size += 1; break;
304 case OP_ANYCHAR_STAR_PEEK_NEXT:
305 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
306 p++; *num += 1; *table_size += 1; break;
307
308 case OP_WORD:
309 case OP_NOT_WORD:
310 case OP_WORD_BOUND:
311 case OP_NOT_WORD_BOUND:
312 case OP_WORD_BEGIN:
313 case OP_WORD_END:
314 break;
315
316 case OP_ASCII_WORD:
317 case OP_NOT_ASCII_WORD:
318 case OP_ASCII_WORD_BOUND:
319 case OP_NOT_ASCII_WORD_BOUND:
320 case OP_ASCII_WORD_BEGIN:
321 case OP_ASCII_WORD_END:
322 break;
323
324 case OP_BEGIN_BUF:
325 case OP_END_BUF:
326 case OP_BEGIN_LINE:
327 case OP_END_LINE:
328 case OP_SEMI_END_BUF:
329 case OP_BEGIN_POSITION:
330 break;
331
332 case OP_BACKREF1:
333 case OP_BACKREF2:
334 case OP_BACKREFN:
335 case OP_BACKREFN_IC:
336 case OP_BACKREF_MULTI:
337 case OP_BACKREF_MULTI_IC:
338 case OP_BACKREF_WITH_LEVEL:
339 goto fail;
340
341 case OP_MEMORY_START:
342 case OP_MEMORY_START_PUSH:
343 case OP_MEMORY_END_PUSH:
344 case OP_MEMORY_END_PUSH_REC:
345 case OP_MEMORY_END:
346 case OP_MEMORY_END_REC:
347 p += SIZE_MEMNUM; break;
348
349 case OP_KEEP:
350 break;
351
352 case OP_FAIL:
353 break;
354 case OP_JUMP:
355 p += SIZE_RELADDR;
356 break;
357 case OP_PUSH:
358 p += SIZE_RELADDR;
359 *num += 1;
360 *table_size += 1;
361 break;
362 case OP_POP:
363 break;
364 case OP_PUSH_OR_JUMP_EXACT1:
365 case OP_PUSH_IF_PEEK_NEXT:
366 p += SIZE_RELADDR + 1; *num += 1; *table_size += 1; break;
367 case OP_REPEAT:
368 case OP_REPEAT_NG:
369 if (current_mem != -1) {
370 // A nested OP_REPEAT is not yet supported.
371 goto fail;
372 }
373 GET_MEMNUM_INC(mem, p);
374 p += SIZE_RELADDR;
375 if (reg->repeat_range[mem].lower == 0) {
376 *num += 1;
377 *table_size += 1;
378 }
379 reg->repeat_range[mem].base_num = *num;
380 current_mem = mem;
381 current_mem_num = *num;
382 break;
383 case OP_REPEAT_INC:
384 case OP_REPEAT_INC_NG:
385 GET_MEMNUM_INC(mem, p);
386 if (mem != current_mem) {
387 // A lone or invalid OP_REPEAT_INC is found.
388 goto fail;
389 }
390 {
391 long inner_num = *num - current_mem_num;
392 OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
393 repeat_range->inner_num = inner_num;
394 *num -= inner_num;
395 *num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
396 if (repeat_range->lower < repeat_range->upper) {
397 *table_size += 1;
398 }
399 current_mem = -1;
400 current_mem_num = 0;
401 }
402 break;
403 case OP_REPEAT_INC_SG:
404 case OP_REPEAT_INC_NG_SG:
405 // TODO: Support nested OP_REPEAT.
406 goto fail;
407 case OP_NULL_CHECK_START:
408 case OP_NULL_CHECK_END:
409 case OP_NULL_CHECK_END_MEMST:
410 case OP_NULL_CHECK_END_MEMST_PUSH:
411 p += SIZE_MEMNUM; break;
412
413 case OP_PUSH_POS:
414 case OP_POP_POS:
415 case OP_PUSH_POS_NOT:
416 case OP_FAIL_POS:
417 case OP_PUSH_STOP_BT:
418 case OP_POP_STOP_BT:
419 case OP_LOOK_BEHIND:
420 case OP_PUSH_LOOK_BEHIND_NOT:
421 case OP_FAIL_LOOK_BEHIND_NOT:
422 case OP_PUSH_ABSENT_POS:
423 case OP_ABSENT_END:
424 case OP_ABSENT:
425 goto fail;
426
427 case OP_CALL:
428 case OP_RETURN:
429 goto fail;
430
431 case OP_CONDITION:
432 goto fail;
433
434 case OP_STATE_CHECK_PUSH:
435 case OP_STATE_CHECK_PUSH_OR_JUMP:
436 case OP_STATE_CHECK:
437 case OP_STATE_CHECK_ANYCHAR_STAR:
438 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
439 goto fail;
440
441 case OP_SET_OPTION_PUSH:
442 case OP_SET_OPTION:
443 p += SIZE_OPTION;
444 break;
445
446 default:
447 goto bytecode_error;
448 }
449 }
450
451 return 0;
452
453fail:
454 *num = NUM_CACHE_OPCODE_FAIL;
455 return 0;
456
457bytecode_error:
458 return ONIGERR_UNDEFINED_BYTECODE;
459}
460
461static OnigPosition
462init_cache_index_table(regex_t* reg, OnigCacheIndex *table)
463{
464 UChar* pbegin;
465 UChar* p = reg->p;
466 UChar* pend = p + reg->used;
467 LengthType len;
468 MemNumType mem;
469 MemNumType current_mem = -1;
470 long num = 0;
471 long current_mem_num = 0;
472 OnigEncoding enc = reg->enc;
473
474 while (p < pend) {
475 pbegin = p;
476 switch (*p++) {
477 case OP_FINISH:
478 case OP_END:
479 break;
480
481 case OP_EXACT1: p++; break;
482 case OP_EXACT2: p += 2; break;
483 case OP_EXACT3: p += 3; break;
484 case OP_EXACT4: p += 4; break;
485 case OP_EXACT5: p += 5; break;
486 case OP_EXACTN:
487 GET_LENGTH_INC(len, p); p += len; break;
488 case OP_EXACTMB2N1: p += 2; break;
489 case OP_EXACTMB2N2: p += 4; break;
490 case OP_EXACTMB2N3: p += 6; break;
491 case OP_EXACTMB2N:
492 GET_LENGTH_INC(len, p); p += len * 2; break;
493 case OP_EXACTMB3N:
494 GET_LENGTH_INC(len, p); p += len * 3; break;
495 case OP_EXACTMBN:
496 {
497 int mb_len;
498 GET_LENGTH_INC(mb_len, p);
499 GET_LENGTH_INC(len, p);
500 p += mb_len * len;
501 }
502 break;
503
504 case OP_EXACT1_IC:
505 len = enclen(enc, p, pend); p += len; break;
506 case OP_EXACTN_IC:
507 GET_LENGTH_INC(len, p); p += len; break;
508
509 case OP_CCLASS:
510 case OP_CCLASS_NOT:
511 p += SIZE_BITSET; break;
512 case OP_CCLASS_MB:
513 case OP_CCLASS_MB_NOT:
514 GET_LENGTH_INC(len, p); p += len; break;
515 case OP_CCLASS_MIX:
516 case OP_CCLASS_MIX_NOT:
517 p += SIZE_BITSET;
518 GET_LENGTH_INC(len, p);
519 p += len;
520 break;
521
522 case OP_ANYCHAR:
523 case OP_ANYCHAR_ML:
524 break;
525 case OP_ANYCHAR_STAR:
526 case OP_ANYCHAR_ML_STAR:
527 table->addr = pbegin;
528 table->num = num - current_mem_num;
529 table->outer_repeat = current_mem;
530 num++;
531 table++;
532 break;
533 case OP_ANYCHAR_STAR_PEEK_NEXT:
534 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
535 p++;
536 table->addr = pbegin;
537 table->num = num - current_mem_num;
538 table->outer_repeat = current_mem;
539 num++;
540 table++;
541 break;
542
543 case OP_WORD:
544 case OP_NOT_WORD:
545 case OP_WORD_BOUND:
546 case OP_NOT_WORD_BOUND:
547 case OP_WORD_BEGIN:
548 case OP_WORD_END:
549 break;
550
551 case OP_ASCII_WORD:
552 case OP_NOT_ASCII_WORD:
553 case OP_ASCII_WORD_BOUND:
554 case OP_NOT_ASCII_WORD_BOUND:
555 case OP_ASCII_WORD_BEGIN:
556 case OP_ASCII_WORD_END:
557 break;
558
559 case OP_BEGIN_BUF:
560 case OP_END_BUF:
561 case OP_BEGIN_LINE:
562 case OP_END_LINE:
563 case OP_SEMI_END_BUF:
564 case OP_BEGIN_POSITION:
565 break;
566
567 case OP_BACKREF1:
568 case OP_BACKREF2:
569 case OP_BACKREFN:
570 case OP_BACKREFN_IC:
571 case OP_BACKREF_MULTI:
572 case OP_BACKREF_MULTI_IC:
573 case OP_BACKREF_WITH_LEVEL:
574 goto unexpected_bytecode_error;
575
576 case OP_MEMORY_START:
577 case OP_MEMORY_START_PUSH:
578 case OP_MEMORY_END_PUSH:
579 case OP_MEMORY_END_PUSH_REC:
580 case OP_MEMORY_END:
581 case OP_MEMORY_END_REC:
582 p += SIZE_MEMNUM; break;
583
584 case OP_KEEP:
585 break;
586
587 case OP_FAIL:
588 break;
589 case OP_JUMP:
590 p += SIZE_RELADDR;
591 break;
592 case OP_PUSH:
593 p += SIZE_RELADDR;
594 table->addr = pbegin;
595 table->num = num - current_mem_num;
596 table->outer_repeat = current_mem;
597 num++;
598 table++;
599 break;
600 case OP_POP:
601 break;
602 case OP_PUSH_OR_JUMP_EXACT1:
603 case OP_PUSH_IF_PEEK_NEXT:
604 p += SIZE_RELADDR + 1;
605 table->addr = pbegin;
606 table->num = num - current_mem_num;
607 table->outer_repeat = current_mem;
608 num++;
609 table++;
610 break;
611 case OP_REPEAT:
612 case OP_REPEAT_NG:
613 GET_MEMNUM_INC(mem, p);
614 p += SIZE_RELADDR;
615 if (reg->repeat_range[mem].lower == 0) {
616 table->addr = pbegin;
617 table->num = num - current_mem_num;
618 table->outer_repeat = -1;
619 num++;
620 table++;
621 }
622 current_mem = mem;
623 current_mem_num = num;
624 break;
625 case OP_REPEAT_INC:
626 case OP_REPEAT_INC_NG:
627 GET_MEMNUM_INC(mem, p);
628 {
629 long inner_num = num - current_mem_num;
630 OnigRepeatRange *repeat_range = &reg->repeat_range[mem];
631 if (repeat_range->lower < repeat_range->upper) {
632 table->addr = pbegin;
633 table->num = num - current_mem_num;
634 table->outer_repeat = mem;
635 table++;
636 }
637 num -= inner_num;
638 num += inner_num * repeat_range->lower + (inner_num + 1) * (repeat_range->upper == 0x7fffffff ? 1 : repeat_range->upper - repeat_range->lower);
639 current_mem = -1;
640 current_mem_num = 0;
641 }
642 break;
643 case OP_REPEAT_INC_SG:
644 case OP_REPEAT_INC_NG_SG:
645 // TODO: support OP_REPEAT opcodes.
646 goto unexpected_bytecode_error;
647 case OP_NULL_CHECK_START:
648 case OP_NULL_CHECK_END:
649 case OP_NULL_CHECK_END_MEMST:
650 case OP_NULL_CHECK_END_MEMST_PUSH:
651 p += SIZE_MEMNUM; break;
652
653 case OP_PUSH_POS:
654 case OP_POP_POS:
655 case OP_PUSH_POS_NOT:
656 case OP_FAIL_POS:
657 case OP_PUSH_STOP_BT:
658 case OP_POP_STOP_BT:
659 case OP_LOOK_BEHIND:
660 case OP_PUSH_LOOK_BEHIND_NOT:
661 case OP_FAIL_LOOK_BEHIND_NOT:
662 case OP_PUSH_ABSENT_POS:
663 case OP_ABSENT_END:
664 case OP_ABSENT:
665 goto unexpected_bytecode_error;
666
667 case OP_CALL:
668 case OP_RETURN:
669 goto unexpected_bytecode_error;
670
671 case OP_CONDITION:
672 goto unexpected_bytecode_error;
673
674 case OP_STATE_CHECK_PUSH:
675 case OP_STATE_CHECK_PUSH_OR_JUMP:
676 case OP_STATE_CHECK:
677 case OP_STATE_CHECK_ANYCHAR_STAR:
678 case OP_STATE_CHECK_ANYCHAR_ML_STAR:
679 goto unexpected_bytecode_error;
680
681 case OP_SET_OPTION_PUSH:
682 case OP_SET_OPTION:
683 p += SIZE_OPTION;
684 break;
685
686 default:
687 goto bytecode_error;
688 }
689 }
690
691 return 0;
692
693unexpected_bytecode_error:
694 return ONIGERR_UNEXPECTED_BYTECODE;
695
696bytecode_error:
697 return ONIGERR_UNDEFINED_BYTECODE;
698}
#else /* !USE_CACHE_MATCH_OPT */
700static OnigPosition
701count_num_cache_opcode(regex_t* reg, long* num, long* table_size)
702{
703 *num = NUM_CACHE_OPCODE_FAIL;
704 return 0;
705}
706#endif
707
708extern int
709onig_check_linear_time(OnigRegexType* reg)
710{
711 long num = 0, table_size = 0;
712 count_num_cache_opcode(reg, &num, &table_size);
713 return num != NUM_CACHE_OPCODE_FAIL;
714}
715
716extern void
717onig_region_clear(OnigRegion* region)
718{
719 int i;
720
721 for (i = 0; i < region->num_regs; i++) {
722 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
723 }
724#ifdef USE_CAPTURE_HISTORY
725 history_root_free(region);
726#endif
727}
728
729extern int
730onig_region_resize(OnigRegion* region, int n)
731{
732 region->num_regs = n;
733
734 if (n < ONIG_NREGION)
735 n = ONIG_NREGION;
736
737 if (region->allocated == 0) {
738 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
739 if (region->beg == 0)
740 return ONIGERR_MEMORY;
741
742 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
743 if (region->end == 0) {
744 xfree(region->beg);
745 return ONIGERR_MEMORY;
746 }
747
748 region->allocated = n;
749 }
750 else if (region->allocated < n) {
751 OnigPosition *tmp;
752
753 region->allocated = 0;
754 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
755 if (tmp == 0) {
756 xfree(region->beg);
757 xfree(region->end);
758 return ONIGERR_MEMORY;
759 }
760 region->beg = tmp;
761 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
762 if (tmp == 0) {
763 xfree(region->beg);
764 xfree(region->end);
765 return ONIGERR_MEMORY;
766 }
767 region->end = tmp;
768
769 region->allocated = n;
770 }
771
772 return 0;
773}
774
775static int
776onig_region_resize_clear(OnigRegion* region, int n)
777{
778 int r;
779
780 r = onig_region_resize(region, n);
781 if (r != 0) return r;
782 onig_region_clear(region);
783 return 0;
784}
785
786extern int
787onig_region_set(OnigRegion* region, int at, int beg, int end)
788{
789 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
790
791 if (at >= region->allocated) {
792 int r = onig_region_resize(region, at + 1);
793 if (r < 0) return r;
794 }
795
796 region->beg[at] = beg;
797 region->end[at] = end;
798 return 0;
799}
800
801extern void
802onig_region_init(OnigRegion* region)
803{
804 region->num_regs = 0;
805 region->allocated = 0;
806 region->beg = (OnigPosition* )0;
807 region->end = (OnigPosition* )0;
808#ifdef USE_CAPTURE_HISTORY
809 region->history_root = (OnigCaptureTreeNode* )0;
810#endif
811}
812
813extern OnigRegion*
814onig_region_new(void)
815{
816 OnigRegion* r;
817
818 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
819 if (r)
820 onig_region_init(r);
821 return r;
822}
823
824extern void
825onig_region_free(OnigRegion* r, int free_self)
826{
827 if (r) {
828 if (r->allocated > 0) {
829 if (r->beg) xfree(r->beg);
830 if (r->end) xfree(r->end);
831 r->allocated = 0;
832 }
833#ifdef USE_CAPTURE_HISTORY
834 history_root_free(r);
835#endif
836 if (free_self) xfree(r);
837 }
838}
839
840extern void
841onig_region_copy(OnigRegion* to, const OnigRegion* from)
842{
843#define RREGC_SIZE (sizeof(int) * from->num_regs)
844 int i, r;
845
846 if (to == from) return;
847
848 r = onig_region_resize(to, from->num_regs);
849 if (r) return;
850
851 for (i = 0; i < from->num_regs; i++) {
852 to->beg[i] = from->beg[i];
853 to->end[i] = from->end[i];
854 }
855 to->num_regs = from->num_regs;
856
857#ifdef USE_CAPTURE_HISTORY
858 history_root_free(to);
859
860 if (IS_NOT_NULL(from->history_root)) {
861 to->history_root = history_tree_clone(from->history_root);
862 }
863#endif
864}
865
866
#define INVALID_STACK_INDEX   -1

/* stack type */
/* used by normal-POP */
#define STK_ALT                    0x0001
#define STK_LOOK_BEHIND_NOT        0x0002
#define STK_POS_NOT                0x0003
/* handled by normal-POP */
#define STK_MEM_START              0x0100
#define STK_MEM_END                0x8200
#define STK_REPEAT_INC             0x0300
#define STK_STATE_CHECK_MARK       0x1000
/* avoided by normal-POP */
#define STK_NULL_CHECK_START       0x3000
#define STK_NULL_CHECK_END         0x5000  /* for recursive call */
#define STK_MEM_END_MARK           0x8400
#define STK_POS                    0x0500  /* used when POP-POS */
#define STK_STOP_BT                0x0600  /* mark for "(?>...)" */
#define STK_REPEAT                 0x0700
#define STK_CALL_FRAME             0x0800
#define STK_RETURN                 0x0900
#define STK_VOID                   0x0a00  /* for fill a blank */
#define STK_ABSENT_POS             0x0b00  /* for absent */
#define STK_ABSENT                 0x0c00  /* absent inner loop marker */

/* stack type check mask */
#define STK_MASK_POP_USED          0x00ff
#define STK_MASK_TO_VOID_TARGET    0x10ff
#define STK_MASK_MEM_END_OR_MARK   0x8000  /* MEM_END or MEM_END_MARK */
897
/*
 * Set-up and tear-down of the cache-match fields of a match argument.
 * INIT disables the optimization, zeroes the counters and NULLs both
 * tables; FREE releases the two tables when they are non-NULL.  Both
 * expand to nothing when USE_CACHE_MATCH_OPT is not defined.
 */
#ifdef USE_CACHE_MATCH_OPT
# define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa) do {\
  (msa).enable_cache_match_opt = 0;\
  (msa).num_fail = 0;\
  (msa).num_cache_opcode = NUM_CACHE_OPCODE_UNINIT;\
  (msa).num_cache_table = 0;\
  (msa).cache_index_table = (OnigCacheIndex *)0;\
  (msa).match_cache = (uint8_t *)0;\
} while(0)
# define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa) do {\
  if ((msa).cache_index_table) xfree((msa).cache_index_table);\
  if ((msa).match_cache) xfree((msa).match_cache);\
} while(0)
#else
# define MATCH_ARG_INIT_CACHE_MATCH_OPT(msa)
# define MATCH_ARG_FREE_CACHE_MATCH_OPT(msa)
#endif
915
/*
 * Initialise a match argument: no saved stack, the given options, region,
 * start and gpos, counter and end_time zeroed, cache-match fields reset.
 * With USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE, best_len additionally starts
 * as ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).best_len = ONIG_MISMATCH;\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).counter  = 0;\
  (msa).end_time = 0;\
  MATCH_ARG_INIT_CACHE_MATCH_OPT(msa);\
} while(0)
#endif
940
/*
 * STATE_CHECK_BUFF_INIT sets up the state-check bit buffer of a match
 * argument.  The buffer is used only when state_num > 0 and the string is
 * at least STATE_CHECK_STRING_THRESHOLD_LEN long, and only when the
 * computed size is non-zero, larger than the converted offset and below
 * STATE_CHECK_BUFF_MAX_SIZE.  Buffers of
 * STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE bytes or more come from xmalloc(),
 * smaller ones from xalloca(); the part from `offset` on is zeroed.  In
 * every other case the buffer pointer is NULL and its size 0.  Note that
 * the macro overwrites its `offset` argument.
 *
 * MATCH_ARG_FREE releases the saved stack, the state-check buffer (only
 * when its size says it was heap-allocated) and the cache-match tables.
 */
#ifdef USE_COMBINATION_EXPLOSION_CHECK

# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
  if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    offset = ((offset) * (state_num)) >> 3;\
    if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
        (msa).state_check_buff = (void* )xmalloc(size);\
        CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
      }\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
              (size_t )(size - (offset))); \
      (msa).state_check_buff_size = size;\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
  } while(0)

# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
  }\
  MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  MATCH_ARG_FREE_CACHE_MATCH_OPT(msa);\
} while (0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
984
985
986
#define MAX_PTR_NUM 100

/*
 * Set up the index area (alloc_addr) and the backtrack stack
 * (stk_alloc / stk_base / stk / stk_end) at the start of a match.
 * Expects `msa` and the stk_* variables to be in scope at the use site.
 *
 *  - ptr_num > MAX_PTR_NUM: the index area comes from xmalloc() and is
 *    also stored in heap_addr; the stack is the one saved in msa->stack_p
 *    if any, otherwise a fresh xalloca() block of stack_num entries.
 *  - otherwise, with a saved stack: index area from xalloca(), stack taken
 *    from msa->stack_p (msa->stack_n entries).
 *  - otherwise: one xalloca() block holds the index area followed by
 *    stack_num stack entries.
 * In the last two cases heap_addr is NULL.
 */
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num) do {\
  if (ptr_num > MAX_PTR_NUM) {\
    alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = alloc_addr;\
    if (msa->stack_p) {\
      stk_alloc = (OnigStackType* )(msa->stack_p);\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + msa->stack_n;\
    } else {\
      stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + (stack_num);\
    }\
  } else if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
                 + sizeof(OnigStackType) * (stack_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0)
1022
/* Remember the current stack in msa (pointer and entry count) so a later
 * match can reuse it; skipped while the stack is still the initial
 * stk_alloc block. */
#define STACK_SAVE do{\
  if (stk_base != stk_alloc) {\
    msa->stack_p = stk_base;\
    msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
  };\
} while(0)
1029
1030static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
1031
1032extern unsigned int
1033onig_get_match_stack_limit_size(void)
1034{
1035 return MatchStackLimitSize;
1036}
1037
1038extern int
1039onig_set_match_stack_limit_size(unsigned int size)
1040{
1041 MatchStackLimitSize = size;
1042 return 0;
1043}
1044
1045static int
1046stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
1047 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
1048{
1049 size_t n;
1050 OnigStackType *x, *stk_base, *stk_end, *stk;
1051
1052 stk_base = *arg_stk_base;
1053 stk_end = *arg_stk_end;
1054 stk = *arg_stk;
1055
1056 n = stk_end - stk_base;
1057 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
1058 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
1059 if (IS_NULL(x)) {
1060 STACK_SAVE;
1061 return ONIGERR_MEMORY;
1062 }
1063 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
1064 n *= 2;
1065 }
1066 else {
1067 unsigned int limit_size = MatchStackLimitSize;
1068 n *= 2;
1069 if (limit_size != 0 && n > limit_size) {
1070 if ((unsigned int )(stk_end - stk_base) == limit_size)
1071 return ONIGERR_MATCH_STACK_LIMIT_OVER;
1072 else
1073 n = limit_size;
1074 }
1075 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
1076 if (IS_NULL(x)) {
1077 STACK_SAVE;
1078 return ONIGERR_MEMORY;
1079 }
1080 }
1081 *arg_stk = x + (stk - stk_base);
1082 *arg_stk_base = x;
1083 *arg_stk_end = x + n;
1084 return 0;
1085}
1086
/*
 * Make sure at least n more entries fit on the stack, growing it with
 * stack_double() when they do not.  On failure the stack is saved, the
 * caller's xmalloc_base block (if any) is freed and the enclosing function
 * returns the error code.
 */
#define STACK_ENSURE(n) do {\
  if (stk_end - stk < (n)) {\
    int r = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (r != 0) {\
      STACK_SAVE;\
      if (xmalloc_base) xfree(xmalloc_base);\
      return r;\
    }\
  }\
} while(0)

/* Conversions between stack entry pointers and indexes from stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(stk)   ((stk) - stk_base)

/* Push an entry that carries only a type; null_check is inherited from
 * the entry below (0 for the first entry). */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  STACK_INC;\
} while(0)
1107
/* Non-zero when the entry's type has any bit of STK_MASK_TO_VOID_TARGET. */
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
1109
/*
 * State-pushing macros, in two flavours selected by
 * USE_COMBINATION_EXPLOSION_CHECK.
 *
 * With the check enabled, STATE_CHECK_POS maps (string position, state
 * number) to a bit index in state_check_buff, STATE_CHECK_VAL reads that
 * bit (0 when there is no buffer) and ELSE_IF_STATE_CHECK_MARK sets it for
 * a popped STK_STATE_CHECK_MARK entry.
 *
 * Every push macro inherits null_check from the entry below (0 for the
 * first entry), exactly like STACK_PUSH_TYPE and the variants used when
 * the check is disabled; otherwise a later push would copy an
 * uninitialised null_check from the entry written here.
 */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
# define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))
# define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff != NULL) {\
    ptrdiff_t x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
  else (v) = 0;\
} while(0)


# define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    ptrdiff_t x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8)); \
  }

# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type = STK_ALT;\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

# define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type = STK_STATE_CHECK_MARK;\
    stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
    stk->u.state.pstr = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

# define ELSE_IF_STATE_CHECK_MARK(stk)

# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type = (stack_type);\
  stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1189
/* Convenience wrappers: each pushes one entry of a fixed kind. */
#define STACK_PUSH_ALT(pat,s,sprev,keep) \
  STACK_PUSH(STK_ALT,pat,s,sprev,keep)
/* STK_POS entries carry no pattern address. */
#define STACK_PUSH_POS(s,sprev,keep) \
  STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) \
  STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
#define STACK_PUSH_ABSENT \
  STACK_PUSH_TYPE(STK_ABSENT)
#define STACK_PUSH_STOP_BT \
  STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
  STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
1197
1198#ifdef USE_CACHE_MATCH_OPT
1199
/* Match-cache probe, active only when `enable` is true.
 * Looks up the cache index for instruction `p`; a negative index means
 * the instruction is not cached and nothing happens.  Otherwise the bit
 * numbered  num_cache_size * pos + cache_index  in `match_cache` is
 * tested: if already set, control jumps to `fail`; if clear, it is set. */
#define DO_CACHE_MATCH_OPT(reg,stk,repeat_stk,enable,p,num_cache_table,num_cache_size,table,pos,match_cache) do { \
  if (enable) { \
    long cache_index = find_cache_index_table((reg), (stk), (repeat_stk), (table), (num_cache_table), (p)); \
    if (cache_index >= 0) { \
      long key   = (num_cache_size) * (long)(pos) + cache_index; \
      long index = key >> 3;        /* byte holding the bit */ \
      long mask  = 1 << (key & 7);  /* bit inside that byte */ \
      if (((match_cache)[index] & mask) != 0) goto fail; \
      (match_cache)[index] |= mask; \
    } \
  } \
} while (0)
1214
1215static long
1216find_cache_index_table(regex_t* reg, OnigStackType *stk, OnigStackIndex *repeat_stk, OnigCacheIndex* table, long num_cache_table, UChar* p)
1217{
1218 long l = 0, r = num_cache_table - 1, m = 0;
1219 OnigCacheIndex* item;
1220 OnigRepeatRange* range;
1221 OnigStackType *stkp;
1222 int count = 0;
1223 int is_inc = *p == OP_REPEAT_INC || *p == OP_REPEAT_INC_NG;
1224
1225 while (l <= r) {
1226 m = (l + r) / 2;
1227 if (table[m].addr == p) break;
1228 if (table[m].addr < p) l = m + 1;
1229 else r = m - 1;
1230 }
1231
1232 if (!(0 <= m && m < num_cache_table && table[m].addr == p)) {
1233 return -1;
1234 }
1235
1236 item = &table[m];
1237 if (item->outer_repeat == -1) {
1238 return item->num;
1239 }
1240
1241 range = &reg->repeat_range[item->outer_repeat];
1242
1243 stkp = &stk[repeat_stk[item->outer_repeat]];
1244 count = is_inc ? stkp->u.repeat.count - 1 : stkp->u.repeat.count;
1245
1246 if (count < range->lower) {
1247 return range->base_num + range->inner_num * count + item->num;
1248 }
1249
1250 if (range->upper == 0x7fffffff) {
1251 return range->base_num + range->inner_num * (range->lower - (is_inc ? 1 : 0)) + (is_inc ? 0 : 1) + item->num;
1252 }
1253
1254 return range->base_num + range->inner_num * (range->lower - 1) + (range->inner_num + 1) * (count - range->lower + 1) + item->num;
1255}
1256
1257static void
1258reset_match_cache(regex_t* reg, UChar* pbegin, UChar* pend, long pos, uint8_t* match_cache, OnigCacheIndex *table, long num_cache_size, long num_cache_table)
1259{
1260 long l = 0, r = num_cache_table - 1, m1 = 0, m2 = 0;
1261 int is_inc = *pend == OP_REPEAT_INC || *pend == OP_REPEAT_INC_NG;
1262 OnigCacheIndex *item1, *item2;
1263 long k1, k2, base;
1264
1265 while (l <= r) {
1266 m1 = (l + r) / 2;
1267 if (table[m1].addr == pbegin) break;
1268 if (table[m1].addr < pbegin) l = m1 + 1;
1269 else r = m1 - 1;
1270 }
1271
1272 l = 0, r = num_cache_table - 1;
1273 while (l <= r) {
1274 m2 = (l + r) / 2;
1275 if (table[m2].addr == pend) break;
1276 if (table[m2].addr < pend) l = m2 + 1;
1277 else r = m2 - 1;
1278 }
1279
1280 if (table[m1].addr < pbegin && m1 + 1 < num_cache_table) m1++;
1281 if (table[m2].addr > pend && m2 - 1 > 0) m2--;
1282
1283 item1 = &table[m1];
1284 item2 = &table[m2];
1285
1286 if (item1->outer_repeat < 0) k1 = item1->num;
1287 else k1 = reg->repeat_range[item1->outer_repeat].base_num + item1->num;
1288
1289 if (item2->outer_repeat < 0) k2 = item2->num;
1290 else {
1291 OnigRepeatRange *range = &reg->repeat_range[item2->outer_repeat];
1292 if (range->upper == 0x7fffffff) k2 = range->base_num + range->inner_num * range->lower + (is_inc ? 0 : 1) + item2->num;
1293 else k2 = range->base_num + range->inner_num * range->lower + (range->inner_num + 1) * (range->upper - range->lower - (is_inc ? 1 : 0)) + item2->num;
1294 }
1295
1296 base = pos * num_cache_size;
1297 k1 += base;
1298 k2 += base;
1299
1300 if ((k1 >> 3) == (k2 >> 3)) {
1301 match_cache[k1 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1)) | ((1 << (k1 & 7)) - 1);
1302 } else {
1303 long i = k1 >> 3;
1304 if (k1 & 7) {
1305 match_cache[k1 >> 3] &= (1 << ((k1 & 7) - 1)) - 1;
1306 i++;
1307 }
1308 if (i < (k2 >> 3)) {
1309 xmemset(&match_cache[i], 0, (k2 >> 3) - i);
1310 if (k2 & 7) {
1311 match_cache[k2 >> 3] &= (((1 << (8 - (k2 & 7) - 1)) - 1) << ((k2 & 7) + 1));
1312 }
1313 }
1314 }
1315}
1316
1317#else
/* Cache-match optimization compiled out: the probe expands to nothing. */
#define DO_CACHE_MATCH_OPT(reg, stk, repeat_stk, enable, p, num_cache_table, num_cache_size, table, pos, match_cache)
1319#endif /* USE_CACHE_MATCH_OPT */
1320
/* Push an STK_REPEAT entry for repeat number `id` whose body starts at
 * `pat`; the iteration counter starts at 0. */
#define STACK_PUSH_REPEAT(id, pat) do { \
  STACK_ENSURE(1); \
  stk->type           = STK_REPEAT; \
  stk->null_check     = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.repeat.num   = (id); \
  stk->u.repeat.pcode = (pat); \
  stk->u.repeat.count = 0; \
  STACK_INC; \
} while (0)
1330
/* Push an STK_REPEAT_INC entry that remembers the stack index `sindex`
 * of the STK_REPEAT entry it refers to; the pop macros use it to
 * decrement that entry's counter again. */
#define STACK_PUSH_REPEAT_INC(sindex) do { \
  STACK_ENSURE(1); \
  stk->type            = STK_REPEAT_INC; \
  stk->null_check      = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.repeat_inc.si = (sindex); \
  STACK_INC; \
} while (0)
1338
/* Push an STK_MEM_START entry for capture group `mnum` at position `s`.
 * The previous start/end indexes of the group are saved in the entry so
 * they can be restored on pop; the group's start index then points at
 * this entry and its end index is invalidated. */
#define STACK_PUSH_MEM_START(mnum, s) do { \
  STACK_ENSURE(1); \
  stk->type        = STK_MEM_START; \
  stk->null_check  = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.mem.num   = (mnum); \
  stk->u.mem.pstr  = (s); \
  stk->u.mem.start = mem_start_stk[mnum]; \
  stk->u.mem.end   = mem_end_stk[mnum]; \
  mem_start_stk[mnum] = GET_STACK_INDEX(stk); \
  mem_end_stk[mnum]   = INVALID_STACK_INDEX; \
  STACK_INC; \
} while (0)
1351
/* Push an STK_MEM_END entry for capture group `mnum` at position `s`.
 * The group's previous start/end indexes are saved in the entry and the
 * group's end index is redirected to this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do { \
  STACK_ENSURE(1); \
  stk->type        = STK_MEM_END; \
  stk->null_check  = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.mem.num   = (mnum); \
  stk->u.mem.pstr  = (s); \
  stk->u.mem.start = mem_start_stk[mnum]; \
  stk->u.mem.end   = mem_end_stk[mnum]; \
  mem_end_stk[mnum] = GET_STACK_INDEX(stk); \
  STACK_INC; \
} while (0)
1363
/* Push an STK_MEM_END_MARK entry for capture group `mnum`; it carries
 * only the group number. */
#define STACK_PUSH_MEM_END_MARK(mnum) do { \
  STACK_ENSURE(1); \
  stk->type       = STK_MEM_END_MARK; \
  stk->null_check = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.mem.num  = (mnum); \
  STACK_INC; \
} while (0)
1371
/* Walk down from the stack top and leave `k` on the STK_MEM_START entry
 * of group `mnum` that is not paired with a later end entry/mark of the
 * same group.  `level` counts the end entries seen so far.  If no such
 * entry exists the walk stops with k == stk_base. */
#define STACK_GET_MEM_START(mnum, k) do { \
  int level = 0; \
  for (k = stk; k > stk_base; ) { \
    k--; \
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
        && k->u.mem.num == (mnum)) { \
      level++; \
    } \
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) { \
      if (level == 0) break; \
      level--; \
    } \
  } \
} while (0)
1387
/* Walk upward from `k` towards the stack top and extract the subject
 * range of group `mnum`: `start` is taken from the outermost
 * STK_MEM_START seen and `end` from the STK_MEM_END that balances it.
 * `k` is left on that end entry, or equals stk when none was found. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do { \
  int level = 0; \
  for (; k < stk; k++) { \
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) { \
      if (level == 0) (start) = k->u.mem.pstr; \
      level++; \
    } \
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) { \
      level--; \
      if (level == 0) { \
        (end) = k->u.mem.pstr; \
        break; \
      } \
    } \
  } \
} while (0)
1405
/* Push an STK_NULL_CHECK_START entry for check id `cnum` at position `s`.
 * Unlike ordinary entries, null_check is set to this entry's own stack
 * index, so entries pushed afterwards inherit a pointer to it. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do { \
  STACK_ENSURE(1); \
  stk->type              = STK_NULL_CHECK_START; \
  stk->null_check        = (OnigStackIndex)(stk - stk_base); \
  stk->u.null_check.num  = (cnum); \
  stk->u.null_check.pstr = (s); \
  STACK_INC; \
} while (0)
1414
/* Push an STK_NULL_CHECK_END entry for check id `cnum`; null_check is
 * set to this entry's own stack index. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do { \
  STACK_ENSURE(1); \
  stk->type             = STK_NULL_CHECK_END; \
  stk->null_check       = (OnigStackIndex)(stk - stk_base); \
  stk->u.null_check.num = (cnum); \
  STACK_INC; \
} while (0)
1422
/* Push an STK_CALL_FRAME entry whose return address is `pat`. */
#define STACK_PUSH_CALL_FRAME(pat) do { \
  STACK_ENSURE(1); \
  stk->type                  = STK_CALL_FRAME; \
  stk->null_check            = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.call_frame.ret_addr = (pat); \
  STACK_INC; \
} while (0)
1430
/* Push an STK_RETURN marker; it has no payload. */
#define STACK_PUSH_RETURN do { \
  STACK_ENSURE(1); \
  stk->type       = STK_RETURN; \
  stk->null_check = (stk == stk_base) ? 0 : stk[-1].null_check; \
  STACK_INC; \
} while (0)
1437
/* Push an STK_ABSENT_POS entry recording the two subject pointers
 * `start` and `end`. */
#define STACK_PUSH_ABSENT_POS(start, end) do { \
  STACK_ENSURE(1); \
  stk->type                  = STK_ABSENT_POS; \
  stk->null_check            = (stk == stk_base) ? 0 : stk[-1].null_check; \
  stk->u.absent_pos.abs_pstr = (start); \
  stk->u.absent_pos.end_pstr = (end); \
  STACK_INC; \
} while (0)
1446
1447
/* Debug aid: when ONIG_DEBUG is defined, report the calling site `at`
 * and jump to stack_error if pointer `p` has moved below the stack base.
 * In normal builds the check expands to nothing. */
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) \
  if (stk_base > (p)) { \
    fprintf(stderr, "at %s\n", at); \
    goto stack_error; \
  }
#else
# define STACK_BASE_CHECK(p, at)
#endif
1457
/* Drop exactly one entry from the stack top. */
#define STACK_POP_ONE do { \
  --stk; \
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while (0)
1462
/* Pop entries until one whose type has a STK_MASK_POP_USED bit is on top
 * (stk is left pointing at it).  How much bookkeeping is undone on the
 * way depends on pop_level:
 *   STACK_POP_LEVEL_FREE       - nothing is restored;
 *   STACK_POP_LEVEL_MEM_START  - group indexes saved in STK_MEM_START
 *                                entries are restored;
 *   anything else              - additionally STK_MEM_END entries are
 *                                restored and STK_REPEAT_INC entries
 *                                decrement their repeat counter. */
#define STACK_POP do { \
  switch (pop_level) { \
  case STACK_POP_LEVEL_FREE: \
    for (;;) { \
      stk--; \
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break; \
      ELSE_IF_STATE_CHECK_MARK(stk); \
    } \
    break; \
  case STACK_POP_LEVEL_MEM_START: \
    for (;;) { \
      stk--; \
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break; \
      else if (stk->type == STK_MEM_START) { \
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start; \
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end; \
      } \
      ELSE_IF_STATE_CHECK_MARK(stk); \
    } \
    break; \
  default: \
    for (;;) { \
      stk--; \
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0) break; \
      else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) { \
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start; \
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end; \
      } \
      else if (stk->type == STK_REPEAT_INC) { \
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--; \
      } \
      ELSE_IF_STATE_CHECK_MARK(stk); \
    } \
    break; \
  } \
} while (0)
1506
/* Pop entries until an STK_POS_NOT entry is on top, restoring capture
 * group indexes and repeat counters recorded in the entries passed. */
#define STACK_POP_TIL_POS_NOT do { \
  for (;;) { \
    stk--; \
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \
    if (stk->type == STK_POS_NOT) break; \
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) { \
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start; \
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end; \
    } \
    else if (stk->type == STK_REPEAT_INC) { \
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--; \
    } \
    ELSE_IF_STATE_CHECK_MARK(stk); \
  } \
} while (0)
1526
/* Pop entries until an STK_LOOK_BEHIND_NOT entry is on top, restoring
 * capture group indexes and repeat counters recorded in the entries
 * passed. */
#define STACK_POP_TIL_LOOK_BEHIND_NOT do { \
  for (;;) { \
    stk--; \
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \
    if (stk->type == STK_LOOK_BEHIND_NOT) break; \
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) { \
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start; \
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end; \
    } \
    else if (stk->type == STK_REPEAT_INC) { \
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--; \
    } \
    ELSE_IF_STATE_CHECK_MARK(stk); \
  } \
} while (0)
1546
/* Pop entries until an STK_ABSENT entry is on top, restoring capture
 * group indexes and repeat counters recorded in the entries passed. */
#define STACK_POP_TIL_ABSENT do { \
  for (;;) { \
    stk--; \
    STACK_BASE_CHECK(stk, "STACK_POP_TIL_ABSENT"); \
    if (stk->type == STK_ABSENT) break; \
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) { \
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start; \
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end; \
    } \
    else if (stk->type == STK_REPEAT_INC) { \
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--; \
    } \
    ELSE_IF_STATE_CHECK_MARK(stk); \
  } \
} while (0)
1566
/* Pop the top entry and read back the two subject pointers stored by
 * STACK_PUSH_ABSENT_POS.  The entry's type is not checked here. */
#define STACK_POP_ABSENT_POS(start, end) do { \
  --stk; \
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr; \
  (end)   = stk->u.absent_pos.end_pstr; \
} while (0)
1573
/* Walk down from the stack top, turning every entry that satisfies
 * IS_TO_VOID_TARGET into STK_VOID, until the nearest STK_POS entry is
 * found; that entry is voided as well and `k` is left pointing at it.
 * The stack top itself is not moved. */
#define STACK_POS_END(k) do { \
  k = stk; \
  for (;;) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) { \
      k->type = STK_VOID; \
      continue; \
    } \
    if (k->type == STK_POS) { \
      k->type = STK_VOID; \
      break; \
    } \
  } \
} while (0)
1588
/* Walk down from the stack top, turning every entry that satisfies
 * IS_TO_VOID_TARGET into STK_VOID, until the nearest STK_STOP_BT entry
 * is found; that entry is voided as well.  The stack top is not moved. */
#define STACK_STOP_BT_END do { \
  OnigStackType *k = stk; \
  for (;;) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) { \
      k->type = STK_VOID; \
      continue; \
    } \
    if (k->type == STK_STOP_BT) { \
      k->type = STK_VOID; \
      break; \
    } \
  } \
} while (0)
1603
/* Find the nearest STK_NULL_CHECK_START entry with id `id`, starting at
 * the entry that the top entry's null_check index refers to and walking
 * downward, then set `isnull` to 1 if the subject position stored there
 * equals `s` and to 0 otherwise. */
#define STACK_NULL_CHECK(isnull,id,s) do { \
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1; \
  do { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
  } while (k->type != STK_NULL_CHECK_START || k->u.null_check.num != (id)); \
  (isnull) = (k->u.null_check.pstr == (s)); \
} while (0)
1617
/* Recursion-aware variant of STACK_NULL_CHECK.
 *
 * Walks downward from the entry referenced by the top entry's null_check
 * index.  Every STK_NULL_CHECK_END with id `id` seen on the way closes one
 * nested check of the same id, so the matching STK_NULL_CHECK_START must
 * be skipped; `level` counts these.  At the first START with id `id` and
 * level 0, `isnull` becomes 1 if its stored position equals `s`, else 0.
 *
 * Only END entries of the same id are counted.  Counting END entries of
 * other ids (as was done before) left `level` unbalanced, because the
 * decrement only happens on START entries of this id, and made the walk
 * skip the wanted START entry.  This matches STACK_NULL_CHECK_MEMST_REC. */
#define STACK_NULL_CHECK_REC(isnull,id,s) do { \
  int level = 0; \
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1; \
  while (1) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) { \
      if (k->u.null_check.num == (id)) { \
        if (level == 0) { \
          (isnull) = (k->u.null_check.pstr == (s)); \
          break; \
        } \
        else level--; \
      } \
    } \
    else if (k->type == STK_NULL_CHECK_END) { \
      if (k->u.null_check.num == (id)) level++; \
    } \
  } \
} while(0)
1638
/* Null-loop check that also inspects capture groups.
 *
 * The nearest STK_NULL_CHECK_START with id `id` is located first.  If its
 * stored position differs from `s`, `isnull` is 0.  Otherwise `isnull`
 * starts as 1 and every STK_MEM_START entry from there up to the stack
 * top is examined:
 *   - a group whose saved end index is invalid, or whose saved start and
 *     end positions differ, sets isnull = 0 and ischange = 1 and stops;
 *   - a group that is empty but ended somewhere other than `s` sets
 *     isnull = -1 and the scan continues.
 * Whether the saved end is a stack index or a raw pointer is decided per
 * group by the bt_mem_end bit set of `reg`. */
#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do { \
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1; \
  for (;;) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
    if (k->type != STK_NULL_CHECK_START) continue; \
    if (k->u.null_check.num != (id)) continue; \
    if (k->u.null_check.pstr != (s)) { \
      (isnull) = 0; \
      break; \
    } \
    { \
      UChar* endp; \
      (isnull) = 1; \
      for (; k < stk; k++) { \
        if (k->type != STK_MEM_START) continue; \
        if (k->u.mem.end == INVALID_STACK_INDEX) { \
          (isnull) = 0; (ischange) = 1; break; \
        } \
        endp = BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num) \
                 ? STACK_AT(k->u.mem.end)->u.mem.pstr \
                 : (UChar* )k->u.mem.end; \
        if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) { \
          (isnull) = 0; (ischange) = 1; break; \
        } \
        if (endp != s) { \
          (isnull) = -1; /* empty, but position changed */ \
        } \
      } \
    } \
    break; \
  } \
} while (0)
1677
/* Recursion-aware variant of STACK_NULL_CHECK_MEMST.
 *
 * STK_NULL_CHECK_END entries with id `id` raise `level`, and each START
 * of the same id seen at a non-zero level lowers it again, so nested
 * start/end pairs of this id are skipped.  At the START found with
 * level 0: a stored position different from `s` gives isnull = 0;
 * otherwise isnull starts as 1 and the STK_MEM_START entries up to the
 * stack top are examined - a group with an invalid end index or with
 * differing start/end positions gives isnull = 0 and stops, an empty
 * group that ended elsewhere than `s` gives isnull = -1. */
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do { \
  int level = 0; \
  OnigStackType* k = STACK_AT((stk-1)->null_check)+1; \
  for (;;) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
    if (k->type == STK_NULL_CHECK_END) { \
      if (k->u.null_check.num == (id)) level++; \
      continue; \
    } \
    if (k->type != STK_NULL_CHECK_START) continue; \
    if (k->u.null_check.num != (id)) continue; \
    if (level != 0) { \
      level--; \
      continue; \
    } \
    if (k->u.null_check.pstr != (s)) { \
      (isnull) = 0; \
      break; \
    } \
    { \
      UChar* endp; \
      (isnull) = 1; \
      for (; k < stk; k++) { \
        if (k->type != STK_MEM_START) continue; \
        if (k->u.mem.end == INVALID_STACK_INDEX) { \
          (isnull) = 0; break; \
        } \
        endp = BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num) \
                 ? STACK_AT(k->u.mem.end)->u.mem.pstr \
                 : (UChar* )k->u.mem.end; \
        if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) { \
          (isnull) = 0; break; \
        } \
        if (endp != s) { \
          (isnull) = -1; /* empty, but position changed */ \
        } \
      } \
    } \
    break; \
  } \
} while (0)
1725
/* Walk down from the stack top and leave `k` on the STK_REPEAT entry
 * with number `id` that is seen while `level` is 0.  `level` goes up on
 * each STK_RETURN and down on each STK_CALL_FRAME passed on the way. */
#define STACK_GET_REPEAT(id, k) do { \
  int level = 0; \
  k = stk; \
  for (;;) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) { \
      if (level == 0 && k->u.repeat.num == (id)) break; \
    } \
    else if (k->type == STK_CALL_FRAME) level--; \
    else if (k->type == STK_RETURN)     level++; \
  } \
} while (0)
1743
/* Walk down from the stack top to the STK_CALL_FRAME entry that is not
 * balanced by a later STK_RETURN and copy its return address to `addr`.
 * The stack itself is not modified. */
#define STACK_RETURN(addr) do { \
  int level = 0; \
  OnigStackType* k = stk; \
  for (;;) { \
    k--; \
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) { \
      level++; \
    } \
    else if (k->type == STK_CALL_FRAME) { \
      if (level == 0) { \
        (addr) = k->u.call_frame.ret_addr; \
        break; \
      } \
      level--; \
    } \
  } \
} while (0)
1761
1762
/* Compare `len` bytes at s1 and s2, advancing both pointers and counting
 * `len` down as it goes; jumps to `fail` at the first difference.  All
 * three arguments must be modifiable lvalues. */
#define STRING_CMP(s1,s2,len) do { \
  for (; len-- > 0; ) { \
    if (*s1++ != *s2++) goto fail; \
  } \
} while (0)
1768
/* Case-insensitive comparison via string_cmp_ic() using the local
 * `encode`; jumps to `fail` when that function reports a mismatch (0). */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do { \
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) { \
    goto fail; \
  } \
} while (0)
1773
1774static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1775 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1776{
1777 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1778 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1779 UChar *p1, *p2, *end1, *s2;
1780 int len1, len2;
1781
1782 s2 = *ps2;
1783 end1 = s1 + mblen;
1784 while (s1 < end1) {
1785 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1786 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1787 if (len1 != len2) return 0;
1788 p1 = buf1;
1789 p2 = buf2;
1790 while (len1-- > 0) {
1791 if (*p1 != *p2) return 0;
1792 p1++;
1793 p2++;
1794 }
1795 }
1796
1797 *ps2 = s2;
1798 return 1;
1799}
1800
/* Like STRING_CMP, but reports the outcome in `is_fail` (1 = mismatch,
 * 0 = equal) instead of jumping.  s1, s2 and len are advanced/consumed. */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do { \
  is_fail = 0; \
  for (; len-- > 0; ) { \
    if (*s1++ != *s2++) { \
      is_fail = 1; \
      break; \
    } \
  } \
} while (0)
1809
/* Case-insensitive counterpart of STRING_CMP_VALUE: `is_fail` becomes 1
 * when string_cmp_ic() returns 0 and 0 otherwise. */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do { \
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) != 0) \
    is_fail = 0; \
  else \
    is_fail = 1; \
} while (0)
1816
1817
/* Subject-string position helpers; they refer to the locals str, end,
 * s and (in range-restricted builds) right_range of the matcher. */
#define IS_EMPTY_STR        (str == end)
#define ON_STR_BEGIN(s)     ((s) == str)
#define ON_STR_END(s)       ((s) == end)

/* DATA_ENSURE* test whether n more bytes are available from s.  The
 * limit is right_range when matches must stay inside the search range,
 * and the end of the subject otherwise. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define DATA_ENSURE_CHECK1       (s < right_range)
# define DATA_ENSURE_CHECK(n)     (s + (n) <= right_range)
# define DATA_ENSURE(n)           if (s + (n) > right_range) goto fail
# define DATA_ENSURE_CONTINUE(n)  if (s + (n) > right_range) continue
# define ABSENT_END_POS           right_range
#else
# define DATA_ENSURE_CHECK1       (s < end)
# define DATA_ENSURE_CHECK(n)     (s + (n) <= end)
# define DATA_ENSURE(n)           if (s + (n) > end) goto fail
# define DATA_ENSURE_CONTINUE(n)  if (s + (n) > end) continue
# define ABSENT_END_POS           end
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1834
1835
1836#ifdef USE_CAPTURE_HISTORY
1837static int
1838make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1839 OnigStackType* stk_top, UChar* str, regex_t* reg)
1840{
1841 int n, r;
1842 OnigCaptureTreeNode* child;
1843 OnigStackType* k = *kp;
1844
1845 while (k < stk_top) {
1846 if (k->type == STK_MEM_START) {
1847 n = k->u.mem.num;
1848 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1849 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1850 child = history_node_new();
1851 CHECK_NULL_RETURN_MEMERR(child);
1852 child->group = n;
1853 child->beg = k->u.mem.pstr - str;
1854 r = history_tree_add_child(node, child);
1855 if (r != 0) {
1856 history_tree_free(child);
1857 return r;
1858 }
1859 *kp = (k + 1);
1860 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1861 if (r != 0) return r;
1862
1863 k = *kp;
1864 child->end = k->u.mem.pstr - str;
1865 }
1866 }
1867 else if (k->type == STK_MEM_END) {
1868 if (k->u.mem.num == node->group) {
1869 node->end = k->u.mem.pstr - str;
1870 *kp = k;
1871 return 0;
1872 }
1873 }
1874 k++;
1875 }
1876
1877 return 1; /* 1: root node ending. */
1878}
1879#endif /* USE_CAPTURE_HISTORY */
1880
1881#ifdef USE_BACKREF_WITH_LEVEL
1882static int
1883mem_is_in_memp(int mem, int num, UChar* memp)
1884{
1885 int i;
1886 MemNumType m;
1887
1888 for (i = 0; i < num; i++) {
1889 GET_MEMNUM_INC(m, memp);
1890 if (mem == (int )m) return 1;
1891 }
1892 return 0;
1893}
1894
1895static int backref_match_at_nested_level(regex_t* reg,
1896 OnigStackType* top, OnigStackType* stk_base,
1897 int ignore_case, int case_fold_flag,
1898 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1899{
1900 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1901 int level;
1902 OnigStackType* k;
1903
1904 level = 0;
1905 k = top;
1906 k--;
1907 while (k >= stk_base) {
1908 if (k->type == STK_CALL_FRAME) {
1909 level--;
1910 }
1911 else if (k->type == STK_RETURN) {
1912 level++;
1913 }
1914 else if (level == nest) {
1915 if (k->type == STK_MEM_START) {
1916 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1917 pstart = k->u.mem.pstr;
1918 if (pend != NULL_UCHARP) {
1919 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1920 p = pstart;
1921 ss = *s;
1922
1923 if (ignore_case != 0) {
1924 if (string_cmp_ic(reg->enc, case_fold_flag,
1925 pstart, &ss, pend - pstart, send) == 0)
1926 return 0; /* or goto next_mem; */
1927 }
1928 else {
1929 while (p < pend) {
1930 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1931 }
1932 }
1933
1934 *s = ss;
1935 return 1;
1936 }
1937 }
1938 }
1939 else if (k->type == STK_MEM_END) {
1940 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1941 pend = k->u.mem.pstr;
1942 }
1943 }
1944 }
1945 k--;
1946 }
1947
1948 return 0;
1949}
1950#endif /* USE_BACKREF_WITH_LEVEL */
1951
1952
1953#ifdef ONIG_DEBUG_STATISTICS
1954
1955# ifdef _WIN32
1956# include <windows.h>
1957static LARGE_INTEGER ts, te, freq;
1958# define GETTIME(t) QueryPerformanceCounter(&(t))
1959# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
1960 * 1000000 / freq.QuadPart)
1961# else /* _WIN32 */
1962
1963# define USE_TIMEOFDAY
1964
1965# ifdef USE_TIMEOFDAY
1966# ifdef HAVE_SYS_TIME_H
1967# include <sys/time.h>
1968# endif
1969# ifdef HAVE_UNISTD_H
1970# include <unistd.h>
1971# endif
1972static struct timeval ts, te;
1973# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
1974# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
1975 (((te).tv_sec - (ts).tv_sec)*1000000))
1976# else /* USE_TIMEOFDAY */
1977# ifdef HAVE_SYS_TIMES_H
1978# include <sys/times.h>
1979# endif
1980static struct tms ts, te;
1981# define GETTIME(t) times(&(t))
1982# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
1983# endif /* USE_TIMEOFDAY */
1984
1985# endif /* _WIN32 */
1986
1987static int OpCounter[256];
1988static int OpPrevCounter[256];
1989static unsigned long OpTime[256];
1990static int OpCurr = OP_FINISH;
1991static int OpPrevTarget = OP_FAIL;
1992static int MaxStackDepth = 0;
1993
1994# define MOP_IN(opcode) do {\
1995 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1996 OpCurr = opcode;\
1997 OpCounter[opcode]++;\
1998 GETTIME(ts);\
1999} while(0)
2000
2001# define MOP_OUT do {\
2002 GETTIME(te);\
2003 OpTime[OpCurr] += TIMEDIFF(te, ts);\
2004} while(0)
2005
2006extern void
2007onig_statistics_init(void)
2008{
2009 int i;
2010 for (i = 0; i < 256; i++) {
2011 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
2012 }
2013 MaxStackDepth = 0;
2014# ifdef _WIN32
2015 QueryPerformanceFrequency(&freq);
2016# endif
2017}
2018
2019extern void
2020onig_print_statistics(FILE* f)
2021{
2022 int i;
2023 fprintf(f, " count prev time\n");
2024 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
2025 fprintf(f, "%8d: %8d: %10lu: %s\n",
2026 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
2027 }
2028 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
2029}
2030
2031# define STACK_INC do {\
2032 stk++;\
2033 if (stk - stk_base > MaxStackDepth) \
2034 MaxStackDepth = stk - stk_base;\
2035} while(0)
2036
2037#else /* ONIG_DEBUG_STATISTICS */
2038# define STACK_INC stk++
2039
2040# define MOP_IN(opcode)
2041# define MOP_OUT
2042#endif /* ONIG_DEBUG_STATISTICS */
2043
2044
2045#ifdef ONIG_DEBUG_MATCH
2046static char *
2047stack_type_str(int stack_type)
2048{
2049 switch (stack_type) {
2050 case STK_ALT: return "Alt ";
2051 case STK_LOOK_BEHIND_NOT: return "LBNot ";
2052 case STK_POS_NOT: return "PosNot";
2053 case STK_MEM_START: return "MemS ";
2054 case STK_MEM_END: return "MemE ";
2055 case STK_REPEAT_INC: return "RepInc";
2056 case STK_STATE_CHECK_MARK: return "StChMk";
2057 case STK_NULL_CHECK_START: return "NulChS";
2058 case STK_NULL_CHECK_END: return "NulChE";
2059 case STK_MEM_END_MARK: return "MemEMk";
2060 case STK_POS: return "Pos ";
2061 case STK_STOP_BT: return "StopBt";
2062 case STK_REPEAT: return "Rep ";
2063 case STK_CALL_FRAME: return "Call ";
2064 case STK_RETURN: return "Ret ";
2065 case STK_VOID: return "Void ";
2066 case STK_ABSENT_POS: return "AbsPos";
2067 case STK_ABSENT: return "Absent";
2068 default: return " ";
2069 }
2070}
2071#endif
2072
2073/* match data(str - end) from position (sstart). */
2074/* if sstart == str then set sprev to NULL. */
2075static OnigPosition
2076match_at(regex_t* reg, const UChar* str, const UChar* end,
2077#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
2078 const UChar* right_range,
2079#endif
2080 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
2081{
2082 static const UChar FinishCode[] = { OP_FINISH };
2083
2084 int i, num_mem, pop_level;
2085 ptrdiff_t n, best_len;
2086 LengthType tlen, tlen2;
2087 MemNumType mem;
2088 RelAddrType addr;
2089 OnigOptionType option = reg->options;
2090 OnigEncoding encode = reg->enc;
2091 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
2092 UChar *s, *q, *sbegin;
2093 UChar *p = reg->p;
2094 UChar *pbegin = p;
2095 UChar *pkeep;
2096 char *alloca_base;
2097 char *xmalloc_base = NULL;
2098 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
2099 OnigStackType *stkp; /* used as any purpose. */
2100 OnigStackIndex si;
2101 OnigStackIndex *repeat_stk;
2102 OnigStackIndex *mem_start_stk, *mem_end_stk;
2103#ifdef USE_COMBINATION_EXPLOSION_CHECK
2104 int scv;
2105 unsigned char* state_check_buff = msa->state_check_buff;
2106 int num_comb_exp_check = reg->num_comb_exp_check;
2107#endif
2108
2109#if USE_TOKEN_THREADED_VM
2110# define OP_OFFSET 1
2111# define VM_LOOP JUMP;
2112# define VM_LOOP_END
2113# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
2114# define DEFAULT L_DEFAULT:
2115# define NEXT sprev = sbegin; JUMP
2116# define JUMP pbegin = p; RB_GNUC_EXTENSION_BLOCK(goto *oplabels[*p++])
2117
2118 RB_GNUC_EXTENSION static const void *oplabels[] = {
2119 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
2120 &&L_OP_END, /* pattern code terminator (success end) */
2121
2122 &&L_OP_EXACT1, /* single byte, N = 1 */
2123 &&L_OP_EXACT2, /* single byte, N = 2 */
2124 &&L_OP_EXACT3, /* single byte, N = 3 */
2125 &&L_OP_EXACT4, /* single byte, N = 4 */
2126 &&L_OP_EXACT5, /* single byte, N = 5 */
2127 &&L_OP_EXACTN, /* single byte */
2128 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
2129 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
2130 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
2131 &&L_OP_EXACTMB2N, /* mb-length = 2 */
2132 &&L_OP_EXACTMB3N, /* mb-length = 3 */
2133 &&L_OP_EXACTMBN, /* other length */
2134
2135 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
2136 &&L_OP_EXACTN_IC, /* single byte, ignore case */
2137
2138 &&L_OP_CCLASS,
2139 &&L_OP_CCLASS_MB,
2140 &&L_OP_CCLASS_MIX,
2141 &&L_OP_CCLASS_NOT,
2142 &&L_OP_CCLASS_MB_NOT,
2143 &&L_OP_CCLASS_MIX_NOT,
2144
2145 &&L_OP_ANYCHAR, /* "." */
2146 &&L_OP_ANYCHAR_ML, /* "." multi-line */
2147 &&L_OP_ANYCHAR_STAR, /* ".*" */
2148 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
2149 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
2150 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
2151
2152 &&L_OP_WORD,
2153 &&L_OP_NOT_WORD,
2154 &&L_OP_WORD_BOUND,
2155 &&L_OP_NOT_WORD_BOUND,
2156# ifdef USE_WORD_BEGIN_END
2157 &&L_OP_WORD_BEGIN,
2158 &&L_OP_WORD_END,
2159# else
2160 &&L_DEFAULT,
2161 &&L_DEFAULT,
2162# endif
2163 &&L_OP_ASCII_WORD,
2164 &&L_OP_NOT_ASCII_WORD,
2165 &&L_OP_ASCII_WORD_BOUND,
2166 &&L_OP_NOT_ASCII_WORD_BOUND,
2167# ifdef USE_WORD_BEGIN_END
2168 &&L_OP_ASCII_WORD_BEGIN,
2169 &&L_OP_ASCII_WORD_END,
2170# else
2171 &&L_DEFAULT,
2172 &&L_DEFAULT,
2173# endif
2174
2175 &&L_OP_BEGIN_BUF,
2176 &&L_OP_END_BUF,
2177 &&L_OP_BEGIN_LINE,
2178 &&L_OP_END_LINE,
2179 &&L_OP_SEMI_END_BUF,
2180 &&L_OP_BEGIN_POSITION,
2181
2182 &&L_OP_BACKREF1,
2183 &&L_OP_BACKREF2,
2184 &&L_OP_BACKREFN,
2185 &&L_OP_BACKREFN_IC,
2186 &&L_OP_BACKREF_MULTI,
2187 &&L_OP_BACKREF_MULTI_IC,
2188# ifdef USE_BACKREF_WITH_LEVEL
2189 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
2190# else
2191 &&L_DEFAULT,
2192# endif
2193 &&L_OP_MEMORY_START,
2194 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
2195 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
2196# ifdef USE_SUBEXP_CALL
2197 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
2198# else
2199 &&L_DEFAULT,
2200# endif
2201 &&L_OP_MEMORY_END,
2202# ifdef USE_SUBEXP_CALL
2203 &&L_OP_MEMORY_END_REC, /* push marker to stack */
2204# else
2205 &&L_DEFAULT,
2206# endif
2207
2208 &&L_OP_KEEP,
2209
2210 &&L_OP_FAIL, /* pop stack and move */
2211 &&L_OP_JUMP,
2212 &&L_OP_PUSH,
2213 &&L_OP_POP,
2214# ifdef USE_OP_PUSH_OR_JUMP_EXACT
2215 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
2216# else
2217 &&L_DEFAULT,
2218# endif
2219 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
2220 &&L_OP_REPEAT, /* {n,m} */
2221 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
2222 &&L_OP_REPEAT_INC,
2223 &&L_OP_REPEAT_INC_NG, /* non greedy */
2224 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
2225 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
2226 &&L_OP_NULL_CHECK_START, /* null loop checker start */
2227 &&L_OP_NULL_CHECK_END, /* null loop checker end */
2228# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2229 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
2230# else
2231 &&L_DEFAULT,
2232# endif
2233# ifdef USE_SUBEXP_CALL
2234 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
2235# else
2236 &&L_DEFAULT,
2237# endif
2238
2239 &&L_OP_PUSH_POS, /* (?=...) start */
2240 &&L_OP_POP_POS, /* (?=...) end */
2241 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
2242 &&L_OP_FAIL_POS, /* (?!...) end */
2243 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
2244 &&L_OP_POP_STOP_BT, /* (?>...) end */
2245 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
2246 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
2247 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
2248 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
2249 &&L_OP_ABSENT, /* (?~...) start of inner loop */
2250 &&L_OP_ABSENT_END, /* (?~...) end */
2251
2252# ifdef USE_SUBEXP_CALL
2253 &&L_OP_CALL, /* \g<name> */
2254 &&L_OP_RETURN,
2255# else
2256 &&L_DEFAULT,
2257 &&L_DEFAULT,
2258# endif
2259 &&L_OP_CONDITION,
2260
2261# ifdef USE_COMBINATION_EXPLOSION_CHECK
2262 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
2263 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
2264 &&L_OP_STATE_CHECK, /* check only */
2265# else
2266 &&L_DEFAULT,
2267 &&L_DEFAULT,
2268 &&L_DEFAULT,
2269# endif
2270# ifdef USE_COMBINATION_EXPLOSION_CHECK
2271 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
2272 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
2273# else
2274 &&L_DEFAULT,
2275 &&L_DEFAULT,
2276# endif
2277 /* no need: IS_DYNAMIC_OPTION() == 0 */
2278# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2279 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
2280 &&L_OP_SET_OPTION /* set option */
2281# else
2282 &&L_DEFAULT,
2283 &&L_DEFAULT
2284# endif
2285 };
2286#else /* USE_TOKEN_THREADED_VM */
2287
/*
 * Switch-based opcode dispatch (the #else branch of the
 * USE_TOKEN_THREADED_VM conditional).
 *
 * VM_LOOP opens an endless loop that runs OPCODE_EXEC_HOOK, saves the current
 * bytecode and subject positions into pbegin and sbegin, and then switches on
 * the opcode read from *p (advancing p past it).  VM_LOOP_END closes the
 * switch, stores sbegin into sprev, and closes the loop.
 *
 * CASE(x) / DEFAULT expand to plain `case x:` / `default:` labels.
 * NEXT  is `break`: it leaves the switch, so `sprev = sbegin` is executed
 *       before the next opcode is fetched.
 * JUMP  is `continue; break`: it restarts the loop directly and therefore
 *       skips the `sprev = sbegin` update; the trailing `break` is never
 *       reached.
 * OP_OFFSET is 0 here; OPCODE_EXEC_HOOK subtracts it from p to locate the
 * opcode being executed.
 */
2288# define OP_OFFSET 0
2289# define VM_LOOP \
2290 while (1) { \
2291 OPCODE_EXEC_HOOK; \
2292 pbegin = p; \
2293 sbegin = s; \
2294 switch (*p++) {
2295# define VM_LOOP_END } sprev = sbegin; }
2296# define CASE(x) case x:
2297# define DEFAULT default:
2298# define NEXT break
2299# define JUMP continue; break
2300#endif /* USE_TOKEN_THREADED_VM */
2300#endif /* USE_TOKEN_THREADED_VM */
2301
2302
2303#ifdef USE_SUBEXP_CALL
2304/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
2305 etc. Additional space is required. */
2306# define ADD_NUMMEM 1
2307#else
2308/* Stack #0 is not used. */
2309# define ADD_NUMMEM 0
2310#endif
2311
2312 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
2313
2314 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
2315 pop_level = reg->stack_pop_level;
2316 num_mem = reg->num_mem;
2317 repeat_stk = (OnigStackIndex* )alloca_base;
2318
2319 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
2320 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
2321 {
2322 OnigStackIndex *pp = mem_start_stk;
2323 for (; pp < repeat_stk + n; pp += 2) {
2324 pp[0] = INVALID_STACK_INDEX;
2325 pp[1] = INVALID_STACK_INDEX;
2326 }
2327 }
2328#ifndef USE_SUBEXP_CALL
2329 mem_start_stk--; /* for index start from 1,
2330 mem_start_stk[1]..mem_start_stk[num_mem] */
2331 mem_end_stk--; /* for index start from 1,
2332 mem_end_stk[1]..mem_end_stk[num_mem] */
2333#endif
2334
2335#ifdef ONIG_DEBUG_MATCH
2336 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
2337 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
2338 fprintf(stderr, "size: %d, start offset: %d\n",
2339 (int )(end - str), (int )(sstart - str));
2340 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
2341#endif
2342
2343 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
2344 best_len = ONIG_MISMATCH;
2345 s = (UChar* )sstart;
2346 pkeep = (UChar* )sstart;
2347
2348
/*
 * OPCODE_EXEC_HOOK: per-opcode trace hook.
 *
 * With ONIG_DEBUG_MATCH defined, and only while s is non-NULL, it writes one
 * line to stderr consisting of:
 *   - the offset of s from str (-1 when the opcode is OP_FINISH),
 *   - a quoted excerpt of up to 7 characters of the subject starting at s,
 *     followed by "..." when more input remains (the excerpt is skipped for
 *     OP_FINISH because s may not be valid there), padded with spaces to
 *     20 columns,
 *   - stk - stk_base - 1 and stack_type_str() of stk[-1].type (blank when
 *     stk is not above stk_base),
 *   - the opcode's offset from reg->p (-1 for FinishCode) and the output of
 *     onig_print_compiled_byte_code() for that opcode.
 * The opcode is located as p - OP_OFFSET.
 *
 * Without ONIG_DEBUG_MATCH the hook expands to a no-op expression.
 */
2349#ifdef ONIG_DEBUG_MATCH
2350# define OPCODE_EXEC_HOOK \
2351 if (s) { \
2352 UChar *op, *q, *bp, buf[50]; \
2353 int len; \
2354 op = p - OP_OFFSET; \
2355 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
2356 bp = buf; \
2357 q = s; \
2358 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
2359 for (i = 0; i < 7 && q < end; i++) { \
2360 len = enclen(encode, q, end); \
2361 while (len-- > 0) *bp++ = *q++; \
2362 } \
2363 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
2364 } \
2365 xmemcpy(bp, "\"", 1); bp += 1; \
2366 *bp = 0; \
2367 fputs((char* )buf, stderr); \
2368 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
2369 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
2370 stk - stk_base - 1, \
2371 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
2372 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
2373 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
2374 fprintf(stderr, "\n"); \
2375 }
2376#else
2377# define OPCODE_EXEC_HOOK ((void) 0)
2378#endif
2379
2380
2381 VM_LOOP {
2382 CASE(OP_END) MOP_IN(OP_END);
2383 n = s - sstart;
2384 if (n > best_len) {
2385 OnigRegion* region;
2386#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2387 if (IS_FIND_LONGEST(option)) {
2388 if (n > msa->best_len) {
2389 msa->best_len = n;
2390 msa->best_s = (UChar* )sstart;
2391 }
2392 else
2393 goto end_best_len;
2394 }
2395#endif
2396 best_len = n;
2397 region = msa->region;
2398 if (region) {
2399 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
2400 region->end[0] = s - str;
2401 for (i = 1; i <= num_mem; i++) {
2402 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
2403 if (BIT_STATUS_AT(reg->bt_mem_start, i))
2404 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
2405 else
2406 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
2407
2408 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
2409 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
2410 : (UChar* )((void* )mem_end_stk[i])) - str;
2411 }
2412 else {
2413 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
2414 }
2415 }
2416
2417#ifdef USE_CAPTURE_HISTORY
2418 if (reg->capture_history != 0) {
2419 int r;
2420 OnigCaptureTreeNode* node;
2421
2422 if (IS_NULL(region->history_root)) {
2423 region->history_root = node = history_node_new();
2424 CHECK_NULL_RETURN_MEMERR(node);
2425 }
2426 else {
2427 node = region->history_root;
2428 history_tree_clear(node);
2429 }
2430
2431 node->group = 0;
2432 node->beg = ((pkeep > s) ? s : pkeep) - str;
2433 node->end = s - str;
2434
2435 stkp = stk_base;
2436 r = make_capture_history_tree(region->history_root, &stkp,
2437 stk, (UChar* )str, reg);
2438 if (r < 0) {
2439 best_len = r; /* error code */
2440 goto finish;
2441 }
2442 }
2443#endif /* USE_CAPTURE_HISTORY */
2444 } /* if (region) */
2445 } /* n > best_len */
2446
2447#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
2448 end_best_len:
2449#endif
2450 MOP_OUT;
2451
2452 if (IS_FIND_CONDITION(option)) {
2453 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
2454 best_len = ONIG_MISMATCH;
2455 goto fail; /* for retry */
2456 }
2457 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
2458 goto fail; /* for retry */
2459 }
2460 }
2461
2462 /* default behavior: return first-matching result. */
2463 goto finish;
2464 NEXT;
2465
2466 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
2467 DATA_ENSURE(1);
2468 if (*p != *s) goto fail;
2469 p++; s++;
2470 MOP_OUT;
2471 NEXT;
2472
2473 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
2474 {
2475 int len;
2476 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2477
2478 DATA_ENSURE(1);
2479 len = ONIGENC_MBC_CASE_FOLD(encode,
2480 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2481 case_fold_flag,
2482 &s, end, lowbuf);
2483 DATA_ENSURE(0);
2484 q = lowbuf;
2485 while (len-- > 0) {
2486 if (*p != *q) {
2487 goto fail;
2488 }
2489 p++; q++;
2490 }
2491 }
2492 MOP_OUT;
2493 NEXT;
2494
2495 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
2496 DATA_ENSURE(2);
2497 if (*p != *s) goto fail;
2498 p++; s++;
2499 if (*p != *s) goto fail;
2500 sprev = s;
2501 p++; s++;
2502 MOP_OUT;
2503 JUMP;
2504
2505 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
2506 DATA_ENSURE(3);
2507 if (*p != *s) goto fail;
2508 p++; s++;
2509 if (*p != *s) goto fail;
2510 p++; s++;
2511 if (*p != *s) goto fail;
2512 sprev = s;
2513 p++; s++;
2514 MOP_OUT;
2515 JUMP;
2516
2517 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
2518 DATA_ENSURE(4);
2519 if (*p != *s) goto fail;
2520 p++; s++;
2521 if (*p != *s) goto fail;
2522 p++; s++;
2523 if (*p != *s) goto fail;
2524 p++; s++;
2525 if (*p != *s) goto fail;
2526 sprev = s;
2527 p++; s++;
2528 MOP_OUT;
2529 JUMP;
2530
2531 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
2532 DATA_ENSURE(5);
2533 if (*p != *s) goto fail;
2534 p++; s++;
2535 if (*p != *s) goto fail;
2536 p++; s++;
2537 if (*p != *s) goto fail;
2538 p++; s++;
2539 if (*p != *s) goto fail;
2540 p++; s++;
2541 if (*p != *s) goto fail;
2542 sprev = s;
2543 p++; s++;
2544 MOP_OUT;
2545 JUMP;
2546
2547 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
2548 GET_LENGTH_INC(tlen, p);
2549 DATA_ENSURE(tlen);
2550 while (tlen-- > 0) {
2551 if (*p++ != *s++) goto fail;
2552 }
2553 sprev = s - 1;
2554 MOP_OUT;
2555 JUMP;
2556
2557 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
2558 {
2559 int len;
2560 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
2561
2562 GET_LENGTH_INC(tlen, p);
2563 endp = p + tlen;
2564
2565 while (p < endp) {
2566 sprev = s;
2567 DATA_ENSURE(1);
2568 len = ONIGENC_MBC_CASE_FOLD(encode,
2569 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
2570 case_fold_flag,
2571 &s, end, lowbuf);
2572 DATA_ENSURE(0);
2573 q = lowbuf;
2574 while (len-- > 0) {
2575 if (*p != *q) goto fail;
2576 p++; q++;
2577 }
2578 }
2579 }
2580
2581 MOP_OUT;
2582 JUMP;
2583
2584 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
2585 DATA_ENSURE(2);
2586 if (*p != *s) goto fail;
2587 p++; s++;
2588 if (*p != *s) goto fail;
2589 p++; s++;
2590 MOP_OUT;
2591 NEXT;
2592
2593 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
2594 DATA_ENSURE(4);
2595 if (*p != *s) goto fail;
2596 p++; s++;
2597 if (*p != *s) goto fail;
2598 p++; s++;
2599 sprev = s;
2600 if (*p != *s) goto fail;
2601 p++; s++;
2602 if (*p != *s) goto fail;
2603 p++; s++;
2604 MOP_OUT;
2605 JUMP;
2606
2607 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
2608 DATA_ENSURE(6);
2609 if (*p != *s) goto fail;
2610 p++; s++;
2611 if (*p != *s) goto fail;
2612 p++; s++;
2613 if (*p != *s) goto fail;
2614 p++; s++;
2615 if (*p != *s) goto fail;
2616 p++; s++;
2617 sprev = s;
2618 if (*p != *s) goto fail;
2619 p++; s++;
2620 if (*p != *s) goto fail;
2621 p++; s++;
2622 MOP_OUT;
2623 JUMP;
2624
2625 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
2626 GET_LENGTH_INC(tlen, p);
2627 DATA_ENSURE(tlen * 2);
2628 while (tlen-- > 0) {
2629 if (*p != *s) goto fail;
2630 p++; s++;
2631 if (*p != *s) goto fail;
2632 p++; s++;
2633 }
2634 sprev = s - 2;
2635 MOP_OUT;
2636 JUMP;
2637
2638 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
2639 GET_LENGTH_INC(tlen, p);
2640 DATA_ENSURE(tlen * 3);
2641 while (tlen-- > 0) {
2642 if (*p != *s) goto fail;
2643 p++; s++;
2644 if (*p != *s) goto fail;
2645 p++; s++;
2646 if (*p != *s) goto fail;
2647 p++; s++;
2648 }
2649 sprev = s - 3;
2650 MOP_OUT;
2651 JUMP;
2652
2653 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2654 GET_LENGTH_INC(tlen, p); /* mb-len */
2655 GET_LENGTH_INC(tlen2, p); /* string len */
2656 tlen2 *= tlen;
2657 DATA_ENSURE(tlen2);
2658 while (tlen2-- > 0) {
2659 if (*p != *s) goto fail;
2660 p++; s++;
2661 }
2662 sprev = s - tlen;
2663 MOP_OUT;
2664 JUMP;
2665
2666 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2667 DATA_ENSURE(1);
2668 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2669 p += SIZE_BITSET;
2670 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2671 MOP_OUT;
2672 NEXT;
2673
2674 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2675 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2676
2677 cclass_mb:
2678 GET_LENGTH_INC(tlen, p);
2679 {
2680 OnigCodePoint code;
2681 UChar *ss;
2682 int mb_len;
2683
2684 DATA_ENSURE(1);
2685 mb_len = enclen(encode, s, end);
2686 DATA_ENSURE(mb_len);
2687 ss = s;
2688 s += mb_len;
2689 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2690
2691#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2692 if (! onig_is_in_code_range(p, code)) goto fail;
2693#else
2694 q = p;
2695 ALIGNMENT_RIGHT(q);
2696 if (! onig_is_in_code_range(q, code)) goto fail;
2697#endif
2698 }
2699 p += tlen;
2700 MOP_OUT;
2701 NEXT;
2702
2703 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2704 DATA_ENSURE(1);
2705 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2706 p += SIZE_BITSET;
2707 goto cclass_mb;
2708 }
2709 else {
2710 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2711 goto fail;
2712
2713 p += SIZE_BITSET;
2714 GET_LENGTH_INC(tlen, p);
2715 p += tlen;
2716 s++;
2717 }
2718 MOP_OUT;
2719 NEXT;
2720
2721 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2722 DATA_ENSURE(1);
2723 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2724 p += SIZE_BITSET;
2725 s += enclen(encode, s, end);
2726 MOP_OUT;
2727 NEXT;
2728
2729 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2730 DATA_ENSURE(1);
2731 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2732 s++;
2733 GET_LENGTH_INC(tlen, p);
2734 p += tlen;
2735 goto cc_mb_not_success;
2736 }
2737
2738 cclass_mb_not:
2739 GET_LENGTH_INC(tlen, p);
2740 {
2741 OnigCodePoint code;
2742 UChar *ss;
2743 int mb_len = enclen(encode, s, end);
2744
2745 if (! DATA_ENSURE_CHECK(mb_len)) {
2746 DATA_ENSURE(1);
2747 s = (UChar* )end;
2748 p += tlen;
2749 goto cc_mb_not_success;
2750 }
2751
2752 ss = s;
2753 s += mb_len;
2754 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2755
2756#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2757 if (onig_is_in_code_range(p, code)) goto fail;
2758#else
2759 q = p;
2760 ALIGNMENT_RIGHT(q);
2761 if (onig_is_in_code_range(q, code)) goto fail;
2762#endif
2763 }
2764 p += tlen;
2765
2766 cc_mb_not_success:
2767 MOP_OUT;
2768 NEXT;
2769
2770 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2771 DATA_ENSURE(1);
2772 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2773 p += SIZE_BITSET;
2774 goto cclass_mb_not;
2775 }
2776 else {
2777 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2778 goto fail;
2779
2780 p += SIZE_BITSET;
2781 GET_LENGTH_INC(tlen, p);
2782 p += tlen;
2783 s++;
2784 }
2785 MOP_OUT;
2786 NEXT;
2787
2788 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2789 DATA_ENSURE(1);
2790 n = enclen(encode, s, end);
2791 DATA_ENSURE(n);
2792 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2793 s += n;
2794 MOP_OUT;
2795 NEXT;
2796
2797 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2798 DATA_ENSURE(1);
2799 n = enclen(encode, s, end);
2800 DATA_ENSURE(n);
2801 s += n;
2802 MOP_OUT;
2803 NEXT;
2804
2805 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2806 while (DATA_ENSURE_CHECK1) {
2807 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2808 STACK_PUSH_ALT(p, s, sprev, pkeep);
2809 n = enclen(encode, s, end);
2810 DATA_ENSURE(n);
2811 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2812 sprev = s;
2813 s += n;
2814 }
2815 MOP_OUT;
2816 JUMP;
2817
2818 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2819 while (DATA_ENSURE_CHECK1) {
2820 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2821 STACK_PUSH_ALT(p, s, sprev, pkeep);
2822 n = enclen(encode, s, end);
2823 if (n > 1) {
2824 DATA_ENSURE(n);
2825 sprev = s;
2826 s += n;
2827 }
2828 else {
2829 sprev = s;
2830 s++;
2831 }
2832 }
2833 MOP_OUT;
2834 JUMP;
2835
2836 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2837 while (DATA_ENSURE_CHECK1) {
2838 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
2839 if (*p == *s) {
2840 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2841 } else {
2842 /* We need to increment num_fail here, for invoking a cache optimization correctly. */
2843 /* Actually, the matching would fail if we simply used `OP_ANYCHAR_STAR` in this case. */
2844#ifdef USE_CACHE_MATCH_OPT
2845 msa->num_fail++;
2846#endif
2847 }
2848 n = enclen(encode, s, end);
2849 DATA_ENSURE(n);
2850 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2851 sprev = s;
2852 s += n;
2853 }
2854 p++;
2855 MOP_OUT;
2856 NEXT;
2857
2858 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2859 while (DATA_ENSURE_CHECK1) {
2860 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
2861 if (*p == *s) {
2862 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2863 } else {
2864 /* We need to increment num_fail here, for invoking a cache optimization correctly. */
2865 /* Actually, the matching would fail if we simply used `OP_ANYCHAR_ML_STAR` in this case. */
2866#ifdef USE_CACHE_MATCH_OPT
2867 msa->num_fail++;
2868#endif
2869 }
2870 n = enclen(encode, s, end);
2871 if (n > 1) {
2872 DATA_ENSURE(n);
2873 sprev = s;
2874 s += n;
2875 }
2876 else {
2877 sprev = s;
2878 s++;
2879 }
2880 }
2881 p++;
2882 MOP_OUT;
2883 NEXT;
2884
2885#ifdef USE_COMBINATION_EXPLOSION_CHECK
2886 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2887 GET_STATE_CHECK_NUM_INC(mem, p);
2888 while (DATA_ENSURE_CHECK1) {
2889 STATE_CHECK_VAL(scv, mem);
2890 if (scv) goto fail;
2891
2892 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2893 n = enclen(encode, s, end);
2894 DATA_ENSURE(n);
2895 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2896 sprev = s;
2897 s += n;
2898 }
2899 MOP_OUT;
2900 NEXT;
2901
2902 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2903 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2904
2905 GET_STATE_CHECK_NUM_INC(mem, p);
2906 while (DATA_ENSURE_CHECK1) {
2907 STATE_CHECK_VAL(scv, mem);
2908 if (scv) goto fail;
2909
2910 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2911 n = enclen(encode, s, end);
2912 if (n > 1) {
2913 DATA_ENSURE(n);
2914 sprev = s;
2915 s += n;
2916 }
2917 else {
2918 sprev = s;
2919 s++;
2920 }
2921 }
2922 MOP_OUT;
2923 NEXT;
2924#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2925
2926 CASE(OP_WORD) MOP_IN(OP_WORD);
2927 DATA_ENSURE(1);
2928 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2929 goto fail;
2930
2931 s += enclen(encode, s, end);
2932 MOP_OUT;
2933 NEXT;
2934
2935 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2936 DATA_ENSURE(1);
2937 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2938 goto fail;
2939
2940 s += enclen(encode, s, end);
2941 MOP_OUT;
2942 NEXT;
2943
2944 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2945 DATA_ENSURE(1);
2946 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2947 goto fail;
2948
2949 s += enclen(encode, s, end);
2950 MOP_OUT;
2951 NEXT;
2952
2953 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2954 DATA_ENSURE(1);
2955 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2956 goto fail;
2957
2958 s += enclen(encode, s, end);
2959 MOP_OUT;
2960 NEXT;
2961
2962 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2963 if (ON_STR_BEGIN(s)) {
2964 DATA_ENSURE(1);
2965 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2966 goto fail;
2967 }
2968 else if (ON_STR_END(s)) {
2969 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2970 goto fail;
2971 }
2972 else {
2973 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2974 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2975 goto fail;
2976 }
2977 MOP_OUT;
2978 JUMP;
2979
2980 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2981 if (ON_STR_BEGIN(s)) {
2982 DATA_ENSURE(1);
2983 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2984 goto fail;
2985 }
2986 else if (ON_STR_END(s)) {
2987 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2988 goto fail;
2989 }
2990 else {
2991 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2992 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2993 goto fail;
2994 }
2995 MOP_OUT;
2996 JUMP;
2997
2998 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2999 if (ON_STR_BEGIN(s)) {
3000 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
3001 goto fail;
3002 }
3003 else if (ON_STR_END(s)) {
3004 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
3005 goto fail;
3006 }
3007 else {
3008 if (ONIGENC_IS_MBC_WORD(encode, s, end)
3009 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
3010 goto fail;
3011 }
3012 MOP_OUT;
3013 JUMP;
3014
3015 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
3016 if (ON_STR_BEGIN(s)) {
3017 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
3018 goto fail;
3019 }
3020 else if (ON_STR_END(s)) {
3021 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3022 goto fail;
3023 }
3024 else {
3025 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
3026 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
3027 goto fail;
3028 }
3029 MOP_OUT;
3030 JUMP;
3031
3032#ifdef USE_WORD_BEGIN_END
3033 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
3034 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
3035 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3036 MOP_OUT;
3037 JUMP;
3038 }
3039 }
3040 goto fail;
3041 NEXT;
3042
3043 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
3044 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3045 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3046 MOP_OUT;
3047 JUMP;
3048 }
3049 }
3050 goto fail;
3051 NEXT;
3052
3053 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
3054 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
3055 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
3056 MOP_OUT;
3057 JUMP;
3058 }
3059 }
3060 goto fail;
3061 NEXT;
3062
3063 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
3064 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
3065 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
3066 MOP_OUT;
3067 JUMP;
3068 }
3069 }
3070 goto fail;
3071 NEXT;
3072#endif
3073
3074 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
3075 if (! ON_STR_BEGIN(s)) goto fail;
3076 if (IS_NOTBOS(msa->options)) goto fail;
3077
3078 MOP_OUT;
3079 JUMP;
3080
3081 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
3082 if (! ON_STR_END(s)) goto fail;
3083 if (IS_NOTEOS(msa->options)) goto fail;
3084
3085 MOP_OUT;
3086 JUMP;
3087
3088 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
3089 if (ON_STR_BEGIN(s)) {
3090 if (IS_NOTBOL(msa->options)) goto fail;
3091 MOP_OUT;
3092 JUMP;
3093 }
3094 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
3095#ifdef USE_CRNL_AS_LINE_TERMINATOR
3096 && !(IS_NEWLINE_CRLF(option)
3097 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
3098#endif
3099 && !ON_STR_END(s)) {
3100 MOP_OUT;
3101 JUMP;
3102 }
3103 goto fail;
3104 NEXT;
3105
3106 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
3107 if (ON_STR_END(s)) {
3108#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3109 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3110#endif
3111 if (IS_NOTEOL(msa->options)) goto fail;
3112 MOP_OUT;
3113 JUMP;
3114#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3115 }
3116#endif
3117 }
3118 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3119 MOP_OUT;
3120 JUMP;
3121 }
3122 goto fail;
3123 NEXT;
3124
3125 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
3126 if (ON_STR_END(s)) {
3127#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3128 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
3129#endif
3130 if (IS_NOTEOL(msa->options)) goto fail;
3131 MOP_OUT;
3132 JUMP;
3133#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3134 }
3135#endif
3136 }
3137 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
3138 UChar* ss = s + enclen(encode, s, end);
3139 if (ON_STR_END(ss)) {
3140 MOP_OUT;
3141 JUMP;
3142 }
3143#ifdef USE_CRNL_AS_LINE_TERMINATOR
3144 else if (IS_NEWLINE_CRLF(option)
3145 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
3146 ss += enclen(encode, ss, end);
3147 if (ON_STR_END(ss)) {
3148 MOP_OUT;
3149 JUMP;
3150 }
3151 }
3152#endif
3153 }
3154 goto fail;
3155 NEXT;
3156
3157 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
3158 if (s != msa->gpos)
3159 goto fail;
3160
3161 MOP_OUT;
3162 JUMP;
3163
3164 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
3165 GET_MEMNUM_INC(mem, p);
3166 STACK_PUSH_MEM_START(mem, s);
3167 MOP_OUT;
3168 JUMP;
3169
3170 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
3171 GET_MEMNUM_INC(mem, p);
3172 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
3173 mem_end_stk[mem] = INVALID_STACK_INDEX;
3174 MOP_OUT;
3175 JUMP;
3176
3177 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
3178 GET_MEMNUM_INC(mem, p);
3179 STACK_PUSH_MEM_END(mem, s);
3180 MOP_OUT;
3181 JUMP;
3182
3183 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
3184 GET_MEMNUM_INC(mem, p);
3185 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3186 MOP_OUT;
3187 JUMP;
3188
3189 CASE(OP_KEEP) MOP_IN(OP_KEEP);
3190 pkeep = s;
3191 MOP_OUT;
3192 JUMP;
3193
3194#ifdef USE_SUBEXP_CALL
3195 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
3196 GET_MEMNUM_INC(mem, p);
3197 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
3198 STACK_PUSH_MEM_END(mem, s);
3199 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3200 MOP_OUT;
3201 JUMP;
3202
3203 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
3204 GET_MEMNUM_INC(mem, p);
3205 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
3206 STACK_GET_MEM_START(mem, stkp);
3207
3208 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3209 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
3210 else
3211 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
3212
3213 STACK_PUSH_MEM_END_MARK(mem);
3214 MOP_OUT;
3215 JUMP;
3216#endif
3217
3218 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
3219 mem = 1;
3220 goto backref;
3221 NEXT;
3222
3223 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
3224 mem = 2;
3225 goto backref;
3226 NEXT;
3227
3228 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
3229 GET_MEMNUM_INC(mem, p);
3230 backref:
3231 {
3232 int len;
3233 UChar *pstart, *pend;
3234
3235 /* If you want to remove the following line,
3236 you must perform this check at parse and compile time instead. */
3237 if (mem > num_mem) goto fail;
3238 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3239 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3240
3241 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3242 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3243 else
3244 pstart = (UChar* )((void* )mem_start_stk[mem]);
3245
3246 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3247 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3248 : (UChar* )((void* )mem_end_stk[mem]));
3249 n = pend - pstart;
3250 DATA_ENSURE(n);
3251 sprev = s;
3252 STRING_CMP(pstart, s, n);
3253 while (sprev + (len = enclen(encode, sprev, end)) < s)
3254 sprev += len;
3255
3256 MOP_OUT;
3257 JUMP;
3258 }
3259
3260 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
3261 GET_MEMNUM_INC(mem, p);
3262 {
3263 int len;
3264 UChar *pstart, *pend;
3265
3266 /* If you want to remove the following line,
3267 you must perform this check at parse and compile time instead. */
3268 if (mem > num_mem) goto fail;
3269 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
3270 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
3271
3272 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3273 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3274 else
3275 pstart = (UChar* )((void* )mem_start_stk[mem]);
3276
3277 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3278 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3279 : (UChar* )((void* )mem_end_stk[mem]));
3280 n = pend - pstart;
3281 DATA_ENSURE(n);
3282 sprev = s;
3283 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
3284 while (sprev + (len = enclen(encode, sprev, end)) < s)
3285 sprev += len;
3286
3287 MOP_OUT;
3288 JUMP;
3289 }
3290 NEXT;
3291
3292 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
3293 {
3294 int len, is_fail;
3295 UChar *pstart, *pend, *swork;
3296
3297 GET_LENGTH_INC(tlen, p);
3298 for (i = 0; i < tlen; i++) {
3299 GET_MEMNUM_INC(mem, p);
3300
3301 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3302 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3303
3304 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3305 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3306 else
3307 pstart = (UChar* )((void* )mem_start_stk[mem]);
3308
3309 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3310 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3311 : (UChar* )((void* )mem_end_stk[mem]));
3312 n = pend - pstart;
3313 DATA_ENSURE_CONTINUE(n);
3314 sprev = s;
3315 swork = s;
3316 STRING_CMP_VALUE(pstart, swork, n, is_fail);
3317 if (is_fail) continue;
3318 s = swork;
3319 while (sprev + (len = enclen(encode, sprev, end)) < s)
3320 sprev += len;
3321
3322 p += (SIZE_MEMNUM * (tlen - i - 1));
3323 break; /* success */
3324 }
3325 if (i == tlen) goto fail;
3326 MOP_OUT;
3327 JUMP;
3328 }
3329 NEXT;
3330
3331 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
3332 {
3333 int len, is_fail;
3334 UChar *pstart, *pend, *swork;
3335
3336 GET_LENGTH_INC(tlen, p);
3337 for (i = 0; i < tlen; i++) {
3338 GET_MEMNUM_INC(mem, p);
3339
3340 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
3341 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
3342
3343 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
3344 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
3345 else
3346 pstart = (UChar* )((void* )mem_start_stk[mem]);
3347
3348 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
3349 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
3350 : (UChar* )((void* )mem_end_stk[mem]));
3351 n = pend - pstart;
3352 DATA_ENSURE_CONTINUE(n);
3353 sprev = s;
3354 swork = s;
3355 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
3356 if (is_fail) continue;
3357 s = swork;
3358 while (sprev + (len = enclen(encode, sprev, end)) < s)
3359 sprev += len;
3360
3361 p += (SIZE_MEMNUM * (tlen - i - 1));
3362 break; /* success */
3363 }
3364 if (i == tlen) goto fail;
3365 MOP_OUT;
3366 JUMP;
3367 }
3368
3369#ifdef USE_BACKREF_WITH_LEVEL
3370 CASE(OP_BACKREF_WITH_LEVEL)
3371 {
3372 int len;
3373 OnigOptionType ic;
3374 LengthType level;
3375
3376 GET_OPTION_INC(ic, p);
3377 GET_LENGTH_INC(level, p);
3378 GET_LENGTH_INC(tlen, p);
3379
3380 sprev = s;
3381 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
3382 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
3383 while (sprev + (len = enclen(encode, sprev, end)) < s)
3384 sprev += len;
3385
3386 p += (SIZE_MEMNUM * tlen);
3387 }
3388 else
3389 goto fail;
3390
3391 MOP_OUT;
3392 JUMP;
3393 }
3394
3395#endif
3396
3397#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
3398 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
3399 GET_OPTION_INC(option, p);
3400 STACK_PUSH_ALT(p, s, sprev, pkeep);
3401 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
3402 MOP_OUT;
3403 JUMP;
3404
3405 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
3406 GET_OPTION_INC(option, p);
3407 MOP_OUT;
3408 JUMP;
3409#endif
3410
3411 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
3412 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3413 STACK_PUSH_NULL_CHECK_START(mem, s);
3414 MOP_OUT;
3415 JUMP;
3416
3417 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
3418 {
3419 int isnull;
3420
3421 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3422 STACK_NULL_CHECK(isnull, mem, s);
3423 if (isnull) {
3424#ifdef ONIG_DEBUG_MATCH
3425 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
3426 (int )mem, (uintptr_t )s, s);
3427#endif
3428 null_check_found:
3429 /* empty loop found, skip next instruction */
3430 switch (*p++) {
3431 case OP_JUMP:
3432 case OP_PUSH:
3433 p += SIZE_RELADDR;
3434 break;
3435 case OP_REPEAT_INC:
3436 case OP_REPEAT_INC_NG:
3437 case OP_REPEAT_INC_SG:
3438 case OP_REPEAT_INC_NG_SG:
3439 p += SIZE_MEMNUM;
3440 break;
3441 default:
3442 goto unexpected_bytecode_error;
3443 break;
3444 }
3445 }
3446 }
3447 MOP_OUT;
3448 JUMP;
3449
3450#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3451 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
3452 {
3453 int isnull;
3454 int ischanged = 0; // set 1 when a loop is empty but memory status is changed.
3455
3456 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3457 STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg);
3458 if (isnull) {
3459# ifdef ONIG_DEBUG_MATCH
3460 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
3461 (int )mem, (uintptr_t )s, s);
3462# endif
3463 if (isnull == -1) goto fail;
3464 goto null_check_found;
3465 }
3466# ifdef USE_CACHE_MATCH_OPT
3467 if (ischanged && msa->enable_cache_match_opt) {
3468 RelAddrType rel;
3469 OnigUChar *addr;
3470 int mem;
3471 UChar* tmp = p;
3472 switch (*tmp++) {
3473 case OP_JUMP:
3474 case OP_PUSH:
3475 GET_RELADDR_INC(rel, tmp);
3476 addr = tmp + rel;
3477 break;
3478 case OP_REPEAT_INC:
3479 case OP_REPEAT_INC_NG:
3480 GET_MEMNUM_INC(mem, tmp);
3481 addr = STACK_AT(repeat_stk[mem])->u.repeat.pcode;
3482 break;
3483 default:
3484 goto unexpected_bytecode_error;
3485 }
3486 reset_match_cache(reg, addr, pbegin, (long)(s - str), msa->match_cache, msa->cache_index_table, msa->num_cache_table ,msa->num_cache_opcode);
3487 }
3488# endif
3489 }
3490 MOP_OUT;
3491 JUMP;
3492#endif
3493
3494#ifdef USE_SUBEXP_CALL
3495 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
3496 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
3497 {
3498 int isnull;
3499
3500 GET_MEMNUM_INC(mem, p); /* mem: null check id */
3501# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3502 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
3503# else
3504 STACK_NULL_CHECK_REC(isnull, mem, s);
3505# endif
3506 if (isnull) {
3507# ifdef ONIG_DEBUG_MATCH
3508 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
3509 (int )mem, (uintptr_t )s, s);
3510# endif
3511 if (isnull == -1) goto fail;
3512 goto null_check_found;
3513 }
3514 else {
3515 STACK_PUSH_NULL_CHECK_END(mem);
3516 }
3517 }
3518 MOP_OUT;
3519 JUMP;
3520#endif
3521
3522 CASE(OP_JUMP) MOP_IN(OP_JUMP);
3523 GET_RELADDR_INC(addr, p);
3524 p += addr;
3525 MOP_OUT;
3526 CHECK_INTERRUPT_IN_MATCH_AT;
3527 JUMP;
3528
3529 CASE(OP_PUSH) MOP_IN(OP_PUSH);
3530 GET_RELADDR_INC(addr, p);
3531 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3532 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3533 MOP_OUT;
3534 JUMP;
3535
3536#ifdef USE_COMBINATION_EXPLOSION_CHECK
3537 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
3538 GET_STATE_CHECK_NUM_INC(mem, p);
3539 STATE_CHECK_VAL(scv, mem);
3540 if (scv) goto fail;
3541
3542 GET_RELADDR_INC(addr, p);
3543 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3544 MOP_OUT;
3545 JUMP;
3546
3547 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
3548 GET_STATE_CHECK_NUM_INC(mem, p);
3549 GET_RELADDR_INC(addr, p);
3550 STATE_CHECK_VAL(scv, mem);
3551 if (scv) {
3552 p += addr;
3553 }
3554 else {
3555 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
3556 }
3557 MOP_OUT;
3558 JUMP;
3559
3560 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
3561 GET_STATE_CHECK_NUM_INC(mem, p);
3562 STATE_CHECK_VAL(scv, mem);
3563 if (scv) goto fail;
3564
3565 STACK_PUSH_STATE_CHECK(s, mem);
3566 MOP_OUT;
3567 JUMP;
3568#endif /* USE_COMBINATION_EXPLOSION_CHECK */
3569
3570 CASE(OP_POP) MOP_IN(OP_POP);
3571 STACK_POP_ONE;
3572 /* We need to increment num_fail here, for invoking a cache optimization correctly, */
3573 /* because Onigmo makes a loop, which is pairwise disjoint to the following set, as atomic. */
3574#ifdef USE_CACHE_MATCH_OPT
3575 msa->num_fail++;
3576#endif
3577 MOP_OUT;
3578 JUMP;
3579
3580#ifdef USE_OP_PUSH_OR_JUMP_EXACT
3581 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
3582 GET_RELADDR_INC(addr, p);
3583 if (*p == *s && DATA_ENSURE_CHECK1) {
3584 p++;
3585 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3586 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3587 MOP_OUT;
3588 JUMP;
3589 }
3590 p += (addr + 1);
3591 MOP_OUT;
3592 JUMP;
3593#endif
3594
3595 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
3596 GET_RELADDR_INC(addr, p);
3597 if (*p == *s) {
3598 p++;
3599 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3600 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3601 MOP_OUT;
3602 JUMP;
3603 }
3604 p++;
3605 MOP_OUT;
3606 JUMP;
3607
3608 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
3609 {
3610 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3611 GET_RELADDR_INC(addr, p);
3612
3613 STACK_ENSURE(1);
3614 repeat_stk[mem] = GET_STACK_INDEX(stk);
3615 STACK_PUSH_REPEAT(mem, p);
3616
3617 if (reg->repeat_range[mem].lower == 0) {
3618 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, end - s, msa->match_cache);
3619 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
3620 }
3621 }
3622 MOP_OUT;
3623 JUMP;
3624
3625 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
3626 {
3627 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3628 GET_RELADDR_INC(addr, p);
3629
3630 STACK_ENSURE(1);
3631 repeat_stk[mem] = GET_STACK_INDEX(stk);
3632 STACK_PUSH_REPEAT(mem, p);
3633
3634 if (reg->repeat_range[mem].lower == 0) {
3635 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3636 STACK_PUSH_ALT(p, s, sprev, pkeep);
3637 p += addr;
3638 }
3639 }
3640 MOP_OUT;
3641 JUMP;
3642
3643 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
3644 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3645 si = repeat_stk[mem];
3646 stkp = STACK_AT(si);
3647
3648 repeat_inc:
3649 stkp->u.repeat.count++;
3650 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
3651 /* end of repeat. Nothing to do. */
3652 }
3653 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3654 if (*pbegin == OP_REPEAT_INC) {
3655 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3656 }
3657 STACK_PUSH_ALT(p, s, sprev, pkeep);
3658 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
3659 }
3660 else {
3661 p = stkp->u.repeat.pcode;
3662 }
3663 STACK_PUSH_REPEAT_INC(si);
3664 MOP_OUT;
3665 CHECK_INTERRUPT_IN_MATCH_AT;
3666 JUMP;
3667
3668 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
3669 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3670 STACK_GET_REPEAT(mem, stkp);
3671 si = GET_STACK_INDEX(stkp);
3672 goto repeat_inc;
3673 NEXT;
3674
3675 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
3676 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3677 si = repeat_stk[mem];
3678 stkp = STACK_AT(si);
3679
3680 repeat_inc_ng:
3681 stkp->u.repeat.count++;
3682 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
3683 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
3684 UChar* pcode = stkp->u.repeat.pcode;
3685
3686 STACK_PUSH_REPEAT_INC(si);
3687 if (*pbegin == OP_REPEAT_INC_NG) {
3688 DO_CACHE_MATCH_OPT(reg, stk_base, repeat_stk, msa->enable_cache_match_opt, pbegin, msa->num_cache_table, msa->num_cache_opcode, msa->cache_index_table, s - str, msa->match_cache);
3689 }
3690 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
3691 }
3692 else {
3693 p = stkp->u.repeat.pcode;
3694 STACK_PUSH_REPEAT_INC(si);
3695 }
3696 }
3697 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
3698 STACK_PUSH_REPEAT_INC(si);
3699 }
3700 MOP_OUT;
3701 CHECK_INTERRUPT_IN_MATCH_AT;
3702 JUMP;
3703
3704 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
3705 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
3706 STACK_GET_REPEAT(mem, stkp);
3707 si = GET_STACK_INDEX(stkp);
3708 goto repeat_inc_ng;
3709 NEXT;
3710
3711 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3712 STACK_PUSH_POS(s, sprev, pkeep);
3713 MOP_OUT;
3714 JUMP;
3715
3716 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3717 {
3718 STACK_POS_END(stkp);
3719 s = stkp->u.state.pstr;
3720 sprev = stkp->u.state.pstr_prev;
3721 }
3722 MOP_OUT;
3723 JUMP;
3724
3725 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3726 GET_RELADDR_INC(addr, p);
3727 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3728 MOP_OUT;
3729 JUMP;
3730
3731 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3732 STACK_POP_TIL_POS_NOT;
3733 goto fail;
3734 NEXT;
3735
3736 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3737 STACK_PUSH_STOP_BT;
3738 MOP_OUT;
3739 JUMP;
3740
3741 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3742 STACK_STOP_BT_END;
3743 MOP_OUT;
3744 JUMP;
3745
3746 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3747 GET_LENGTH_INC(tlen, p);
3748 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3749 if (IS_NULL(s)) goto fail;
3750 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3751 MOP_OUT;
3752 JUMP;
3753
3754 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3755 GET_RELADDR_INC(addr, p);
3756 GET_LENGTH_INC(tlen, p);
3757 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3758 if (IS_NULL(q)) {
3759 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3760 If you want to change to fail, replace following line. */
3761 p += addr;
3762 /* goto fail; */
3763 }
3764 else {
3765 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3766 s = q;
3767 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3768 }
3769 MOP_OUT;
3770 JUMP;
3771
3772 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3773 STACK_POP_TIL_LOOK_BEHIND_NOT;
3774 goto fail;
3775 NEXT;
3776
3777 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3778 /* Save the absent-start-pos and the original end-pos. */
3779 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3780 MOP_OUT;
3781 JUMP;
3782
3783 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3784 {
3785 const UChar* aend = ABSENT_END_POS;
3786 UChar* absent;
3787 UChar* selfp = p - 1;
3788
3789 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3790 GET_RELADDR_INC(addr, p);
3791#ifdef ONIG_DEBUG_MATCH
3792 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3793#endif
3794 if ((absent > aend) && (s > absent)) {
3795 /* An empty match occurred in (?~...) at the start point.
3796 * Never match. */
3797 STACK_POP;
3798 goto fail;
3799 }
3800 else if ((s >= aend) && (s > absent)) {
3801 if (s > aend) {
3802 /* Only one (or less) character matched in the last iteration.
3803 * This is not a possible point. */
3804 goto fail;
3805 }
3806 /* All possible points were found. Try matching after (?~...). */
3807 DATA_ENSURE(0);
3808 p += addr;
3809 }
3810 else if (s == end) {
3811 /* At the end of the string, just match with it */
3812 DATA_ENSURE(0);
3813 p += addr;
3814 }
3815 else {
3816 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3817 n = enclen(encode, s, end);
3818 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3819 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3820 STACK_PUSH_ABSENT;
3821 ABSENT_END_POS = aend;
3822 }
3823 }
3824 MOP_OUT;
3825 JUMP;
3826
3827 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3828 /* The pattern inside (?~...) was matched.
3829 * Set the end-pos temporary and go to next iteration. */
3830 if (sprev < ABSENT_END_POS)
3831 ABSENT_END_POS = sprev;
3832#ifdef ONIG_DEBUG_MATCH
3833 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3834#endif
3835 STACK_POP_TIL_ABSENT;
3836 goto fail;
3837 NEXT;
3838
3839#ifdef USE_SUBEXP_CALL
3840 CASE(OP_CALL) MOP_IN(OP_CALL);
3841 GET_ABSADDR_INC(addr, p);
3842 STACK_PUSH_CALL_FRAME(p);
3843 p = reg->p + addr;
3844 MOP_OUT;
3845 JUMP;
3846
3847 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3848 STACK_RETURN(p);
3849 STACK_PUSH_RETURN;
3850 MOP_OUT;
3851 JUMP;
3852#endif
3853
3854 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3855 GET_MEMNUM_INC(mem, p);
3856 GET_RELADDR_INC(addr, p);
3857 if ((mem > num_mem) ||
3858 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3859 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3860 p += addr;
3861 }
3862 MOP_OUT;
3863 JUMP;
3864
3865 CASE(OP_FINISH)
3866 goto finish;
3867 NEXT;
3868
3869 CASE(OP_FAIL)
3870 if (0) {
3871 /* fall */
3872 fail:
3873 MOP_OUT;
3874 }
3875 MOP_IN(OP_FAIL);
3876 STACK_POP;
3877 p = stk->u.state.pcode;
3878 s = stk->u.state.pstr;
3879 sprev = stk->u.state.pstr_prev;
3880 pkeep = stk->u.state.pkeep;
3881
3882#ifdef USE_CACHE_MATCH_OPT
3883 if (++msa->num_fail >= (long)(end - str) + 1 && msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3884 msa->enable_cache_match_opt = 1;
3885 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_UNINIT) {
3886 OnigPosition r = count_num_cache_opcode(reg, &msa->num_cache_opcode, &msa->num_cache_table);
3887 if (r < 0) goto bytecode_error;
3888 }
3889 if (msa->num_cache_opcode == NUM_CACHE_OPCODE_FAIL || msa->num_cache_opcode == 0) {
3890 msa->enable_cache_match_opt = 0;
3891 goto fail_match_cache_opt;
3892 }
3893 if (msa->cache_index_table == NULL) {
3894 OnigCacheIndex *table = (OnigCacheIndex *)xmalloc(msa->num_cache_table * sizeof(OnigCacheIndex));
3895 if (table == NULL) {
3896 return ONIGERR_MEMORY;
3897 }
3898 OnigPosition r = init_cache_index_table(reg, table);
3899 if (r < 0) {
3900 if (r == ONIGERR_UNEXPECTED_BYTECODE) goto unexpected_bytecode_error;
3901 else goto bytecode_error;
3902 }
3903 msa->cache_index_table = table;
3904 }
3905 size_t len = (end - str) + 1;
3906 size_t match_cache_size8 = (size_t)msa->num_cache_opcode * len;
3907 /* overflow check */
3908 if (match_cache_size8 / len != (size_t)msa->num_cache_opcode) {
3909 return ONIGERR_MEMORY;
3910 }
3911 /* Currently, int is used for the key of match_cache */
3912 if (match_cache_size8 >= LONG_MAX_LIMIT) {
3913 return ONIGERR_MEMORY;
3914 }
3915 size_t match_cache_size = (match_cache_size8 >> 3) + (match_cache_size8 & 7 ? 1 : 0);
3916 msa->match_cache = (uint8_t*)xmalloc(match_cache_size * sizeof(uint8_t));
3917 if (msa->match_cache == NULL) {
3918 return ONIGERR_MEMORY;
3919 }
3920 xmemset(msa->match_cache, 0, match_cache_size * sizeof(uint8_t));
3921 }
3922 fail_match_cache_opt:
3923#endif
3924
3925#ifdef USE_COMBINATION_EXPLOSION_CHECK
3926 if (stk->u.state.state_check != 0) {
3927 stk->type = STK_STATE_CHECK_MARK;
3928 stk++;
3929 }
3930#endif
3931
3932 MOP_OUT;
3933 CHECK_INTERRUPT_IN_MATCH_AT;
3934 JUMP;
3935
3936 DEFAULT
3937 goto bytecode_error;
3938 } VM_LOOP_END
3939
3940 finish:
3941 STACK_SAVE;
3942 if (xmalloc_base) xfree(xmalloc_base);
3943 return best_len;
3944
3945#ifdef ONIG_DEBUG
3946 stack_error:
3947 STACK_SAVE;
3948 if (xmalloc_base) xfree(xmalloc_base);
3949 return ONIGERR_STACK_BUG;
3950#endif
3951
3952 bytecode_error:
3953 STACK_SAVE;
3954 if (xmalloc_base) xfree(xmalloc_base);
3955 return ONIGERR_UNDEFINED_BYTECODE;
3956
3957 unexpected_bytecode_error:
3958 STACK_SAVE;
3959 if (xmalloc_base) xfree(xmalloc_base);
3960 return ONIGERR_UNEXPECTED_BYTECODE;
3961}
3962
3963
3964static UChar*
3965slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3966 const UChar* text, const UChar* text_end, UChar* text_range)
3967{
3968 UChar *t, *p, *s, *end;
3969
3970 end = (UChar* )text_end;
3971 end -= target_end - target - 1;
3972 if (end > text_range)
3973 end = text_range;
3974
3975 s = (UChar* )text;
3976
3977 if (enc->max_enc_len == enc->min_enc_len) {
3978 int n = enc->max_enc_len;
3979
3980 while (s < end) {
3981 if (*s == *target) {
3982 p = s + 1;
3983 t = target + 1;
3984 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3985 return s;
3986 }
3987 s += n;
3988 }
3989 return (UChar* )NULL;
3990 }
3991 while (s < end) {
3992 if (*s == *target) {
3993 p = s + 1;
3994 t = target + 1;
3995 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3996 return s;
3997 }
3998 s += enclen(enc, s, text_end);
3999 }
4000
4001 return (UChar* )NULL;
4002}
4003
4004static int
4005str_lower_case_match(OnigEncoding enc, int case_fold_flag,
4006 const UChar* t, const UChar* tend,
4007 const UChar* p, const UChar* end)
4008{
4009 int lowlen;
4010 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
4011
4012 while (t < tend) {
4013 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
4014 q = lowbuf;
4015 while (lowlen > 0) {
4016 if (*t++ != *q++) return 0;
4017 lowlen--;
4018 }
4019 }
4020
4021 return 1;
4022}
4023
4024static UChar*
4025slow_search_ic(OnigEncoding enc, int case_fold_flag,
4026 UChar* target, UChar* target_end,
4027 const UChar* text, const UChar* text_end, UChar* text_range)
4028{
4029 UChar *s, *end;
4030
4031 end = (UChar* )text_end;
4032 end -= target_end - target - 1;
4033 if (end > text_range)
4034 end = text_range;
4035
4036 s = (UChar* )text;
4037
4038 while (s < end) {
4039 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4040 s, text_end))
4041 return s;
4042
4043 s += enclen(enc, s, text_end);
4044 }
4045
4046 return (UChar* )NULL;
4047}
4048
4049static UChar*
4050slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
4051 const UChar* text, const UChar* adjust_text,
4052 const UChar* text_end, const UChar* text_start)
4053{
4054 UChar *t, *p, *s;
4055
4056 s = (UChar* )text_end;
4057 s -= (target_end - target);
4058 if (s > text_start)
4059 s = (UChar* )text_start;
4060 else
4061 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4062
4063 while (s >= text) {
4064 if (*s == *target) {
4065 p = s + 1;
4066 t = target + 1;
4067 while (t < target_end) {
4068 if (*t != *p++)
4069 break;
4070 t++;
4071 }
4072 if (t == target_end)
4073 return s;
4074 }
4075 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4076 }
4077
4078 return (UChar* )NULL;
4079}
4080
4081static UChar*
4082slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
4083 UChar* target, UChar* target_end,
4084 const UChar* text, const UChar* adjust_text,
4085 const UChar* text_end, const UChar* text_start)
4086{
4087 UChar *s;
4088
4089 s = (UChar* )text_end;
4090 s -= (target_end - target);
4091 if (s > text_start)
4092 s = (UChar* )text_start;
4093 else
4094 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
4095
4096 while (s >= text) {
4097 if (str_lower_case_match(enc, case_fold_flag,
4098 target, target_end, s, text_end))
4099 return s;
4100
4101 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4102 }
4103
4104 return (UChar* )NULL;
4105}
4106
4107#ifndef USE_SUNDAY_QUICK_SEARCH
4108/* Boyer-Moore-Horspool search applied to a multibyte string */
4109static UChar*
4110bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4111 const UChar* text, const UChar* text_end,
4112 const UChar* text_range)
4113{
4114 const UChar *s, *se, *t, *p, *end;
4115 const UChar *tail;
4116 ptrdiff_t skip, tlen1;
4117
4118# ifdef ONIG_DEBUG_SEARCH
4119 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4120 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4121# endif
4122
4123 tail = target_end - 1;
4124 tlen1 = tail - target;
4125 end = text_range;
4126 if (end + tlen1 > text_end)
4127 end = text_end - tlen1;
4128
4129 s = text;
4130
4131 if (IS_NULL(reg->int_map)) {
4132 while (s < end) {
4133 p = se = s + tlen1;
4134 t = tail;
4135 while (*p == *t) {
4136 if (t == target) return (UChar* )s;
4137 p--; t--;
4138 }
4139 skip = reg->map[*se];
4140 t = s;
4141 do {
4142 s += enclen(reg->enc, s, end);
4143 } while ((s - t) < skip && s < end);
4144 }
4145 }
4146 else {
4147# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4148 while (s < end) {
4149 p = se = s + tlen1;
4150 t = tail;
4151 while (*p == *t) {
4152 if (t == target) return (UChar* )s;
4153 p--; t--;
4154 }
4155 skip = reg->int_map[*se];
4156 t = s;
4157 do {
4158 s += enclen(reg->enc, s, end);
4159 } while ((s - t) < skip && s < end);
4160 }
4161# endif
4162 }
4163
4164 return (UChar* )NULL;
4165}
4166
4167/* Boyer-Moore-Horspool search */
4168static UChar*
4169bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4170 const UChar* text, const UChar* text_end, const UChar* text_range)
4171{
4172 const UChar *s, *t, *p, *end;
4173 const UChar *tail;
4174
4175# ifdef ONIG_DEBUG_SEARCH
4176 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4177 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4178# endif
4179
4180 end = text_range + (target_end - target) - 1;
4181 if (end > text_end)
4182 end = text_end;
4183
4184 tail = target_end - 1;
4185 s = text + (target_end - target) - 1;
4186 if (IS_NULL(reg->int_map)) {
4187 while (s < end) {
4188 p = s;
4189 t = tail;
4190# ifdef ONIG_DEBUG_SEARCH
4191 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
4192 (intptr_t )(s - text), s);
4193# endif
4194 while (*p == *t) {
4195 if (t == target) return (UChar* )p;
4196 p--; t--;
4197 }
4198 s += reg->map[*s];
4199 }
4200 }
4201 else { /* see int_map[] */
4202# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4203 while (s < end) {
4204 p = s;
4205 t = tail;
4206 while (*p == *t) {
4207 if (t == target) return (UChar* )p;
4208 p--; t--;
4209 }
4210 s += reg->int_map[*s];
4211 }
4212# endif
4213 }
4214 return (UChar* )NULL;
4215}
4216
4217/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
4218static UChar*
4219bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4220 const UChar* text, const UChar* text_end,
4221 const UChar* text_range)
4222{
4223 const UChar *s, *se, *t, *end;
4224 const UChar *tail;
4225 ptrdiff_t skip, tlen1;
4226 OnigEncoding enc = reg->enc;
4227 int case_fold_flag = reg->case_fold_flag;
4228
4229# ifdef ONIG_DEBUG_SEARCH
4230 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4231 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4232# endif
4233
4234 tail = target_end - 1;
4235 tlen1 = tail - target;
4236 end = text_range;
4237 if (end + tlen1 > text_end)
4238 end = text_end - tlen1;
4239
4240 s = text;
4241
4242 if (IS_NULL(reg->int_map)) {
4243 while (s < end) {
4244 se = s + tlen1;
4245 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4246 s, se + 1))
4247 return (UChar* )s;
4248 skip = reg->map[*se];
4249 t = s;
4250 do {
4251 s += enclen(reg->enc, s, end);
4252 } while ((s - t) < skip && s < end);
4253 }
4254 }
4255 else {
4256# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4257 while (s < end) {
4258 se = s + tlen1;
4259 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4260 s, se + 1))
4261 return (UChar* )s;
4262 skip = reg->int_map[*se];
4263 t = s;
4264 do {
4265 s += enclen(reg->enc, s, end);
4266 } while ((s - t) < skip && s < end);
4267 }
4268# endif
4269 }
4270
4271 return (UChar* )NULL;
4272}
4273
4274/* Boyer-Moore-Horspool search (ignore case) */
4275static UChar*
4276bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4277 const UChar* text, const UChar* text_end, const UChar* text_range)
4278{
4279 const UChar *s, *p, *end;
4280 const UChar *tail;
4281 OnigEncoding enc = reg->enc;
4282 int case_fold_flag = reg->case_fold_flag;
4283
4284# ifdef ONIG_DEBUG_SEARCH
4285 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
4286 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
4287# endif
4288
4289 end = text_range + (target_end - target) - 1;
4290 if (end > text_end)
4291 end = text_end;
4292
4293 tail = target_end - 1;
4294 s = text + (target_end - target) - 1;
4295 if (IS_NULL(reg->int_map)) {
4296 while (s < end) {
4297 p = s - (target_end - target) + 1;
4298 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4299 p, s + 1))
4300 return (UChar* )p;
4301 s += reg->map[*s];
4302 }
4303 }
4304 else { /* see int_map[] */
4305# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4306 while (s < end) {
4307 p = s - (target_end - target) + 1;
4308 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4309 p, s + 1))
4310 return (UChar* )p;
4311 s += reg->int_map[*s];
4312 }
4313# endif
4314 }
4315 return (UChar* )NULL;
4316}
4317
4318#else /* USE_SUNDAY_QUICK_SEARCH */
4319
4320/* Sunday's quick search applied to a multibyte string */
4321static UChar*
4322bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
4323 const UChar* text, const UChar* text_end,
4324 const UChar* text_range)
4325{
4326 const UChar *s, *se, *t, *p, *end;
4327 const UChar *tail;
4328 ptrdiff_t skip, tlen1;
4329 OnigEncoding enc = reg->enc;
4330
4331# ifdef ONIG_DEBUG_SEARCH
4332 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4333 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4334# endif
4335
4336 tail = target_end - 1;
4337 tlen1 = tail - target;
4338 end = text_range;
4339 if (end + tlen1 > text_end)
4340 end = text_end - tlen1;
4341
4342 s = text;
4343
4344 if (IS_NULL(reg->int_map)) {
4345 while (s < end) {
4346 p = se = s + tlen1;
4347 t = tail;
4348 while (*p == *t) {
4349 if (t == target) return (UChar* )s;
4350 p--; t--;
4351 }
4352 if (s + 1 >= end) break;
4353 skip = reg->map[se[1]];
4354 t = s;
4355 do {
4356 s += enclen(enc, s, end);
4357 } while ((s - t) < skip && s < end);
4358 }
4359 }
4360 else {
4361# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4362 while (s < end) {
4363 p = se = s + tlen1;
4364 t = tail;
4365 while (*p == *t) {
4366 if (t == target) return (UChar* )s;
4367 p--; t--;
4368 }
4369 if (s + 1 >= end) break;
4370 skip = reg->int_map[se[1]];
4371 t = s;
4372 do {
4373 s += enclen(enc, s, end);
4374 } while ((s - t) < skip && s < end);
4375 }
4376# endif
4377 }
4378
4379 return (UChar* )NULL;
4380}
4381
4382/* Sunday's quick search */
4383static UChar*
4384bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
4385 const UChar* text, const UChar* text_end, const UChar* text_range)
4386{
4387 const UChar *s, *t, *p, *end;
4388 const UChar *tail;
4389 ptrdiff_t tlen1;
4390
4391# ifdef ONIG_DEBUG_SEARCH
4392 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4393 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4394# endif
4395
4396 tail = target_end - 1;
4397 tlen1 = tail - target;
4398 end = text_range + tlen1;
4399 if (end > text_end)
4400 end = text_end;
4401
4402 s = text + tlen1;
4403 if (IS_NULL(reg->int_map)) {
4404 while (s < end) {
4405 p = s;
4406 t = tail;
4407 while (*p == *t) {
4408 if (t == target) return (UChar* )p;
4409 p--; t--;
4410 }
4411 if (s + 1 >= end) break;
4412 s += reg->map[s[1]];
4413 }
4414 }
4415 else { /* see int_map[] */
4416# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4417 while (s < end) {
4418 p = s;
4419 t = tail;
4420 while (*p == *t) {
4421 if (t == target) return (UChar* )p;
4422 p--; t--;
4423 }
4424 if (s + 1 >= end) break;
4425 s += reg->int_map[s[1]];
4426 }
4427# endif
4428 }
4429 return (UChar* )NULL;
4430}
4431
4432/* Sunday's quick search applied to a multibyte string (ignore case) */
4433static UChar*
4434bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4435 const UChar* text, const UChar* text_end,
4436 const UChar* text_range)
4437{
4438 const UChar *s, *se, *t, *end;
4439 const UChar *tail;
4440 ptrdiff_t skip, tlen1;
4441 OnigEncoding enc = reg->enc;
4442 int case_fold_flag = reg->case_fold_flag;
4443
4444# ifdef ONIG_DEBUG_SEARCH
4445 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4446 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4447# endif
4448
4449 tail = target_end - 1;
4450 tlen1 = tail - target;
4451 end = text_range;
4452 if (end + tlen1 > text_end)
4453 end = text_end - tlen1;
4454
4455 s = text;
4456
4457 if (IS_NULL(reg->int_map)) {
4458 while (s < end) {
4459 se = s + tlen1;
4460 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4461 s, se + 1))
4462 return (UChar* )s;
4463 if (s + 1 >= end) break;
4464 skip = reg->map[se[1]];
4465 t = s;
4466 do {
4467 s += enclen(enc, s, end);
4468 } while ((s - t) < skip && s < end);
4469 }
4470 }
4471 else {
4472# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4473 while (s < end) {
4474 se = s + tlen1;
4475 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4476 s, se + 1))
4477 return (UChar* )s;
4478 if (s + 1 >= end) break;
4479 skip = reg->int_map[se[1]];
4480 t = s;
4481 do {
4482 s += enclen(enc, s, end);
4483 } while ((s - t) < skip && s < end);
4484 }
4485# endif
4486 }
4487
4488 return (UChar* )NULL;
4489}
4490
4491/* Sunday's quick search (ignore case) */
4492static UChar*
4493bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
4494 const UChar* text, const UChar* text_end, const UChar* text_range)
4495{
4496 const UChar *s, *p, *end;
4497 const UChar *tail;
4498 ptrdiff_t tlen1;
4499 OnigEncoding enc = reg->enc;
4500 int case_fold_flag = reg->case_fold_flag;
4501
4502# ifdef ONIG_DEBUG_SEARCH
4503 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
4504 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
4505# endif
4506
4507 tail = target_end - 1;
4508 tlen1 = tail - target;
4509 end = text_range + tlen1;
4510 if (end > text_end)
4511 end = text_end;
4512
4513 s = text + tlen1;
4514 if (IS_NULL(reg->int_map)) {
4515 while (s < end) {
4516 p = s - tlen1;
4517 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4518 p, s + 1))
4519 return (UChar* )p;
4520 if (s + 1 >= end) break;
4521 s += reg->map[s[1]];
4522 }
4523 }
4524 else { /* see int_map[] */
4525# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
4526 while (s < end) {
4527 p = s - tlen1;
4528 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
4529 p, s + 1))
4530 return (UChar* )p;
4531 if (s + 1 >= end) break;
4532 s += reg->int_map[s[1]];
4533 }
4534# endif
4535 }
4536 return (UChar* )NULL;
4537}
4538#endif /* USE_SUNDAY_QUICK_SEARCH */
4539
4540#ifdef USE_INT_MAP_BACKWARD
4541static int
4542set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
4543 int** skip)
4544{
4545 int i, len;
4546
4547 if (IS_NULL(*skip)) {
4548 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4549 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
4550 }
4551
4552 len = (int )(end - s);
4553 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
4554 (*skip)[i] = len;
4555
4556 for (i = len - 1; i > 0; i--)
4557 (*skip)[s[i]] = i;
4558
4559 return 0;
4560}
4561
4562static UChar*
4563bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
4564 const UChar* text, const UChar* adjust_text,
4565 const UChar* text_end, const UChar* text_start)
4566{
4567 const UChar *s, *t, *p;
4568
4569 s = text_end - (target_end - target);
4570 if (text_start < s)
4571 s = text_start;
4572 else
4573 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4574
4575 while (s >= text) {
4576 p = s;
4577 t = target;
4578 while (t < target_end && *p == *t) {
4579 p++; t++;
4580 }
4581 if (t == target_end)
4582 return (UChar* )s;
4583
4584 s -= reg->int_map_backward[*s];
4585 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
4586 }
4587
4588 return (UChar* )NULL;
4589}
4590#endif
4591
4592static UChar*
4593map_search(OnigEncoding enc, UChar map[],
4594 const UChar* text, const UChar* text_range, const UChar* text_end)
4595{
4596 const UChar *s = text;
4597
4598 while (s < text_range) {
4599 if (map[*s]) return (UChar* )s;
4600
4601 s += enclen(enc, s, text_end);
4602 }
4603 return (UChar* )NULL;
4604}
4605
4606static UChar*
4607map_search_backward(OnigEncoding enc, UChar map[],
4608 const UChar* text, const UChar* adjust_text,
4609 const UChar* text_start, const UChar* text_end)
4610{
4611 const UChar *s = text_start;
4612
4613 while (s >= text) {
4614 if (map[*s]) return (UChar* )s;
4615
4616 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
4617 }
4618 return (UChar* )NULL;
4619}
4620
4621extern OnigPosition
4622onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
4623 OnigOptionType option)
4624{
4625 ptrdiff_t r;
4626 UChar *prev;
4627 OnigMatchArg msa;
4628
4629 MATCH_ARG_INIT(msa, option, region, at, at);
4630#ifdef USE_COMBINATION_EXPLOSION_CHECK
4631 {
4632 ptrdiff_t offset = at - str;
4633 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
4634 }
4635#endif
4636
4637 if (region) {
4638 r = onig_region_resize_clear(region, reg->num_mem + 1);
4639 }
4640 else
4641 r = 0;
4642
4643 if (r == 0) {
4644 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
4645 r = match_at(reg, str, end,
4646#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
4647 end,
4648#endif
4649 at, prev, &msa);
4650 }
4651
4652 MATCH_ARG_FREE(msa);
4653 return r;
4654}
4655
4656static int
4657forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
4658 UChar* range, UChar** low, UChar** high, UChar** low_prev)
4659{
4660 UChar *p, *pprev = (UChar* )NULL;
4661
4662#ifdef ONIG_DEBUG_SEARCH
4663 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
4664 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
4665#endif
4666
4667 p = s;
4668 if (reg->dmin > 0) {
4669 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
4670 p += reg->dmin;
4671 }
4672 else {
4673 UChar *q = p + reg->dmin;
4674
4675 if (q >= end) return 0; /* fail */
4676 while (p < q) p += enclen(reg->enc, p, end);
4677 }
4678 }
4679
4680 retry:
4681 switch (reg->optimize) {
4682 case ONIG_OPTIMIZE_EXACT:
4683 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
4684 break;
4685 case ONIG_OPTIMIZE_EXACT_IC:
4686 p = slow_search_ic(reg->enc, reg->case_fold_flag,
4687 reg->exact, reg->exact_end, p, end, range);
4688 break;
4689
4690 case ONIG_OPTIMIZE_EXACT_BM:
4691 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
4692 break;
4693
4694 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4695 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
4696 break;
4697
4698 case ONIG_OPTIMIZE_EXACT_BM_IC:
4699 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
4700 break;
4701
4702 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4703 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
4704 break;
4705
4706 case ONIG_OPTIMIZE_MAP:
4707 p = map_search(reg->enc, reg->map, p, range, end);
4708 break;
4709 }
4710
4711 if (p && p < range) {
4712 if (p - reg->dmin < s) {
4713 retry_gate:
4714 pprev = p;
4715 p += enclen(reg->enc, p, end);
4716 goto retry;
4717 }
4718
4719 if (reg->sub_anchor) {
4720 UChar* prev;
4721
4722 switch (reg->sub_anchor) {
4723 case ANCHOR_BEGIN_LINE:
4724 if (!ON_STR_BEGIN(p)) {
4725 prev = onigenc_get_prev_char_head(reg->enc,
4726 (pprev ? pprev : str), p, end);
4727 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
4728 goto retry_gate;
4729 }
4730 break;
4731
4732 case ANCHOR_END_LINE:
4733 if (ON_STR_END(p)) {
4734#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4735 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
4736 (pprev ? pprev : str), p);
4737 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
4738 goto retry_gate;
4739#endif
4740 }
4741 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
4742 goto retry_gate;
4743 break;
4744 }
4745 }
4746
4747 if (reg->dmax == 0) {
4748 *low = p;
4749 if (low_prev) {
4750 if (*low > s)
4751 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
4752 else
4753 *low_prev = onigenc_get_prev_char_head(reg->enc,
4754 (pprev ? pprev : str), p, end);
4755 }
4756 }
4757 else {
4758 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4759 if (p < str + reg->dmax) {
4760 *low = (UChar* )str;
4761 if (low_prev)
4762 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4763 }
4764 else {
4765 *low = p - reg->dmax;
4766 if (*low > s) {
4767 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4768 *low, end, (const UChar** )low_prev);
4769 if (low_prev && IS_NULL(*low_prev))
4770 *low_prev = onigenc_get_prev_char_head(reg->enc,
4771 (pprev ? pprev : s), *low, end);
4772 }
4773 else {
4774 if (low_prev)
4775 *low_prev = onigenc_get_prev_char_head(reg->enc,
4776 (pprev ? pprev : str), *low, end);
4777 }
4778 }
4779 }
4780 }
4781 /* no needs to adjust *high, *high is used as range check only */
4782 *high = p - reg->dmin;
4783
4784#ifdef ONIG_DEBUG_SEARCH
4785 fprintf(stderr,
4786 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4787 *low - str, *high - str, reg->dmin, reg->dmax);
4788#endif
4789 return 1; /* success */
4790 }
4791
4792 return 0; /* fail */
4793}
4794
4795#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4796
4797static int
4798backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4799 UChar* s, const UChar* range, UChar* adjrange,
4800 UChar** low, UChar** high)
4801{
4802 UChar *p;
4803
4804 range += reg->dmin;
4805 p = s;
4806
4807 retry:
4808 switch (reg->optimize) {
4809 case ONIG_OPTIMIZE_EXACT:
4810 exact_method:
4811 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4812 range, adjrange, end, p);
4813 break;
4814
4815 case ONIG_OPTIMIZE_EXACT_IC:
4816 case ONIG_OPTIMIZE_EXACT_BM_IC:
4817 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4818 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4819 reg->exact, reg->exact_end,
4820 range, adjrange, end, p);
4821 break;
4822
4823 case ONIG_OPTIMIZE_EXACT_BM:
4824 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4825#ifdef USE_INT_MAP_BACKWARD
4826 if (IS_NULL(reg->int_map_backward)) {
4827 int r;
4828 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4829 goto exact_method;
4830
4831 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4832 &(reg->int_map_backward));
4833 if (r) return r;
4834 }
4835 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4836 end, p);
4837#else
4838 goto exact_method;
4839#endif
4840 break;
4841
4842 case ONIG_OPTIMIZE_MAP:
4843 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4844 break;
4845 }
4846
4847 if (p) {
4848 if (reg->sub_anchor) {
4849 UChar* prev;
4850
4851 switch (reg->sub_anchor) {
4852 case ANCHOR_BEGIN_LINE:
4853 if (!ON_STR_BEGIN(p)) {
4854 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4855 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4856 p = prev;
4857 goto retry;
4858 }
4859 }
4860 break;
4861
4862 case ANCHOR_END_LINE:
4863 if (ON_STR_END(p)) {
4864#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4865 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4866 if (IS_NULL(prev)) goto fail;
4867 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4868 p = prev;
4869 goto retry;
4870 }
4871#endif
4872 }
4873 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4874 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4875 if (IS_NULL(p)) goto fail;
4876 goto retry;
4877 }
4878 break;
4879 }
4880 }
4881
4882 /* no needs to adjust *high, *high is used as range check only */
4883 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4884 *low = p - reg->dmax;
4885 *high = p - reg->dmin;
4886 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4887 }
4888
4889#ifdef ONIG_DEBUG_SEARCH
4890 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4891 (int )(*low - str), (int )(*high - str));
4892#endif
4893 return 1; /* success */
4894 }
4895
4896 fail:
4897#ifdef ONIG_DEBUG_SEARCH
4898 fprintf(stderr, "backward_search_range: fail.\n");
4899#endif
4900 return 0; /* fail */
4901}
4902
4903
4904extern OnigPosition
4905onig_search(regex_t* reg, const UChar* str, const UChar* end,
4906 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
4907{
4908 return onig_search_gpos(reg, str, end, start, start, range, region, option);
4909}
4910
/*
 * Search the subject [str, end) for a match of `reg`.
 *
 * Candidate start positions run from `start` toward `range`: when
 * range > start the search moves forward, otherwise it moves backward.
 * `global_pos` is handed to the match argument block and is also used to
 * narrow the range for ANCHOR_BEGIN_POSITION patterns.
 *
 * Before matching, the [start, range] window is tightened according to
 * reg->anchor, and, when reg->optimize is set, forward_search_range() /
 * backward_search_range() are used to skip positions that cannot match.
 *
 * Returns the byte offset (s - str) of the position where match_at()
 * succeeded, ONIG_MISMATCH when nothing matched, or the negative value
 * produced by match_at() / the non-zero value produced by
 * onig_region_resize_clear() on error.
 */
extern OnigPosition
onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
                 const UChar* global_pos,
                 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
  ptrdiff_t r;
  UChar *s, *prev;
  OnigMatchArg msa;
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
  /* Caller-supplied bounds, saved before the anchor logic below rewrites
     start/range; passed to match_at() as the upper range. */
  const UChar *orig_start = start;
  const UChar *orig_range = range;
#endif

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr,
          "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
          (uintptr_t )str, str, end - str, start - str, range - str);
#endif

  if (region) {
    r = onig_region_resize_clear(region, reg->num_mem + 1);
    if (r) goto finish_no_msa;
  }

  /* start must lie inside [str, end]. */
  if (start > end || start < str) goto mismatch_no_msa;


  /*
   * MATCH_AND_RETURN_CHECK: run match_at() at the current `s`/`prev`.
   *   - r >= 0:            jump to `match` (unless FIND_LONGEST is set and
   *                        USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is enabled,
   *                        in which case the scan continues);
   *   - r == ONIG_MISMATCH: fall through so the caller advances `s`;
   *   - any other r:       jump to `finish` (error).
   * The macro argument is only used when
   * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined.
   * The macro relies on the locals r, s, prev and msa by name.
   */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#  define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
# else
#  define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#else
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#  define MATCH_AND_RETURN_CHECK(none) \
  r = match_at(reg, str, end, s, prev, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
# else
#  define MATCH_AND_RETURN_CHECK(none) \
  r = match_at(reg, str, end, s, prev, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */


  /* anchor optimization: narrow the search range (non-empty subject only) */
  if (reg->anchor != 0 && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if (reg->anchor & ANCHOR_BEGIN_POSITION) {
      /* search start-position only */
      /* (also entered via goto from the ANCHOR_ANYCHAR_STAR_ML branch) */
    begin_position:
      if (range > start)
      {
        if (global_pos > start)
        {
          if (global_pos < range)
            range = global_pos + 1;
        }
        else
          range = start + 1;
      }
      else
        range = start;
    }
    else if (reg->anchor & ANCHOR_BEGIN_BUF) {
      /* search str-position only */
      if (range > start) {
        if (start != str) goto mismatch_no_msa;
        range = str + 1;
      }
      else {
        if (range <= str) {
          start = str;
          range = str;
        }
        else
          goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCHOR_END_BUF) {
      min_semi_end = max_semi_end = (UChar* )end;

      /* Shared with ANCHOR_SEMI_END_BUF, which jumps here after computing
         its own min_semi_end / max_semi_end. */
    end_buf:
      if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
        goto mismatch_no_msa;

      if (range > start) {
        /* forward: clamp start/range using anchor_dmax / anchor_dmin
           measured back from the (semi-)end positions */
        if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
          start = min_semi_end - reg->anchor_dmax;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
        }
        if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
          range = max_semi_end - reg->anchor_dmin + 1;
        }

        if (start > range) goto mismatch_no_msa;
        /* If start == range, match with empty at end.
           Backward search is used. */
      }
      else {
        /* backward: same clamping with the roles of start/range swapped */
        if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
          range = min_semi_end - reg->anchor_dmax;
        }
        if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
          start = max_semi_end - reg->anchor_dmin;
          start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
        }
        if (range > start) goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);

      max_semi_end = (UChar* )end;
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
        /* subject ends with a newline: the semi-end may sit before it */
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
        if (IS_NOT_NULL(pre_end) &&
            IS_NEWLINE_CRLF(reg->options) &&
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    static const UChar address_for_empty_string[] = "";

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "onig_search: empty string.\n");
#endif

    if (reg->threshold_len == 0) {
      /* Re-point str/start/end at a static empty string and make a single
         match attempt there. */
      start = end = str = address_for_empty_string;
      s = (UChar* )start;
      prev = (UChar* )NULL;

      MATCH_ARG_INIT(msa, option, region, start, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
      msa.state_check_buff = (void* )0;
      msa.state_check_buff_size = 0;  /* NO NEED, for valgrind */
#endif
      MATCH_AND_RETURN_CHECK(end);
      goto mismatch;
    }
    goto mismatch_no_msa;
  }

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
          (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  /* From here on msa is initialized; exits must go through `mismatch`,
     `finish` or `match`, which call MATCH_ARG_FREE. */
  MATCH_ARG_INIT(msa, option, region, start, global_pos);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
  {
    ptrdiff_t offset = (MIN(start, range) - str);
    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
  }
#endif

  s = (UChar* )start;
  if (range > start) {   /* forward search */
    if (s > str)
      prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
    else
      prev = (UChar* )NULL;

    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
      UChar *sch_range, *low, *high, *low_prev;

      /* sch_range: upper bound for the pre-scan, i.e. range extended by
         dmax and capped at end (or end itself when dmax is infinite). */
      sch_range = (UChar* )range;
      if (reg->dmax != 0) {
        if (reg->dmax == ONIG_INFINITE_DISTANCE)
          sch_range = (UChar* )end;
        else {
          sch_range += reg->dmax;
          if (sch_range > end) sch_range = (UChar* )end;
        }
      }

      if ((end - start) < reg->threshold_len)
        goto mismatch;

      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
        /* Bounded distance: pre-scan yields a [low, high] window; try every
           position in it, then pre-scan again from where we stopped. */
        do {
          if (! forward_search_range(reg, str, end, s, sch_range,
                                     &low, &high, &low_prev)) goto mismatch;
          if (s < low) {
            s    = low;
            prev = low_prev;
          }
          while (s <= high) {
            MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
            s += enclen(reg->enc, s, end);
          }
        } while (s < range);
        goto mismatch;
      }
      else { /* check only. */
        /* Infinite distance: the pre-scan result is used only as a
           yes/no check that the key exists somewhere ahead. */
        if (! forward_search_range(reg, str, end, s, sch_range,
                                   &low, &high, (UChar** )NULL)) goto mismatch;

        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
          do {
            MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
            s += enclen(reg->enc, s, end);

            if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
              /* skip ahead until the previous character is a newline
                 (or range is reached) before trying again */
              while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
                     && s < range) {
                prev = s;
                s += enclen(reg->enc, s, end);
              }
            }
          } while (s < range);
          goto mismatch;
        }
      }
    }

    /* Plain forward scan: try each character position in turn. */
    do {
      MATCH_AND_RETURN_CHECK(orig_range);
      prev = s;
      s += enclen(reg->enc, s, end);
    } while (s < range);

    if (s == range) { /* because empty match with /$/. */
      MATCH_AND_RETURN_CHECK(orig_range);
    }
  }
  else {  /* backward search */
    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
      UChar *low, *high, *adjrange, *sch_start;

      if (range < end)
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
      else
        adjrange = (UChar* )end;

      if (reg->dmax != ONIG_INFINITE_DISTANCE &&
          (end - range) >= reg->threshold_len) {
        /* Bounded distance: pre-scan backward from s + dmax (capped at
           end), then try every position in [low, high] going down. */
        do {
          sch_start = s + reg->dmax;
          if (sch_start > end) sch_start = (UChar* )end;
          if (backward_search_range(reg, str, end, sch_start, range, adjrange,
                                    &low, &high) <= 0)
            goto mismatch;

          if (s > high)
            s = high;

          while (s >= low) {
            prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
            MATCH_AND_RETURN_CHECK(orig_start);
            s = prev;
          }
        } while (s >= range);
        goto mismatch;
      }
      else { /* check only. */
        if ((end - range) < reg->threshold_len) goto mismatch;

        sch_start = s;
        if (reg->dmax != 0) {
          if (reg->dmax == ONIG_INFINITE_DISTANCE)
            sch_start = (UChar* )end;
          else {
            sch_start += reg->dmax;
            if (sch_start > end) sch_start = (UChar* )end;
            else
              sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
                                                        start, sch_start, end);
          }
        }
        if (backward_search_range(reg, str, end, sch_start, range, adjrange,
                                  &low, &high) <= 0) goto mismatch;
      }
    }

    /* Plain backward scan: try each character position down to range. */
    do {
      prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
      MATCH_AND_RETURN_CHECK(orig_start);
      s = prev;
    } while (s >= range);
  }

 mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  /* With FIND_LONGEST the scan does not stop at the first hit; report the
     best one recorded in msa, if any. */
  if (IS_FIND_LONGEST(reg->options)) {
    if (msa.best_len >= 0) {
      s = msa.best_s;
      goto match;
    }
  }
#endif
  r = ONIG_MISMATCH;

 finish:
  MATCH_ARG_FREE(msa);

  /* If result is mismatch and no FIND_NOT_EMPTY option,
     then the region is not set in match_at(). */
  if (IS_FIND_NOT_EMPTY(reg->options) && region) {
    onig_region_clear(region);
  }

#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
#endif
  return r;

  /* Exits taken before msa was initialized: nothing to free. */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
#endif
  return r;

 match:
  MATCH_ARG_FREE(msa);
  return s - str;
}
5283
5284extern OnigPosition
5285onig_scan(regex_t* reg, const UChar* str, const UChar* end,
5286 OnigRegion* region, OnigOptionType option,
5287 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
5288 void* callback_arg)
5289{
5290 OnigPosition r;
5291 OnigPosition n;
5292 int rs;
5293 const UChar* start;
5294
5295 n = 0;
5296 start = str;
5297 while (1) {
5298 r = onig_search(reg, str, end, start, end, region, option);
5299 if (r >= 0) {
5300 rs = scan_callback(n, r, region, callback_arg);
5301 n++;
5302 if (rs != 0)
5303 return rs;
5304
5305 if (region->end[0] == start - str) {
5306 if (start >= end) break;
5307 start += enclen(reg->enc, start, end);
5308 }
5309 else
5310 start = str + region->end[0];
5311
5312 if (start > end)
5313 break;
5314 }
5315 else if (r == ONIG_MISMATCH) {
5316 break;
5317 }
5318 else { /* error */
5319 return r;
5320 }
5321 }
5322
5323 return n;
5324}
5325
5326extern OnigEncoding
5327onig_get_encoding(const regex_t* reg)
5328{
5329 return reg->enc;
5330}
5331
5332extern OnigOptionType
5333onig_get_options(const regex_t* reg)
5334{
5335 return reg->options;
5336}
5337
5338extern OnigCaseFoldType
5339onig_get_case_fold_flag(const regex_t* reg)
5340{
5341 return reg->case_fold_flag;
5342}
5343
5344extern const OnigSyntaxType*
5345onig_get_syntax(const regex_t* reg)
5346{
5347 return reg->syntax;
5348}
5349
5350extern int
5351onig_number_of_captures(const regex_t* reg)
5352{
5353 return reg->num_mem;
5354}
5355
5356extern int
5357onig_number_of_capture_histories(const regex_t* reg)
5358{
5359#ifdef USE_CAPTURE_HISTORY
5360 int i, n;
5361
5362 n = 0;
5363 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
5364 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
5365 n++;
5366 }
5367 return n;
5368#else
5369 return 0;
5370#endif
5371}
5372
5373extern void
5374onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
5375{
5376 *to = *from;
5377}
#define RB_GNUC_EXTENSION
This is expanded to nothing for non-GCC compilers.
Definition defines.h:89
#define xfree
Old name of ruby_xfree.
Definition xmalloc.h:58
#define xrealloc
Old name of ruby_xrealloc.
Definition xmalloc.h:56
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
Definition win32.h:698