Ruby 3.2.2p53 (2023-03-30 revision e51014f9c05aa65cbf203442d37fef7c12390015)
pack.c
1/**********************************************************************
2
3 pack.c -
4
5 $Author$
6 created at: Thu Feb 10 15:17:05 JST 1994
7
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "ruby/internal/config.h"
13
14#include <ctype.h>
15#include <errno.h>
16#include <float.h>
17#include <sys/types.h>
18
19#include "internal.h"
20#include "internal/array.h"
21#include "internal/bits.h"
22#include "internal/string.h"
23#include "internal/symbol.h"
24#include "internal/variable.h"
25#include "ruby/util.h"
26
27#include "builtin.h"
28
/*
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means that q! and Q! always mean the standard long long type and
 * cause ArgumentError on platforms which have no long long type,
 * even if the platform has an implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
/*
 * natstr: directive letters after which the '_' / '!' (native size)
 *         modifier is accepted.
 * endstr: directive letters after which the '<' / '>' (explicit byte
 *         order) modifier is accepted.
 * NATINT_LEN_Q: byte width used for the q/Q directives.
 */
#if defined(HAVE_TRUE_LONG_LONG)
static const char natstr[] = "sSiIlLqQjJ";
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
static const char natstr[] = "sSiIlLjJ";
# define NATINT_LEN_Q 8
#endif
static const char endstr[] = "sSiIlLqQjJ";

/*
 * NATINT_PACK is defined unless every native type already has the fixed
 * width its directive uses (short == 2, long == 4, long long == 8), i.e.
 * whenever the native-size modifier can actually change a width.
 */
#if !(SIZEOF_SHORT == 2 && SIZEOF_LONG == 4 && (!defined(HAVE_TRUE_LONG_LONG) || SIZEOF_LONG_LONG == 8))
# define NATINT_PACK
#endif
54
55#ifdef DYNAMIC_ENDIAN
56/* for universal binary of NEXTSTEP and MacOS X */
57/* useless since autoconf 2.63? */
/*
 * Runtime byte-order probe.
 *
 * Returns 1 when the lowest-addressed byte of an int holding 1 is zero
 * (most significant byte first), 0 otherwise.  The answer is computed on
 * the first call and served from a static cache afterwards.
 */
static int
is_bigendian(void)
{
    static int probe = 0;	/* 0 until the first call, then holds 1 */
    static int cached_answer;

    if (!probe) {
        const char *lowest_byte;

        /* The flag doubles as the probed value: once it holds 1, its
         * first byte is non-zero exactly on a little-endian layout. */
        probe = 1;
        lowest_byte = (const char *)&probe;
        cached_answer = lowest_byte[0] ? 0 : 1;
    }
    return cached_answer;
}
70# define BIGENDIAN_P() (is_bigendian())
71#elif defined(WORDS_BIGENDIAN)
72# define BIGENDIAN_P() 1
73#else
74# define BIGENDIAN_P() 0
75#endif
76
/*
 * NATINT_LEN(type, len): byte width to use for an integer directive.
 * Without NATINT_PACK it is always sizeof(type).  With NATINT_PACK it is
 * sizeof(type) when the local flag `natint` is set and the fixed width
 * `len` otherwise; a variable named `natint` must be in scope.
 */
#ifndef NATINT_PACK
# define NATINT_LEN(type,len) ((int)sizeof(type))
#else
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#endif
82
/*
 * Overlay of a float with a 32-bit unsigned integer and a 4-byte buffer.
 * The byte-order macros swap through `u`; the pack/unpack code reads and
 * writes the raw bytes through `buf`.
 */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;
/*
 * Overlay of a double with a 64-bit unsigned integer and an 8-byte buffer.
 * The byte-order macros swap through `u`; the pack/unpack code reads and
 * writes the raw bytes through `buf`.
 */
typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;
/* Floats and doubles are byte-swapped through their integer view. */
#define swapf(x)	swap32(x)
#define swapd(x)	swap64(x)

/*
 * Network order <-> host order: the value is swapped only when
 * BIGENDIAN_P() is false.  Both directions are the same operation.
 */
#define rb_ntohf(x)	(BIGENDIAN_P() ? (x) : swapf(x))
#define rb_ntohd(x)	(BIGENDIAN_P() ? (x) : swapd(x))
#define rb_htonf(x)	rb_ntohf(x)
#define rb_htond(x)	rb_ntohd(x)

/*
 * VAX order <-> host order: the value is swapped only when
 * BIGENDIAN_P() is true.  Both directions are the same operation.
 */
#define rb_htovf(x)	(BIGENDIAN_P() ? swapf(x) : (x))
#define rb_htovd(x)	(BIGENDIAN_P() ? swapd(x) : (x))
#define rb_vtohf(x)	rb_htovf(x)
#define rb_vtohd(x)	rb_htovd(x)

/* Declares a FLOAT_SWAPPER named x; the macros below convert it in place. */
#define FLOAT_CONVWITH(x)	FLOAT_SWAPPER x;
#define HTONF(x)	((x).u = rb_htonf((x).u))
#define HTOVF(x)	((x).u = rb_htovf((x).u))
#define NTOHF(x)	((x).u = rb_ntohf((x).u))
#define VTOHF(x)	((x).u = rb_vtohf((x).u))

/* Declares a DOUBLE_SWAPPER named x; the macros below convert it in place. */
#define DOUBLE_CONVWITH(x)	DOUBLE_SWAPPER x;
#define HTOND(x)	((x).u = rb_htond((x).u))
#define HTOVD(x)	((x).u = rb_htovd((x).u))
#define NTOHD(x)	((x).u = rb_ntohd((x).u))
#define VTOHD(x)	((x).u = rb_vtohd((x).u))
116
117#define MAX_INTEGER_PACK_SIZE 8
118
119static const char toofew[] = "too few arguments";
120
121static void encodes(VALUE,const char*,long,int,int);
122static void qpencode(VALUE,VALUE,long);
123
124static unsigned long utf8_to_uv(const char*,long*);
125
126static ID id_associated;
127
128static void
129str_associate(VALUE str, VALUE add)
130{
131 /* assert(NIL_P(rb_attr_get(str, id_associated))); */
132 rb_ivar_set(str, id_associated, add);
133}
134
135static VALUE
136str_associated(VALUE str)
137{
138 VALUE associates = rb_ivar_lookup(str, id_associated, Qfalse);
139 if (!associates)
140 rb_raise(rb_eArgError, "no associated pointer");
141 return associates;
142}
143
144static VALUE
145associated_pointer(VALUE associates, const char *t)
146{
147 const VALUE *p = RARRAY_CONST_PTR(associates);
148 const VALUE *pend = p + RARRAY_LEN(associates);
149 for (; p < pend; p++) {
150 VALUE tmp = *p;
151 if (RB_TYPE_P(tmp, T_STRING) && RSTRING_PTR(tmp) == t) return tmp;
152 }
153 rb_raise(rb_eArgError, "non associated pointer");
155}
156
157static void
158unknown_directive(const char *mode, char type, VALUE fmt)
159{
160 char unknown[5];
161
162 if (ISPRINT(type)) {
163 unknown[0] = type;
164 unknown[1] = '\0';
165 }
166 else {
167 snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
168 }
169 fmt = rb_str_quote_unprintable(fmt);
170 rb_warn("unknown %s directive '%s' in '%"PRIsVALUE"'",
171 mode, unknown, fmt);
172}
173
174static float
175VALUE_to_float(VALUE obj)
176{
177 VALUE v = rb_to_float(obj);
178 double d = RFLOAT_VALUE(v);
179
180 if (isnan(d)) {
181 return NAN;
182 }
183 else if (d < -FLT_MAX) {
184 return -INFINITY;
185 }
186 else if (d <= FLT_MAX) {
187 return d;
188 }
189 else {
190 return INFINITY;
191 }
192}
193
194static VALUE
195pack_pack(rb_execution_context_t *ec, VALUE ary, VALUE fmt, VALUE buffer)
196{
197 static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
198 static const char spc10[] = " ";
199 const char *p, *pend;
200 VALUE res, from, associates = 0;
201 char type;
202 long len, idx, plen;
203 const char *ptr;
204 int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
205#ifdef NATINT_PACK
206 int natint; /* native integer */
207#endif
208 int integer_size, bigendian_p;
209
210 StringValue(fmt);
212 p = RSTRING_PTR(fmt);
213 pend = p + RSTRING_LEN(fmt);
214
215 if (NIL_P(buffer)) {
216 res = rb_str_buf_new(0);
217 }
218 else {
219 if (!RB_TYPE_P(buffer, T_STRING))
220 rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
221 rb_str_modify(buffer);
222 res = buffer;
223 }
224
225 idx = 0;
226
227#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
228#define MORE_ITEM (idx < RARRAY_LEN(ary))
229#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
230#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
231
232 while (p < pend) {
233 int explicit_endian = 0;
234 if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
235 rb_raise(rb_eRuntimeError, "format string modified");
236 }
237 type = *p++; /* get data type */
238#ifdef NATINT_PACK
239 natint = 0;
240#endif
241
242 if (ISSPACE(type)) continue;
243 if (type == '#') {
244 while ((p < pend) && (*p != '\n')) {
245 p++;
246 }
247 continue;
248 }
249
250 {
251 modifiers:
252 switch (*p) {
253 case '_':
254 case '!':
255 if (strchr(natstr, type)) {
256#ifdef NATINT_PACK
257 natint = 1;
258#endif
259 p++;
260 }
261 else {
262 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
263 }
264 goto modifiers;
265
266 case '<':
267 case '>':
268 if (!strchr(endstr, type)) {
269 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
270 }
271 if (explicit_endian) {
272 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
273 }
274 explicit_endian = *p++;
275 goto modifiers;
276 }
277 }
278
279 if (*p == '*') { /* set data length */
280 len = strchr("@Xxu", type) ? 0
281 : strchr("PMm", type) ? 1
282 : RARRAY_LEN(ary) - idx;
283 p++;
284 }
285 else if (ISDIGIT(*p)) {
286 errno = 0;
287 len = STRTOUL(p, (char**)&p, 10);
288 if (errno) {
289 rb_raise(rb_eRangeError, "pack length too big");
290 }
291 }
292 else {
293 len = 1;
294 }
295
296 switch (type) {
297 case 'U':
298 /* if encoding is US-ASCII, upgrade to UTF-8 */
299 if (enc_info == 1) enc_info = 2;
300 break;
301 case 'm': case 'M': case 'u':
302 /* keep US-ASCII (do nothing) */
303 break;
304 default:
305 /* fall back to BINARY */
306 enc_info = 0;
307 break;
308 }
309 switch (type) {
310 case 'A': case 'a': case 'Z':
311 case 'B': case 'b':
312 case 'H': case 'h':
313 from = NEXTFROM;
314 if (NIL_P(from)) {
315 ptr = "";
316 plen = 0;
317 }
318 else {
319 StringValue(from);
320 ptr = RSTRING_PTR(from);
321 plen = RSTRING_LEN(from);
322 }
323
324 if (p[-1] == '*')
325 len = plen;
326
327 switch (type) {
328 case 'a': /* arbitrary binary string (null padded) */
329 case 'A': /* arbitrary binary string (ASCII space padded) */
330 case 'Z': /* null terminated string */
331 if (plen >= len) {
332 rb_str_buf_cat(res, ptr, len);
333 if (p[-1] == '*' && type == 'Z')
334 rb_str_buf_cat(res, nul10, 1);
335 }
336 else {
337 rb_str_buf_cat(res, ptr, plen);
338 len -= plen;
339 while (len >= 10) {
340 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
341 len -= 10;
342 }
343 rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
344 }
345 break;
346
347#define castchar(from) (char)((from) & 0xff)
348
349 case 'b': /* bit string (ascending) */
350 {
351 int byte = 0;
352 long i, j = 0;
353
354 if (len > plen) {
355 j = (len - plen + 1)/2;
356 len = plen;
357 }
358 for (i=0; i++ < len; ptr++) {
359 if (*ptr & 1)
360 byte |= 128;
361 if (i & 7)
362 byte >>= 1;
363 else {
364 char c = castchar(byte);
365 rb_str_buf_cat(res, &c, 1);
366 byte = 0;
367 }
368 }
369 if (len & 7) {
370 char c;
371 byte >>= 7 - (len & 7);
372 c = castchar(byte);
373 rb_str_buf_cat(res, &c, 1);
374 }
375 len = j;
376 goto grow;
377 }
378 break;
379
380 case 'B': /* bit string (descending) */
381 {
382 int byte = 0;
383 long i, j = 0;
384
385 if (len > plen) {
386 j = (len - plen + 1)/2;
387 len = plen;
388 }
389 for (i=0; i++ < len; ptr++) {
390 byte |= *ptr & 1;
391 if (i & 7)
392 byte <<= 1;
393 else {
394 char c = castchar(byte);
395 rb_str_buf_cat(res, &c, 1);
396 byte = 0;
397 }
398 }
399 if (len & 7) {
400 char c;
401 byte <<= 7 - (len & 7);
402 c = castchar(byte);
403 rb_str_buf_cat(res, &c, 1);
404 }
405 len = j;
406 goto grow;
407 }
408 break;
409
410 case 'h': /* hex string (low nibble first) */
411 {
412 int byte = 0;
413 long i, j = 0;
414
415 if (len > plen) {
416 j = (len + 1) / 2 - (plen + 1) / 2;
417 len = plen;
418 }
419 for (i=0; i++ < len; ptr++) {
420 if (ISALPHA(*ptr))
421 byte |= (((*ptr & 15) + 9) & 15) << 4;
422 else
423 byte |= (*ptr & 15) << 4;
424 if (i & 1)
425 byte >>= 4;
426 else {
427 char c = castchar(byte);
428 rb_str_buf_cat(res, &c, 1);
429 byte = 0;
430 }
431 }
432 if (len & 1) {
433 char c = castchar(byte);
434 rb_str_buf_cat(res, &c, 1);
435 }
436 len = j;
437 goto grow;
438 }
439 break;
440
441 case 'H': /* hex string (high nibble first) */
442 {
443 int byte = 0;
444 long i, j = 0;
445
446 if (len > plen) {
447 j = (len + 1) / 2 - (plen + 1) / 2;
448 len = plen;
449 }
450 for (i=0; i++ < len; ptr++) {
451 if (ISALPHA(*ptr))
452 byte |= ((*ptr & 15) + 9) & 15;
453 else
454 byte |= *ptr & 15;
455 if (i & 1)
456 byte <<= 4;
457 else {
458 char c = castchar(byte);
459 rb_str_buf_cat(res, &c, 1);
460 byte = 0;
461 }
462 }
463 if (len & 1) {
464 char c = castchar(byte);
465 rb_str_buf_cat(res, &c, 1);
466 }
467 len = j;
468 goto grow;
469 }
470 break;
471 }
472 break;
473
474 case 'c': /* signed char */
475 case 'C': /* unsigned char */
476 integer_size = 1;
477 bigendian_p = BIGENDIAN_P(); /* not effective */
478 goto pack_integer;
479
480 case 's': /* s for int16_t, s! for signed short */
481 case 'S': /* S for uint16_t, S! for unsigned short */
482 integer_size = NATINT_LEN(short, 2);
483 bigendian_p = BIGENDIAN_P();
484 goto pack_integer;
485
486 case 'i': /* i and i! for signed int */
487 case 'I': /* I and I! for unsigned int */
488 integer_size = (int)sizeof(int);
489 bigendian_p = BIGENDIAN_P();
490 goto pack_integer;
491
492 case 'l': /* l for int32_t, l! for signed long */
493 case 'L': /* L for uint32_t, L! for unsigned long */
494 integer_size = NATINT_LEN(long, 4);
495 bigendian_p = BIGENDIAN_P();
496 goto pack_integer;
497
498 case 'q': /* q for int64_t, q! for signed long long */
499 case 'Q': /* Q for uint64_t, Q! for unsigned long long */
500 integer_size = NATINT_LEN_Q;
501 bigendian_p = BIGENDIAN_P();
502 goto pack_integer;
503
504 case 'j': /* j for intptr_t */
505 integer_size = sizeof(intptr_t);
506 bigendian_p = BIGENDIAN_P();
507 goto pack_integer;
508
509 case 'J': /* J for uintptr_t */
510 integer_size = sizeof(uintptr_t);
511 bigendian_p = BIGENDIAN_P();
512 goto pack_integer;
513
514 case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
515 integer_size = 2;
516 bigendian_p = 1;
517 goto pack_integer;
518
519 case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
520 integer_size = 4;
521 bigendian_p = 1;
522 goto pack_integer;
523
524 case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
525 integer_size = 2;
526 bigendian_p = 0;
527 goto pack_integer;
528
529 case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
530 integer_size = 4;
531 bigendian_p = 0;
532 goto pack_integer;
533
534 pack_integer:
535 if (explicit_endian) {
536 bigendian_p = explicit_endian == '>';
537 }
538 if (integer_size > MAX_INTEGER_PACK_SIZE)
539 rb_bug("unexpected intger size for pack: %d", integer_size);
540 while (len-- > 0) {
541 char intbuf[MAX_INTEGER_PACK_SIZE];
542
543 from = NEXTFROM;
544 rb_integer_pack(from, intbuf, integer_size, 1, 0,
547 rb_str_buf_cat(res, intbuf, integer_size);
548 }
549 break;
550
551 case 'f': /* single precision float in native format */
552 case 'F': /* ditto */
553 while (len-- > 0) {
554 float f;
555
556 from = NEXTFROM;
557 f = VALUE_to_float(from);
558 rb_str_buf_cat(res, (char*)&f, sizeof(float));
559 }
560 break;
561
562 case 'e': /* single precision float in VAX byte-order */
563 while (len-- > 0) {
564 FLOAT_CONVWITH(tmp);
565
566 from = NEXTFROM;
567 tmp.f = VALUE_to_float(from);
568 HTOVF(tmp);
569 rb_str_buf_cat(res, tmp.buf, sizeof(float));
570 }
571 break;
572
573 case 'E': /* double precision float in VAX byte-order */
574 while (len-- > 0) {
575 DOUBLE_CONVWITH(tmp);
576 from = NEXTFROM;
577 tmp.d = RFLOAT_VALUE(rb_to_float(from));
578 HTOVD(tmp);
579 rb_str_buf_cat(res, tmp.buf, sizeof(double));
580 }
581 break;
582
583 case 'd': /* double precision float in native format */
584 case 'D': /* ditto */
585 while (len-- > 0) {
586 double d;
587
588 from = NEXTFROM;
589 d = RFLOAT_VALUE(rb_to_float(from));
590 rb_str_buf_cat(res, (char*)&d, sizeof(double));
591 }
592 break;
593
594 case 'g': /* single precision float in network byte-order */
595 while (len-- > 0) {
596 FLOAT_CONVWITH(tmp);
597 from = NEXTFROM;
598 tmp.f = VALUE_to_float(from);
599 HTONF(tmp);
600 rb_str_buf_cat(res, tmp.buf, sizeof(float));
601 }
602 break;
603
604 case 'G': /* double precision float in network byte-order */
605 while (len-- > 0) {
606 DOUBLE_CONVWITH(tmp);
607
608 from = NEXTFROM;
609 tmp.d = RFLOAT_VALUE(rb_to_float(from));
610 HTOND(tmp);
611 rb_str_buf_cat(res, tmp.buf, sizeof(double));
612 }
613 break;
614
615 case 'x': /* null byte */
616 grow:
617 while (len >= 10) {
618 rb_str_buf_cat(res, nul10, 10);
619 len -= 10;
620 }
621 rb_str_buf_cat(res, nul10, len);
622 break;
623
624 case 'X': /* back up byte */
625 shrink:
626 plen = RSTRING_LEN(res);
627 if (plen < len)
628 rb_raise(rb_eArgError, "X outside of string");
629 rb_str_set_len(res, plen - len);
630 break;
631
632 case '@': /* null fill to absolute position */
633 len -= RSTRING_LEN(res);
634 if (len > 0) goto grow;
635 len = -len;
636 if (len > 0) goto shrink;
637 break;
638
639 case '%':
640 rb_raise(rb_eArgError, "%% is not supported");
641 break;
642
643 case 'U': /* Unicode character */
644 while (len-- > 0) {
645 SIGNED_VALUE l;
646 char buf[8];
647 int le;
648
649 from = NEXTFROM;
650 from = rb_to_int(from);
651 l = NUM2LONG(from);
652 if (l < 0) {
653 rb_raise(rb_eRangeError, "pack(U): value out of range");
654 }
655 le = rb_uv_to_utf8(buf, l);
656 rb_str_buf_cat(res, (char*)buf, le);
657 }
658 break;
659
660 case 'u': /* uuencoded string */
661 case 'm': /* base64 encoded string */
662 from = NEXTFROM;
663 StringValue(from);
664 ptr = RSTRING_PTR(from);
665 plen = RSTRING_LEN(from);
666
667 if (len == 0 && type == 'm') {
668 encodes(res, ptr, plen, type, 0);
669 ptr += plen;
670 break;
671 }
672 if (len <= 2)
673 len = 45;
674 else if (len > 63 && type == 'u')
675 len = 63;
676 else
677 len = len / 3 * 3;
678 while (plen > 0) {
679 long todo;
680
681 if (plen > len)
682 todo = len;
683 else
684 todo = plen;
685 encodes(res, ptr, todo, type, 1);
686 plen -= todo;
687 ptr += todo;
688 }
689 break;
690
691 case 'M': /* quoted-printable encoded string */
692 from = rb_obj_as_string(NEXTFROM);
693 if (len <= 1)
694 len = 72;
695 qpencode(res, from, len);
696 break;
697
698 case 'P': /* pointer to packed byte string */
699 from = THISFROM;
700 if (!NIL_P(from)) {
701 StringValue(from);
702 if (RSTRING_LEN(from) < len) {
703 rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
704 RSTRING_LEN(from), len);
705 }
706 }
707 len = 1;
708 /* FALL THROUGH */
709 case 'p': /* pointer to string */
710 while (len-- > 0) {
711 char *t;
712 from = NEXTFROM;
713 if (NIL_P(from)) {
714 t = 0;
715 }
716 else {
717 t = StringValuePtr(from);
718 }
719 if (!associates) {
720 associates = rb_ary_new();
721 }
722 rb_ary_push(associates, from);
723 rb_str_buf_cat(res, (char*)&t, sizeof(char*));
724 }
725 break;
726
727 case 'w': /* BER compressed integer */
728 while (len-- > 0) {
729 VALUE buf = rb_str_new(0, 0);
730 size_t numbytes;
731 int sign;
732 char *cp;
733
734 from = NEXTFROM;
735 from = rb_to_int(from);
736 numbytes = rb_absint_numwords(from, 7, NULL);
737 if (numbytes == 0)
738 numbytes = 1;
739 buf = rb_str_new(NULL, numbytes);
740
741 sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
742
743 if (sign < 0)
744 rb_raise(rb_eArgError, "can't compress negative numbers");
745 if (sign == 2)
746 rb_bug("buffer size problem?");
747
748 cp = RSTRING_PTR(buf);
749 while (1 < numbytes) {
750 *cp |= 0x80;
751 cp++;
752 numbytes--;
753 }
754
755 rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
756 }
757 break;
758
759 default: {
760 unknown_directive("pack", type, fmt);
761 break;
762 }
763 }
764 }
765
766 if (associates) {
767 str_associate(res, associates);
768 }
769 switch (enc_info) {
770 case 1:
771 ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
772 break;
773 case 2:
774 rb_enc_set_index(res, rb_utf8_encindex());
775 break;
776 default:
777 /* do nothing, keep ASCII-8BIT */
778 break;
779 }
780 return res;
781}
782
783static const char uu_table[] =
784"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
785static const char b64_table[] =
786"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
787
788static void
789encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
790{
791 enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
792 char buff[buff_size + 1]; /* +1 for tail_lf */
793 long i = 0;
794 const char *const trans = type == 'u' ? uu_table : b64_table;
795 char padding;
796 const unsigned char *s = (const unsigned char *)s0;
797
798 if (type == 'u') {
799 buff[i++] = (char)len + ' ';
800 padding = '`';
801 }
802 else {
803 padding = '=';
804 }
805 while (len >= input_unit) {
806 while (len >= input_unit && buff_size-i >= encoded_unit) {
807 buff[i++] = trans[077 & (*s >> 2)];
808 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
809 buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
810 buff[i++] = trans[077 & s[2]];
811 s += input_unit;
812 len -= input_unit;
813 }
814 if (buff_size-i < encoded_unit) {
815 rb_str_buf_cat(str, buff, i);
816 i = 0;
817 }
818 }
819
820 if (len == 2) {
821 buff[i++] = trans[077 & (*s >> 2)];
822 buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
823 buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
824 buff[i++] = padding;
825 }
826 else if (len == 1) {
827 buff[i++] = trans[077 & (*s >> 2)];
828 buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
829 buff[i++] = padding;
830 buff[i++] = padding;
831 }
832 if (tail_lf) buff[i++] = '\n';
833 rb_str_buf_cat(str, buff, i);
834 if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
835}
836
837static const char hex_table[] = "0123456789ABCDEF";
838
839static void
840qpencode(VALUE str, VALUE from, long len)
841{
842 char buff[1024];
843 long i = 0, n = 0, prev = EOF;
844 unsigned char *s = (unsigned char*)RSTRING_PTR(from);
845 unsigned char *send = s + RSTRING_LEN(from);
846
847 while (s < send) {
848 if ((*s > 126) ||
849 (*s < 32 && *s != '\n' && *s != '\t') ||
850 (*s == '=')) {
851 buff[i++] = '=';
852 buff[i++] = hex_table[*s >> 4];
853 buff[i++] = hex_table[*s & 0x0f];
854 n += 3;
855 prev = EOF;
856 }
857 else if (*s == '\n') {
858 if (prev == ' ' || prev == '\t') {
859 buff[i++] = '=';
860 buff[i++] = *s;
861 }
862 buff[i++] = *s;
863 n = 0;
864 prev = *s;
865 }
866 else {
867 buff[i++] = *s;
868 n++;
869 prev = *s;
870 }
871 if (n > len) {
872 buff[i++] = '=';
873 buff[i++] = '\n';
874 n = 0;
875 prev = '\n';
876 }
877 if (i > 1024 - 5) {
878 rb_str_buf_cat(str, buff, i);
879 i = 0;
880 }
881 s++;
882 }
883 if (n > 0) {
884 buff[i++] = '=';
885 buff[i++] = '\n';
886 }
887 if (i > 0) {
888 rb_str_buf_cat(str, buff, i);
889 }
890}
891
892static inline int
893hex2num(char c)
894{
895 int n;
896 n = ruby_digit36_to_number_table[(unsigned char)c];
897 if (16 <= n)
898 n = -1;
899 return n;
900}
901
/*
 * Clamps `len` to the number of whole `sz`-byte items left between `s`
 * and `send`.  Unless `star` is set, the number of items that had to be
 * dropped is left in `tmp_len`; otherwise `tmp_len` is 0.
 * Expects len, tmp_len, star, s and send in the expanding scope.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {			\
    const long pack_items_left_ = (long)((send-s)/(sz));	\
    tmp_len = 0;						\
    if (len > pack_items_left_) {				\
        if (!star) {						\
            tmp_len = len - pack_items_left_;			\
        }							\
        len = pack_items_left_;					\
    }								\
} while (0)

/*
 * In UNPACK_ARRAY mode, extends `ary` by `tmp_len` slots when tmp_len is
 * positive, by storing nil at the last of the new indexes.
 */
#define PACK_ITEM_ADJUST() do {					\
    if (mode == UNPACK_ARRAY && tmp_len > 0)			\
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil);	\
} while (0)

/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
 * 12.4/12.5/12.6 C compiler optimization bug
 * with "-xO4" optimization option.
 */
#if !defined(__SUNPRO_C) || __SUNPRO_C < 0x5130 || 0x5150 < __SUNPRO_C
# define AVOID_CC_BUG
#else
# define AVOID_CC_BUG volatile
#endif
926
/* How pack_unpack_internal delivers each unpacked item (see UNPACK_PUSH). */
enum unpack_mode {
    UNPACK_ARRAY = 0,	/* push the item onto the result array */
    UNPACK_BLOCK = 1,	/* yield the item to the block */
    UNPACK_1 = 2	/* return the first item immediately */
};
932
933static VALUE
934pack_unpack_internal(VALUE str, VALUE fmt, enum unpack_mode mode, long offset)
935{
936#define hexdigits ruby_hexdigits
937 char *s, *send;
938 char *p, *pend;
939 VALUE ary, associates = Qfalse;
940 char type;
941 long len;
942 AVOID_CC_BUG long tmp_len;
943 int star;
944#ifdef NATINT_PACK
945 int natint; /* native integer */
946#endif
947 int signed_p, integer_size, bigendian_p;
948#define UNPACK_PUSH(item) do {\
949 VALUE item_val = (item);\
950 if ((mode) == UNPACK_BLOCK) {\
951 rb_yield(item_val);\
952 }\
953 else if ((mode) == UNPACK_ARRAY) {\
954 rb_ary_push(ary, item_val);\
955 }\
956 else /* if ((mode) == UNPACK_1) { */ {\
957 return item_val; \
958 }\
959 } while (0)
960
961 StringValue(str);
962 StringValue(fmt);
964
965 if (offset < 0) rb_raise(rb_eArgError, "offset can't be negative");
966 len = RSTRING_LEN(str);
967 if (offset > len) rb_raise(rb_eArgError, "offset outside of string");
968
969 s = RSTRING_PTR(str);
970 send = s + len;
971 s += offset;
972
973 p = RSTRING_PTR(fmt);
974 pend = p + RSTRING_LEN(fmt);
975
976#define UNPACK_FETCH(var, type) (memcpy((var), s, sizeof(type)), s += sizeof(type))
977
978 ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
979 while (p < pend) {
980 int explicit_endian = 0;
981 type = *p++;
982#ifdef NATINT_PACK
983 natint = 0;
984#endif
985
986 if (ISSPACE(type)) continue;
987 if (type == '#') {
988 while ((p < pend) && (*p != '\n')) {
989 p++;
990 }
991 continue;
992 }
993
994 star = 0;
995 {
996 modifiers:
997 switch (*p) {
998 case '_':
999 case '!':
1000
1001 if (strchr(natstr, type)) {
1002#ifdef NATINT_PACK
1003 natint = 1;
1004#endif
1005 p++;
1006 }
1007 else {
1008 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1009 }
1010 goto modifiers;
1011
1012 case '<':
1013 case '>':
1014 if (!strchr(endstr, type)) {
1015 rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1016 }
1017 if (explicit_endian) {
1018 rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1019 }
1020 explicit_endian = *p++;
1021 goto modifiers;
1022 }
1023 }
1024
1025 if (p >= pend)
1026 len = 1;
1027 else if (*p == '*') {
1028 star = 1;
1029 len = send - s;
1030 p++;
1031 }
1032 else if (ISDIGIT(*p)) {
1033 errno = 0;
1034 len = STRTOUL(p, (char**)&p, 10);
1035 if (len < 0 || errno) {
1036 rb_raise(rb_eRangeError, "pack length too big");
1037 }
1038 }
1039 else {
1040 len = (type != '@');
1041 }
1042
1043 switch (type) {
1044 case '%':
1045 rb_raise(rb_eArgError, "%% is not supported");
1046 break;
1047
1048 case 'A':
1049 if (len > send - s) len = send - s;
1050 {
1051 long end = len;
1052 char *t = s + len - 1;
1053
1054 while (t >= s) {
1055 if (*t != ' ' && *t != '\0') break;
1056 t--; len--;
1057 }
1058 UNPACK_PUSH(rb_str_new(s, len));
1059 s += end;
1060 }
1061 break;
1062
1063 case 'Z':
1064 {
1065 char *t = s;
1066
1067 if (len > send-s) len = send-s;
1068 while (t < s+len && *t) t++;
1069 UNPACK_PUSH(rb_str_new(s, t-s));
1070 if (t < send) t++;
1071 s = star ? t : s+len;
1072 }
1073 break;
1074
1075 case 'a':
1076 if (len > send - s) len = send - s;
1077 UNPACK_PUSH(rb_str_new(s, len));
1078 s += len;
1079 break;
1080
1081 case 'b':
1082 {
1083 VALUE bitstr;
1084 char *t;
1085 int bits;
1086 long i;
1087
1088 if (p[-1] == '*' || len > (send - s) * 8)
1089 len = (send - s) * 8;
1090 bits = 0;
1091 bitstr = rb_usascii_str_new(0, len);
1092 t = RSTRING_PTR(bitstr);
1093 for (i=0; i<len; i++) {
1094 if (i & 7) bits >>= 1;
1095 else bits = (unsigned char)*s++;
1096 *t++ = (bits & 1) ? '1' : '0';
1097 }
1098 UNPACK_PUSH(bitstr);
1099 }
1100 break;
1101
1102 case 'B':
1103 {
1104 VALUE bitstr;
1105 char *t;
1106 int bits;
1107 long i;
1108
1109 if (p[-1] == '*' || len > (send - s) * 8)
1110 len = (send - s) * 8;
1111 bits = 0;
1112 bitstr = rb_usascii_str_new(0, len);
1113 t = RSTRING_PTR(bitstr);
1114 for (i=0; i<len; i++) {
1115 if (i & 7) bits <<= 1;
1116 else bits = (unsigned char)*s++;
1117 *t++ = (bits & 128) ? '1' : '0';
1118 }
1119 UNPACK_PUSH(bitstr);
1120 }
1121 break;
1122
1123 case 'h':
1124 {
1125 VALUE bitstr;
1126 char *t;
1127 int bits;
1128 long i;
1129
1130 if (p[-1] == '*' || len > (send - s) * 2)
1131 len = (send - s) * 2;
1132 bits = 0;
1133 bitstr = rb_usascii_str_new(0, len);
1134 t = RSTRING_PTR(bitstr);
1135 for (i=0; i<len; i++) {
1136 if (i & 1)
1137 bits >>= 4;
1138 else
1139 bits = (unsigned char)*s++;
1140 *t++ = hexdigits[bits & 15];
1141 }
1142 UNPACK_PUSH(bitstr);
1143 }
1144 break;
1145
1146 case 'H':
1147 {
1148 VALUE bitstr;
1149 char *t;
1150 int bits;
1151 long i;
1152
1153 if (p[-1] == '*' || len > (send - s) * 2)
1154 len = (send - s) * 2;
1155 bits = 0;
1156 bitstr = rb_usascii_str_new(0, len);
1157 t = RSTRING_PTR(bitstr);
1158 for (i=0; i<len; i++) {
1159 if (i & 1)
1160 bits <<= 4;
1161 else
1162 bits = (unsigned char)*s++;
1163 *t++ = hexdigits[(bits >> 4) & 15];
1164 }
1165 UNPACK_PUSH(bitstr);
1166 }
1167 break;
1168
1169 case 'c':
1170 signed_p = 1;
1171 integer_size = 1;
1172 bigendian_p = BIGENDIAN_P(); /* not effective */
1173 goto unpack_integer;
1174
1175 case 'C':
1176 signed_p = 0;
1177 integer_size = 1;
1178 bigendian_p = BIGENDIAN_P(); /* not effective */
1179 goto unpack_integer;
1180
1181 case 's':
1182 signed_p = 1;
1183 integer_size = NATINT_LEN(short, 2);
1184 bigendian_p = BIGENDIAN_P();
1185 goto unpack_integer;
1186
1187 case 'S':
1188 signed_p = 0;
1189 integer_size = NATINT_LEN(short, 2);
1190 bigendian_p = BIGENDIAN_P();
1191 goto unpack_integer;
1192
1193 case 'i':
1194 signed_p = 1;
1195 integer_size = (int)sizeof(int);
1196 bigendian_p = BIGENDIAN_P();
1197 goto unpack_integer;
1198
1199 case 'I':
1200 signed_p = 0;
1201 integer_size = (int)sizeof(int);
1202 bigendian_p = BIGENDIAN_P();
1203 goto unpack_integer;
1204
1205 case 'l':
1206 signed_p = 1;
1207 integer_size = NATINT_LEN(long, 4);
1208 bigendian_p = BIGENDIAN_P();
1209 goto unpack_integer;
1210
1211 case 'L':
1212 signed_p = 0;
1213 integer_size = NATINT_LEN(long, 4);
1214 bigendian_p = BIGENDIAN_P();
1215 goto unpack_integer;
1216
1217 case 'q':
1218 signed_p = 1;
1219 integer_size = NATINT_LEN_Q;
1220 bigendian_p = BIGENDIAN_P();
1221 goto unpack_integer;
1222
1223 case 'Q':
1224 signed_p = 0;
1225 integer_size = NATINT_LEN_Q;
1226 bigendian_p = BIGENDIAN_P();
1227 goto unpack_integer;
1228
1229 case 'j':
1230 signed_p = 1;
1231 integer_size = sizeof(intptr_t);
1232 bigendian_p = BIGENDIAN_P();
1233 goto unpack_integer;
1234
1235 case 'J':
1236 signed_p = 0;
1237 integer_size = sizeof(uintptr_t);
1238 bigendian_p = BIGENDIAN_P();
1239 goto unpack_integer;
1240
1241 case 'n':
1242 signed_p = 0;
1243 integer_size = 2;
1244 bigendian_p = 1;
1245 goto unpack_integer;
1246
1247 case 'N':
1248 signed_p = 0;
1249 integer_size = 4;
1250 bigendian_p = 1;
1251 goto unpack_integer;
1252
1253 case 'v':
1254 signed_p = 0;
1255 integer_size = 2;
1256 bigendian_p = 0;
1257 goto unpack_integer;
1258
1259 case 'V':
1260 signed_p = 0;
1261 integer_size = 4;
1262 bigendian_p = 0;
1263 goto unpack_integer;
1264
1265 unpack_integer:
1266 if (explicit_endian) {
1267 bigendian_p = explicit_endian == '>';
1268 }
1269 PACK_LENGTH_ADJUST_SIZE(integer_size);
1270 while (len-- > 0) {
1271 int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1272 VALUE val;
1273 if (signed_p)
1274 flags |= INTEGER_PACK_2COMP;
1275 val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1276 UNPACK_PUSH(val);
1277 s += integer_size;
1278 }
1279 PACK_ITEM_ADJUST();
1280 break;
1281
1282 case 'f':
1283 case 'F':
1284 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1285 while (len-- > 0) {
1286 float tmp;
1287 UNPACK_FETCH(&tmp, float);
1288 UNPACK_PUSH(DBL2NUM((double)tmp));
1289 }
1290 PACK_ITEM_ADJUST();
1291 break;
1292
1293 case 'e':
1294 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1295 while (len-- > 0) {
1296 FLOAT_CONVWITH(tmp);
1297 UNPACK_FETCH(tmp.buf, float);
1298 VTOHF(tmp);
1299 UNPACK_PUSH(DBL2NUM(tmp.f));
1300 }
1301 PACK_ITEM_ADJUST();
1302 break;
1303
1304 case 'E':
1305 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1306 while (len-- > 0) {
1307 DOUBLE_CONVWITH(tmp);
1308 UNPACK_FETCH(tmp.buf, double);
1309 VTOHD(tmp);
1310 UNPACK_PUSH(DBL2NUM(tmp.d));
1311 }
1312 PACK_ITEM_ADJUST();
1313 break;
1314
1315 case 'D':
1316 case 'd':
1317 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1318 while (len-- > 0) {
1319 double tmp;
1320 UNPACK_FETCH(&tmp, double);
1321 UNPACK_PUSH(DBL2NUM(tmp));
1322 }
1323 PACK_ITEM_ADJUST();
1324 break;
1325
1326 case 'g':
1327 PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1328 while (len-- > 0) {
1329 FLOAT_CONVWITH(tmp);
1330 UNPACK_FETCH(tmp.buf, float);
1331 NTOHF(tmp);
1332 UNPACK_PUSH(DBL2NUM(tmp.f));
1333 }
1334 PACK_ITEM_ADJUST();
1335 break;
1336
1337 case 'G':
1338 PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1339 while (len-- > 0) {
1340 DOUBLE_CONVWITH(tmp);
1341 UNPACK_FETCH(tmp.buf, double);
1342 NTOHD(tmp);
1343 UNPACK_PUSH(DBL2NUM(tmp.d));
1344 }
1345 PACK_ITEM_ADJUST();
1346 break;
1347
1348 case 'U':
1349 if (len > send - s) len = send - s;
1350 while (len > 0 && s < send) {
1351 long alen = send - s;
1352 unsigned long l;
1353
1354 l = utf8_to_uv(s, &alen);
1355 s += alen; len--;
1356 UNPACK_PUSH(ULONG2NUM(l));
1357 }
1358 break;
1359
1360 case 'u':
1361 {
1362 VALUE buf = rb_str_new(0, (send - s)*3/4);
1363 char *ptr = RSTRING_PTR(buf);
1364 long total = 0;
1365
1366 while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1367 long a,b,c,d;
1368 char hunk[3];
1369
1370 len = ((unsigned char)*s++ - ' ') & 077;
1371
1372 total += len;
1373 if (total > RSTRING_LEN(buf)) {
1374 len -= total - RSTRING_LEN(buf);
1375 total = RSTRING_LEN(buf);
1376 }
1377
1378 while (len > 0) {
1379 long mlen = len > 3 ? 3 : len;
1380
1381 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1382 a = ((unsigned char)*s++ - ' ') & 077;
1383 else
1384 a = 0;
1385 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1386 b = ((unsigned char)*s++ - ' ') & 077;
1387 else
1388 b = 0;
1389 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1390 c = ((unsigned char)*s++ - ' ') & 077;
1391 else
1392 c = 0;
1393 if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1394 d = ((unsigned char)*s++ - ' ') & 077;
1395 else
1396 d = 0;
1397 hunk[0] = (char)(a << 2 | b >> 4);
1398 hunk[1] = (char)(b << 4 | c >> 2);
1399 hunk[2] = (char)(c << 6 | d);
1400 memcpy(ptr, hunk, mlen);
1401 ptr += mlen;
1402 len -= mlen;
1403 }
1404 if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1405 s++; /* possible checksum byte */
1406 if (s < send && *s == '\r') s++;
1407 if (s < send && *s == '\n') s++;
1408 }
1409
1410 rb_str_set_len(buf, total);
1411 UNPACK_PUSH(buf);
1412 }
1413 break;
1414
1415 case 'm':
1416 {
1417 VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
1418 char *ptr = RSTRING_PTR(buf);
1419 int a = -1,b = -1,c = 0,d = 0;
1420 static signed char b64_xtable[256];
1421
1422 if (b64_xtable['/'] <= 0) {
1423 int i;
1424
1425 for (i = 0; i < 256; i++) {
1426 b64_xtable[i] = -1;
1427 }
1428 for (i = 0; i < 64; i++) {
1429 b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1430 }
1431 }
1432 if (len == 0) {
1433 while (s < send) {
1434 a = b = c = d = -1;
1435 a = b64_xtable[(unsigned char)*s++];
1436 if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1437 b = b64_xtable[(unsigned char)*s++];
1438 if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1439 if (*s == '=') {
1440 if (s + 2 == send && *(s + 1) == '=') break;
1441 rb_raise(rb_eArgError, "invalid base64");
1442 }
1443 c = b64_xtable[(unsigned char)*s++];
1444 if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1445 if (s + 1 == send && *s == '=') break;
1446 d = b64_xtable[(unsigned char)*s++];
1447 if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1448 *ptr++ = castchar(a << 2 | b >> 4);
1449 *ptr++ = castchar(b << 4 | c >> 2);
1450 *ptr++ = castchar(c << 6 | d);
1451 }
1452 if (c == -1) {
1453 *ptr++ = castchar(a << 2 | b >> 4);
1454 if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1455 }
1456 else if (d == -1) {
1457 *ptr++ = castchar(a << 2 | b >> 4);
1458 *ptr++ = castchar(b << 4 | c >> 2);
1459 if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1460 }
1461 }
1462 else {
1463 while (s < send) {
1464 a = b = c = d = -1;
1465 while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1466 if (s >= send) break;
1467 s++;
1468 while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1469 if (s >= send) break;
1470 s++;
1471 while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1472 if (*s == '=' || s >= send) break;
1473 s++;
1474 while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1475 if (*s == '=' || s >= send) break;
1476 s++;
1477 *ptr++ = castchar(a << 2 | b >> 4);
1478 *ptr++ = castchar(b << 4 | c >> 2);
1479 *ptr++ = castchar(c << 6 | d);
1480 a = -1;
1481 }
1482 if (a != -1 && b != -1) {
1483 if (c == -1)
1484 *ptr++ = castchar(a << 2 | b >> 4);
1485 else {
1486 *ptr++ = castchar(a << 2 | b >> 4);
1487 *ptr++ = castchar(b << 4 | c >> 2);
1488 }
1489 }
1490 }
1491 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1492 UNPACK_PUSH(buf);
1493 }
1494 break;
1495
1496 case 'M':
1497 {
1498 VALUE buf = rb_str_new(0, send - s);
1499 char *ptr = RSTRING_PTR(buf), *ss = s;
1500 int csum = 0;
1501 int c1, c2;
1502
1503 while (s < send) {
1504 if (*s == '=') {
1505 if (++s == send) break;
1506 if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1507 s++;
1508 if (*s != '\n') {
1509 if ((c1 = hex2num(*s)) == -1) break;
1510 if (++s == send) break;
1511 if ((c2 = hex2num(*s)) == -1) break;
1512 csum |= *ptr++ = castchar(c1 << 4 | c2);
1513 }
1514 }
1515 else {
1516 csum |= *ptr++ = *s;
1517 }
1518 s++;
1519 ss = s;
1520 }
1521 rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1522 rb_str_buf_cat(buf, ss, send-ss);
1524 ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1525 UNPACK_PUSH(buf);
1526 }
1527 break;
1528
1529 case '@':
1530 if (len > RSTRING_LEN(str))
1531 rb_raise(rb_eArgError, "@ outside of string");
1532 s = RSTRING_PTR(str) + len;
1533 break;
1534
1535 case 'X':
1536 if (len > s - RSTRING_PTR(str))
1537 rb_raise(rb_eArgError, "X outside of string");
1538 s -= len;
1539 break;
1540
1541 case 'x':
1542 if (len > send - s)
1543 rb_raise(rb_eArgError, "x outside of string");
1544 s += len;
1545 break;
1546
1547 case 'P':
1548 if (sizeof(char *) <= (size_t)(send - s)) {
1549 VALUE tmp = Qnil;
1550 char *t;
1551
1552 UNPACK_FETCH(&t, char *);
1553 if (t) {
1554 if (!associates) associates = str_associated(str);
1555 tmp = associated_pointer(associates, t);
1556 if (len < RSTRING_LEN(tmp)) {
1557 tmp = rb_str_new(t, len);
1558 str_associate(tmp, associates);
1559 }
1560 }
1561 UNPACK_PUSH(tmp);
1562 }
1563 break;
1564
1565 case 'p':
1566 if (len > (long)((send - s) / sizeof(char *)))
1567 len = (send - s) / sizeof(char *);
1568 while (len-- > 0) {
1569 if ((size_t)(send - s) < sizeof(char *))
1570 break;
1571 else {
1572 VALUE tmp = Qnil;
1573 char *t;
1574
1575 UNPACK_FETCH(&t, char *);
1576 if (t) {
1577 if (!associates) associates = str_associated(str);
1578 tmp = associated_pointer(associates, t);
1579 }
1580 UNPACK_PUSH(tmp);
1581 }
1582 }
1583 break;
1584
1585 case 'w':
1586 {
1587 char *s0 = s;
1588 while (len > 0 && s < send) {
1589 if (*s & 0x80) {
1590 s++;
1591 }
1592 else {
1593 s++;
1594 UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1595 len--;
1596 s0 = s;
1597 }
1598 }
1599 }
1600 break;
1601
1602 default:
1603 unknown_directive("unpack", type, fmt);
1604 break;
1605 }
1606 }
1607
1608 return ary;
1609}
1610
1611static VALUE
1612pack_unpack(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1613{
1614 enum unpack_mode mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1615 return pack_unpack_internal(str, fmt, mode, RB_NUM2LONG(offset));
1616}
1617
1618static VALUE
1619pack_unpack1(rb_execution_context_t *ec, VALUE str, VALUE fmt, VALUE offset)
1620{
1621 return pack_unpack_internal(str, fmt, UNPACK_1, RB_NUM2LONG(offset));
1622}
1623
1624int
1625rb_uv_to_utf8(char buf[6], unsigned long uv)
1626{
1627 if (uv <= 0x7f) {
1628 buf[0] = (char)uv;
1629 return 1;
1630 }
1631 if (uv <= 0x7ff) {
1632 buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1633 buf[1] = castchar((uv&0x3f)|0x80);
1634 return 2;
1635 }
1636 if (uv <= 0xffff) {
1637 buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1638 buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1639 buf[2] = castchar((uv&0x3f)|0x80);
1640 return 3;
1641 }
1642 if (uv <= 0x1fffff) {
1643 buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1644 buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1645 buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1646 buf[3] = castchar((uv&0x3f)|0x80);
1647 return 4;
1648 }
1649 if (uv <= 0x3ffffff) {
1650 buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1651 buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1652 buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1653 buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1654 buf[4] = castchar((uv&0x3f)|0x80);
1655 return 5;
1656 }
1657 if (uv <= 0x7fffffff) {
1658 buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1659 buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1660 buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1661 buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1662 buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1663 buf[5] = castchar((uv&0x3f)|0x80);
1664 return 6;
1665 }
1666 rb_raise(rb_eRangeError, "pack(U): value out of range");
1667
1669}
1670
/*
 * utf8_limits[k] is the smallest value that needs a sequence of k + 1
 * bytes, i.e. 2 raised to the number of payload bits available in a
 * k-byte sequence.  utf8_to_uv() treats a decoded value below the entry
 * for its own sequence length as a redundant encoding.  The last entry
 * is the first value past the 6-byte form.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte                             */
    1UL << 7,   /* 2 bytes:  7 bits fit in 1 byte     */
    1UL << 11,  /* 3 bytes: 11 bits fit in 2 bytes    */
    1UL << 16,  /* 4 bytes: 16 bits fit in 3 bytes    */
    1UL << 21,  /* 5 bytes: 21 bits fit in 4 bytes    */
    1UL << 26,  /* 6 bytes: 26 bits fit in 5 bytes    */
    1UL << 31,  /* 7 bytes: 31 bits fit in 6 bytes    */
};
1680
1681static unsigned long
1682utf8_to_uv(const char *p, long *lenp)
1683{
1684 int c = *p++ & 0xff;
1685 unsigned long uv = c;
1686 long n;
1687
1688 if (!(uv & 0x80)) {
1689 *lenp = 1;
1690 return uv;
1691 }
1692 if (!(uv & 0x40)) {
1693 *lenp = 1;
1694 rb_raise(rb_eArgError, "malformed UTF-8 character");
1695 }
1696
1697 if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1698 else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1699 else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1700 else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1701 else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1702 else {
1703 *lenp = 1;
1704 rb_raise(rb_eArgError, "malformed UTF-8 character");
1705 }
1706 if (n > *lenp) {
1707 rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1708 n, *lenp);
1709 }
1710 *lenp = n--;
1711 if (n != 0) {
1712 while (n--) {
1713 c = *p++ & 0xff;
1714 if ((c & 0xc0) != 0x80) {
1715 *lenp -= n + 1;
1716 rb_raise(rb_eArgError, "malformed UTF-8 character");
1717 }
1718 else {
1719 c &= 0x3f;
1720 uv = uv << 6 | c;
1721 }
1722 }
1723 }
1724 n = *lenp - 1;
1725 if (uv < utf8_limits[n]) {
1726 rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1727 }
1728 return uv;
1729}
1730
1731#include "pack.rbinc"
1732
1733void
1734Init_pack(void)
1735{
1736 id_associated = rb_make_internal_id();
1737}
int rb_block_given_p(void)
Determines if the current method is given a block.
Definition eval.c:864
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
Definition coderange.h:181
#define ISSPACE
Old name of rb_isspace.
Definition ctype.h:88
#define RFLOAT_VALUE
Old name of rb_float_value.
Definition double.h:28
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
Definition assume.h:29
#define STRTOUL
Old name of ruby_strtoul.
Definition ctype.h:104
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define NIL_P
Old name of RB_NIL_P.
#define DBL2NUM
Old name of rb_float_new.
Definition double.h:29
#define ISPRINT
Old name of rb_isprint.
Definition ctype.h:86
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
Definition coderange.h:189
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition error.c:3148
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition error.c:794
VALUE rb_eRangeError
RangeError exception.
Definition error.c:1095
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1091
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1089
void rb_warn(const char *fmt,...)
Identical to rb_warning(), except it always reports, regardless of the runtime -W flag.
Definition error.c:411
VALUE rb_eArgError
ArgumentError exception.
Definition error.c:1092
VALUE rb_to_float(VALUE val)
Identical to rb_check_to_float(), except it raises on error.
Definition object.c:3557
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
Definition object.c:3022
Defines RBIMPL_HAS_BUILTIN.
#define INTEGER_PACK_LITTLE_ENDIAN
Little endian combination.
Definition bignum.h:567
#define INTEGER_PACK_BIG_ENDIAN
Big endian combination.
Definition bignum.h:572
int rb_uv_to_utf8(char buf[6], unsigned long uv)
Encodes a Unicode codepoint into its UTF-8 representation.
Definition pack.c:1625
#define INTEGER_PACK_2COMP
Uses 2's complement representation.
Definition bignum.h:549
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1498
#define rb_str_buf_cat
Just another name of rb_str_cat.
Definition string.h:1681
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
Definition string.h:1532
void rb_str_modify(VALUE str)
Declares that the string is about to be modified.
Definition string.c:2437
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3020
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
Definition string.c:2489
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
Definition string.c:1532
VALUE rb_obj_as_string(VALUE obj)
Try converting an object to its stringised representation using its to_s method, if any.
Definition string.c:1682
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
Definition variable.c:1606
const signed char ruby_digit36_to_number_table[]
Character to number mapping, e.g. 'a' -> 10, 'b' -> 11, etc.
Definition util.c:76
#define RB_NUM2LONG
Just another name of rb_num2long_inline.
Definition long.h:57
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:68
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
Definition rarray.h:69
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:72
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:82
const char * rb_obj_classname(VALUE obj)
Queries the name of the class of the passed object.
Definition variable.c:325
intptr_t SIGNED_VALUE
A signed integer type that has the same width with VALUE.
Definition value.h:63
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52