decode_internal.h 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. /*
  2. * Copyright (c) 2009-2021, Google LLC
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of Google LLC nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  18. * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
  20. * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. /*
  28. * Internal implementation details of the decoder that are shared between
  29. * decode.c and decode_fast.c.
  30. */
  31. #ifndef UPB_DECODE_INT_H_
  32. #define UPB_DECODE_INT_H_
  33. #include <setjmp.h>
  34. #include "upb/msg_internal.h"
  35. #include "upb/upb_internal.h"
  36. /* Must be last. */
  37. #include "upb/port_def.inc"
  /* Sentinel stored in upb_decstate.end_group when no END_GROUP tag is pending.
   * The expansion is fully parenthesized so the cast stays a single unit
   * wherever the macro is used (e.g. `sizeof DECODE_NOGROUP`). */
  #define DECODE_NOGROUP ((uint32_t)-1)
  39. typedef struct upb_decstate {
  40. const char *end; /* Can read up to 16 bytes slop beyond this. */
  41. const char *limit_ptr; /* = end + UPB_MIN(limit, 0) */
  42. upb_msg *unknown_msg; /* If non-NULL, add unknown data at buffer flip. */
  43. const char *unknown; /* Start of unknown data. */
  44. const upb_extreg *extreg; /* For looking up extensions during the parse. */
  45. int limit; /* Submessage limit relative to end. */
  46. int depth;
  47. uint32_t end_group; /* field number of END_GROUP tag, else DECODE_NOGROUP */
  48. bool alias;
  49. char patch[32];
  50. upb_arena arena;
  51. jmp_buf err;
  52. } upb_decstate;
  53. /* Error function that will abort decoding with longjmp(). We can't declare this
  54. * UPB_NORETURN, even though it is appropriate, because if we do then compilers
  55. * will "helpfully" refuse to tailcall to it
  56. * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
  57. * of our optimizations. That is also why we must declare it in a separate file,
  58. * otherwise the compiler will see that it calls longjmp() and deduce that it is
  59. * noreturn. */
  60. const char *fastdecode_err(upb_decstate *d);
  61. extern const uint8_t upb_utf8_offsets[];
  62. UPB_INLINE
  63. bool decode_verifyutf8_inl(const char *buf, int len) {
  64. int i, j;
  65. uint8_t offset;
  66. i = 0;
  67. while (i < len) {
  68. offset = upb_utf8_offsets[(uint8_t)buf[i]];
  69. if (offset == 0 || i + offset > len) {
  70. return false;
  71. }
  72. for (j = i + 1; j < i + offset; j++) {
  73. if ((buf[j] & 0xc0) != 0x80) {
  74. return false;
  75. }
  76. }
  77. i += offset;
  78. }
  79. return i == len;
  80. }
  81. /* x86-64 pointers always have the high 16 bits matching. So we can shift
  82. * left 8 and right 8 without loss of information. */
  83. UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) {
  84. return ((intptr_t)tablep << 8) | tablep->table_mask;
  85. }
  86. UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) {
  87. return (const upb_msglayout*)(table >> 8);
  88. }
  89. UPB_INLINE
  90. const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr,
  91. int overrun) {
  92. if (overrun < d->limit) {
  93. /* Need to copy remaining data into patch buffer. */
  94. UPB_ASSERT(overrun < 16);
  95. if (d->unknown_msg) {
  96. if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown,
  97. &d->arena)) {
  98. return NULL;
  99. }
  100. d->unknown = &d->patch[0] + overrun;
  101. }
  102. memset(d->patch + 16, 0, 16);
  103. memcpy(d->patch, d->end, 16);
  104. ptr = &d->patch[0] + overrun;
  105. d->end = &d->patch[16];
  106. d->limit -= 16;
  107. d->limit_ptr = d->end + d->limit;
  108. d->alias = false;
  109. UPB_ASSERT(ptr < d->limit_ptr);
  110. return ptr;
  111. } else {
  112. return NULL;
  113. }
  114. }
  115. const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
  116. int overrun);
  117. UPB_INLINE
  118. bool decode_isdone(upb_decstate *d, const char **ptr) {
  119. int overrun = *ptr - d->end;
  120. if (UPB_LIKELY(*ptr < d->limit_ptr)) {
  121. return false;
  122. } else if (UPB_LIKELY(overrun == d->limit)) {
  123. return true;
  124. } else {
  125. *ptr = decode_isdonefallback(d, *ptr, overrun);
  126. return false;
  127. }
  128. }
  #if UPB_FASTTABLE
  /* Selects a fast-table entry from `tag` and tail-calls its parser.
   *
   * The low 8 bits of `table` are the mask applied to the tag; the remaining
   * bits hold the layout pointer (see decode_totablep()).  The masked tag is
   * assumed to be a multiple of 8, so dividing it by 8 gives the entry index.
   * The parser receives the entry's field_data XOR'ed with the tag. */
  UPB_INLINE
  const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr,
                                     upb_msg *msg, intptr_t table,
                                     uint64_t hasbits, uint64_t tag) {
    const upb_msglayout *layout = decode_totablep(table);
    const uint8_t tag_mask = (uint8_t)table;
    size_t slot = tag & tag_mask;
    uint64_t data;

    UPB_ASSUME((slot & 7) == 0);
    slot >>= 3;

    data = layout->fasttable[slot].field_data ^ tag;
    UPB_MUSTTAIL return layout->fasttable[slot].field_parser(d, ptr, msg, table,
                                                             hasbits, data);
  }
  #endif
  145. UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) {
  146. uint16_t tag;
  147. memcpy(&tag, ptr, 2);
  148. return tag;
  149. }
  150. UPB_INLINE void decode_checklimit(upb_decstate *d) {
  151. UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
  152. }
  153. UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
  154. int limit = size + (int)(ptr - d->end);
  155. int delta = d->limit - limit;
  156. decode_checklimit(d);
  157. d->limit = limit;
  158. d->limit_ptr = d->end + UPB_MIN(0, limit);
  159. decode_checklimit(d);
  160. return delta;
  161. }
  162. UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr,
  163. int saved_delta) {
  164. UPB_ASSERT(ptr - d->end == d->limit);
  165. decode_checklimit(d);
  166. d->limit += saved_delta;
  167. d->limit_ptr = d->end + UPB_MIN(0, d->limit);
  168. decode_checklimit(d);
  169. }
  170. #include "upb/port_undef.inc"
  171. #endif /* UPB_DECODE_INT_H_ */