Lexer.php 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. <?php
  2. class PHPParser_Lexer
  3. {
  4. protected $code;
  5. protected $tokens;
  6. protected $pos;
  7. protected $line;
  8. protected $tokenMap;
  9. protected $dropTokens;
  10. /**
  11. * Creates a Lexer.
  12. */
  13. public function __construct() {
  14. // map from internal tokens to PHPParser tokens
  15. $this->tokenMap = $this->createTokenMap();
  16. // map of tokens to drop while lexing (the map is only used for isset lookup,
  17. // that's why the value is simply set to 1; the value is never actually used.)
  18. $this->dropTokens = array_fill_keys(array(T_WHITESPACE, T_OPEN_TAG), 1);
  19. }
  20. /**
  21. * Initializes the lexer for lexing the provided source code.
  22. *
  23. * @param string $code The source code to lex
  24. *
  25. * @throws PHPParser_Error on lexing errors (unterminated comment or unexpected character)
  26. */
  27. public function startLexing($code) {
  28. $scream = ini_set('xdebug.scream', 0);
  29. $this->resetErrors();
  30. $this->tokens = @token_get_all($code);
  31. $this->handleErrors();
  32. ini_set('xdebug.scream', $scream);
  33. $this->code = $code; // keep the code around for __halt_compiler() handling
  34. $this->pos = -1;
  35. $this->line = 1;
  36. }
  37. protected function resetErrors() {
  38. // set error_get_last() to defined state by forcing an undefined variable error
  39. set_error_handler(array($this, 'dummyErrorHandler'), 0);
  40. @$undefinedVariable;
  41. restore_error_handler();
  42. }
  43. private function dummyErrorHandler() { return false; }
  44. protected function handleErrors() {
  45. $error = error_get_last();
  46. if (preg_match(
  47. '~^Unterminated comment starting line ([0-9]+)$~',
  48. $error['message'], $matches
  49. )) {
  50. throw new PHPParser_Error('Unterminated comment', $matches[1]);
  51. }
  52. if (preg_match(
  53. '~^Unexpected character in input: \'(.)\' \(ASCII=([0-9]+)\)~s',
  54. $error['message'], $matches
  55. )) {
  56. throw new PHPParser_Error(sprintf(
  57. 'Unexpected character "%s" (ASCII %d)',
  58. $matches[1], $matches[2]
  59. ));
  60. }
  61. // PHP cuts error message after null byte, so need special case
  62. if (preg_match('~^Unexpected character in input: \'$~', $error['message'])) {
  63. throw new PHPParser_Error('Unexpected null byte');
  64. }
  65. }
  66. /**
  67. * Fetches the next token.
  68. *
  69. * @param mixed $value Variable to store token content in
  70. * @param mixed $startAttributes Variable to store start attributes in
  71. * @param mixed $endAttributes Variable to store end attributes in
  72. *
  73. * @return int Token id
  74. */
  75. public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null) {
  76. $startAttributes = array();
  77. $endAttributes = array();
  78. while (isset($this->tokens[++$this->pos])) {
  79. $token = $this->tokens[$this->pos];
  80. if (is_string($token)) {
  81. $startAttributes['startLine'] = $this->line;
  82. $endAttributes['endLine'] = $this->line;
  83. // bug in token_get_all
  84. if ('b"' === $token) {
  85. $value = 'b"';
  86. return ord('"');
  87. } else {
  88. $value = $token;
  89. return ord($token);
  90. }
  91. } else {
  92. $this->line += substr_count($token[1], "\n");
  93. if (T_COMMENT === $token[0]) {
  94. $startAttributes['comments'][] = new PHPParser_Comment($token[1], $token[2]);
  95. } elseif (T_DOC_COMMENT === $token[0]) {
  96. $startAttributes['comments'][] = new PHPParser_Comment_Doc($token[1], $token[2]);
  97. } elseif (!isset($this->dropTokens[$token[0]])) {
  98. $value = $token[1];
  99. $startAttributes['startLine'] = $token[2];
  100. $endAttributes['endLine'] = $this->line;
  101. return $this->tokenMap[$token[0]];
  102. }
  103. }
  104. }
  105. $startAttributes['startLine'] = $this->line;
  106. // 0 is the EOF token
  107. return 0;
  108. }
  109. /**
  110. * Handles __halt_compiler() by returning the text after it.
  111. *
  112. * @return string Remaining text
  113. */
  114. public function handleHaltCompiler() {
  115. // get the length of the text before the T_HALT_COMPILER token
  116. $textBefore = '';
  117. for ($i = 0; $i <= $this->pos; ++$i) {
  118. if (is_string($this->tokens[$i])) {
  119. $textBefore .= $this->tokens[$i];
  120. } else {
  121. $textBefore .= $this->tokens[$i][1];
  122. }
  123. }
  124. // text after T_HALT_COMPILER, still including ();
  125. $textAfter = substr($this->code, strlen($textBefore));
  126. // ensure that it is followed by ();
  127. // this simplifies the situation, by not allowing any comments
  128. // in between of the tokens.
  129. if (!preg_match('~\s*\(\s*\)\s*(?:;|\?>\r?\n?)~', $textAfter, $matches)) {
  130. throw new PHPParser_Error('__HALT_COMPILER must be followed by "();"');
  131. }
  132. // prevent the lexer from returning any further tokens
  133. $this->pos = count($this->tokens);
  134. // return with (); removed
  135. return (string) substr($textAfter, strlen($matches[0])); // (string) converts false to ''
  136. }
  137. /**
  138. * Creates the token map.
  139. *
  140. * The token map maps the PHP internal token identifiers
  141. * to the identifiers used by the Parser. Additionally it
  142. * maps T_OPEN_TAG_WITH_ECHO to T_ECHO and T_CLOSE_TAG to ';'.
  143. *
  144. * @return array The token map
  145. */
  146. protected function createTokenMap() {
  147. $tokenMap = array();
  148. // 256 is the minimum possible token number, as everything below
  149. // it is an ASCII value
  150. for ($i = 256; $i < 1000; ++$i) {
  151. // T_DOUBLE_COLON is equivalent to T_PAAMAYIM_NEKUDOTAYIM
  152. if (T_DOUBLE_COLON === $i) {
  153. $tokenMap[$i] = PHPParser_Parser::T_PAAMAYIM_NEKUDOTAYIM;
  154. // T_OPEN_TAG_WITH_ECHO with dropped T_OPEN_TAG results in T_ECHO
  155. } elseif(T_OPEN_TAG_WITH_ECHO === $i) {
  156. $tokenMap[$i] = PHPParser_Parser::T_ECHO;
  157. // T_CLOSE_TAG is equivalent to ';'
  158. } elseif(T_CLOSE_TAG === $i) {
  159. $tokenMap[$i] = ord(';');
  160. // and the others can be mapped directly
  161. } elseif ('UNKNOWN' !== ($name = token_name($i))
  162. && defined($name = 'PHPParser_Parser::' . $name)
  163. ) {
  164. $tokenMap[$i] = constant($name);
  165. }
  166. }
  167. return $tokenMap;
  168. }
  169. }