Language.php 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. <?php
  2. /* Copyright (c)
  3. * - 2006-2013, Ivan Sagalaev (maniac@softwaremaniacs.org), highlight.js
  4. * (original author)
  5. * - 2013-2015, Geert Bergman (geert@scrivo.nl), highlight.php
  6. * - 2014, Daniel Lynge, highlight.php (contributor)
  7. * All rights reserved.
  8. *
  9. * Redistribution and use in source and binary forms, with or without
  10. * modification, are permitted provided that the following conditions are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright notice,
  13. * this list of conditions and the following disclaimer.
  14. * 2. Redistributions in binary form must reproduce the above copyright notice,
  15. * this list of conditions and the following disclaimer in the documentation
  16. * and/or other materials provided with the distribution.
  17. * 3. Neither the name of "highlight.js", "highlight.php", nor the names of its
  18. * contributors may be used to endorse or promote products derived from this
  19. * software without specific prior written permission.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31. * POSSIBILITY OF SUCH DAMAGE.
  32. */
  33. namespace Highlight;
  34. class Language
  35. {
  36. public $caseInsensitive = false;
  37. public $aliases = null;
  38. public function complete(&$e)
  39. {
  40. if (!isset($e)) {
  41. $e = new \stdClass();
  42. }
  43. $patch = array(
  44. "begin" => true,
  45. "end" => true,
  46. "lexemes" => true,
  47. "illegal" => true,
  48. );
  49. $def = array(
  50. "begin" => "",
  51. "beginRe" => "",
  52. "beginKeywords" => "",
  53. "excludeBegin" => "",
  54. "returnBegin" => "",
  55. "end" => "",
  56. "endRe" => "",
  57. "endsParent" => "",
  58. "endsWithParent" => "",
  59. "excludeEnd" => "",
  60. "returnEnd" => "",
  61. "starts" => "",
  62. "terminators" => "",
  63. "terminatorEnd" => "",
  64. "lexemes" => "",
  65. "lexemesRe" => "",
  66. "illegal" => "",
  67. "illegalRe" => "",
  68. "className" => "",
  69. "contains" => array(),
  70. "keywords" => null,
  71. "subLanguage" => null,
  72. "subLanguageMode" => "",
  73. "compiled" => false,
  74. "relevance" => 1);
  75. foreach ($patch as $k => $v) {
  76. if (isset($e->$k)) {
  77. $e->$k = str_replace("\\/", "/", $e->$k);
  78. $e->$k = str_replace("/", "\\/", $e->$k);
  79. }
  80. }
  81. foreach ($def as $k => $v) {
  82. if (!isset($e->$k)) {
  83. @$e->$k = $v;
  84. }
  85. }
  86. }
  87. public function __construct($lang)
  88. {
  89. $json = file_get_contents(__DIR__ . DIRECTORY_SEPARATOR . "languages" .
  90. DIRECTORY_SEPARATOR . "{$lang}.json");
  91. $this->mode = json_decode($json);
  92. $this->name = $lang;
  93. $this->aliases =
  94. isset($this->mode->aliases) ? $this->mode->aliases : null;
  95. $this->caseInsensitive = isset($this->mode->case_insensitive) ?
  96. $this->mode->case_insensitive : false;
  97. }
  98. private function langRe($value, $global=false)
  99. {
  100. return "/{$value}/um" . ($this->caseInsensitive ? "i" : "");
  101. }
  102. private function processKeyWords($kw)
  103. {
  104. if (is_string($kw)) {
  105. if ($this->caseInsensitive) {
  106. $kw = mb_strtolower($kw, "UTF-8");
  107. }
  108. $kw = array("keyword" => explode(" ", $kw));
  109. } else {
  110. foreach ($kw as $cls=>$vl) {
  111. if (!is_array($vl)) {
  112. if ($this->caseInsensitive) {
  113. $vl = mb_strtolower($vl, "UTF-8");
  114. }
  115. $kw->$cls = explode(" ", $vl);
  116. }
  117. }
  118. }
  119. return $kw;
  120. }
  121. private function compileMode($mode, $parent=null)
  122. {
  123. if (isset($mode->compiled)) {
  124. return;
  125. }
  126. $this->complete($mode);
  127. $mode->compiled = true;
  128. $mode->keywords =
  129. $mode->keywords ? $mode->keywords : $mode->beginKeywords;
  130. /* Note: JsonRef method creates different references as those in the
  131. * original source files. Two modes may refer to the same keywors
  132. * set, so only testing if the mode has keywords is not enough: the
  133. * mode's keywords might be compiled already, so it is necessary
  134. * to do an 'is_array' check.
  135. */
  136. if ($mode->keywords && !is_array($mode->keywords)) {
  137. $compiledKeywords = array();
  138. $mode->lexemesRe = $this->langRe($mode->lexemes
  139. ? $mode->lexemes : "\b\w+\b", true);
  140. foreach ($this->processKeyWords($mode->keywords) as $clsNm => $dat) {
  141. if (!is_array($dat)) {
  142. $dat = array($dat);
  143. }
  144. foreach ($dat as $kw) {
  145. $pair = explode("|", $kw);
  146. $compiledKeywords[$pair[0]] =
  147. array($clsNm, isset($pair[1]) ? intval($pair[1]) : 1);
  148. }
  149. }
  150. $mode->keywords = $compiledKeywords;
  151. }
  152. if ($parent) {
  153. if ($mode->beginKeywords) {
  154. $mode->begin = "\\b(" .
  155. implode("|",explode(" ", $mode->beginKeywords)) . ")\\b";
  156. }
  157. if (!$mode->begin) {
  158. $mode->begin = "\B|\b";
  159. }
  160. $mode->beginRe = $this->langRe($mode->begin);
  161. if (!$mode->end && !$mode->endsWithParent) {
  162. $mode->end = "\B|\b";
  163. }
  164. if ($mode->end) {
  165. $mode->endRe = $this->langRe($mode->end);
  166. }
  167. $mode->terminatorEnd = $mode->end;
  168. if ($mode->endsWithParent && $parent->terminatorEnd) {
  169. $mode->terminatorEnd .=
  170. ($mode->end ? "|" : "") . $parent->terminatorEnd;
  171. }
  172. }
  173. if ($mode->illegal) {
  174. $mode->illegalRe = $this->langRe($mode->illegal);
  175. }
  176. $expanded_contains = array();
  177. for ($i=0; $i<count($mode->contains); $i++) {
  178. if (isset($mode->contains[$i]->variants)) {
  179. foreach ($mode->contains[$i]->variants as $v) {
  180. $x = (object)((array)$v + (array)$mode->contains[$i]);
  181. unset($x->variants);
  182. $expanded_contains[] = $x;
  183. }
  184. } else {
  185. $expanded_contains[] = "self" === $mode->contains[$i] ?
  186. $mode : $mode->contains[$i];
  187. }
  188. }
  189. $mode->contains = $expanded_contains;
  190. for ($i=0; $i<count($mode->contains); $i++) {
  191. $this->compileMode($mode->contains[$i], $mode);
  192. }
  193. if ($mode->starts) {
  194. $this->compileMode($mode->starts, $parent);
  195. }
  196. $terminators = array();
  197. for ($i=0; $i<count($mode->contains); $i++) {
  198. $terminators[] = $mode->contains[$i]->beginKeywords
  199. ? "\.?(" . $mode->contains[$i]->begin . ")\.?"
  200. : $mode->contains[$i]->begin;
  201. }
  202. if ($mode->terminatorEnd) {
  203. $terminators[] = $mode->terminatorEnd;
  204. }
  205. if ($mode->illegal) {
  206. $terminators[] = $mode->illegal;
  207. }
  208. $mode->terminators = count($terminators)
  209. ? $this->langRe(implode("|", $terminators), true) : null;
  210. }
  211. public function compile()
  212. {
  213. if (!isset($this->mode->compiled)) {
  214. $jr = new JsonRef();
  215. $this->mode = $jr->decode($this->mode);
  216. $this->compileMode($this->mode);
  217. }
  218. }
  219. }