Escaper.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. <?php
  2. /**
  3. * Copyright © Magento, Inc. All rights reserved.
  4. * See COPYING.txt for license details.
  5. */
  6. namespace Magento\Framework;
  /**
   * Magento escape methods
   *
   * Collects the escaping helpers for the HTML, HTML attribute, URL, JavaScript and CSS output contexts.
   * The ZendEscaper and the logger are not injected; they are fetched from the ObjectManager on first use.
   *
   * @api
   * @since 100.0.2
   */
  class Escaper
  {
      /**
       * Escaper used by escapeHtmlAttr(), encodeUrlParam() and escapeCss(); resolved lazily in getEscaper().
       *
       * @var \Magento\Framework\ZendEscaper
       */
      private $escaper;

      /**
       * Logger used to report parse failures and prohibited tags; resolved lazily in getLogger().
       *
       * @var \Psr\Log\LoggerInterface
       */
      private $logger;

      /**
       * Lower-case tag names that are never honoured, even when a caller lists them as allowed.
       *
       * @var string[]
       */
      private $notAllowedTags = ['script', 'img', 'embed', 'iframe', 'video', 'source', 'object', 'audio'];

      /**
       * Attribute names that survive on allowed tags; every other attribute is removed.
       *
       * @var string[]
       */
      private $allowedAttributes = ['id', 'class', 'href', 'target', 'title', 'style'];

      /**
       * Case-insensitive pattern for script-capable URL prefixes.
       *
       * Matches `javascript` or `data` followed by `:`, a literal `\x3a` or `%3A`, the word `vbscript:`,
       * and `javascript` / `data` written as literal `\xNN` sequences followed by one of the same separators.
       *
       * @var string
       */
      private static $xssFiltrationPattern =
          '/((javascript(\\\\x3a|:|%3A))|(data(\\\\x3a|:|%3A))|(vbscript:))|'
          . '((\\\\x6A\\\\x61\\\\x76\\\\x61\\\\x73\\\\x63\\\\x72\\\\x69\\\\x70\\\\x74(\\\\x3a|:|%3A))|'
          . '(\\\\x64\\\\x61\\\\x74\\\\x61(\\\\x3a|:|%3A)))/i';

      /**
       * Attribute names whose values are escaped with escapeUrl() instead of escapeHtml().
       *
       * @var string[]
       */
      private $escapeAsUrlAttributes = ['href'];

      /**
       * Escape string for HTML context.
       *
       * AllowedTags will not be escaped, except the following: script, img, embed,
       * iframe, video, source, object, audio
       *
       * Arrays are escaped item by item; the returned array is re-indexed (input keys are not kept).
       * Non-array input is cast to string, and an empty string is returned unchanged.
       *
       * @param string|array $data
       * @param array|null $allowedTags
       * @return string|array
       */
      public function escapeHtml($data, $allowedTags = null)
      {
          if (is_array($data)) {
              $escapedItems = [];
              foreach ($data as $item) {
                  $escapedItems[] = $this->escapeHtml($item, $allowedTags);
              }
              return $escapedItems;
          }

          $data = (string)$data;
          if ($data === '') {
              return $data;
          }

          if (is_array($allowedTags) && !empty($allowedTags)) {
              return $this->escapeHtmlWithAllowedTags($data, $allowedTags);
          }

          // Last argument disables double encoding, so existing entities such as "&amp;" are left as they are.
          return htmlspecialchars($data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
      }

      /**
       * Escape a non-empty string while keeping the given tags as markup.
       *
       * The string is parsed as the body of a throw-away HTML document, disallowed tags and attributes
       * are stripped, the remaining text and attribute values are escaped, and the body content is
       * extracted again. Returns an empty string when the body cannot be located in the serialized output.
       *
       * @param string $data
       * @param array $allowedTags
       * @return string
       */
      private function escapeHtmlWithAllowedTags(string $data, array $allowedTags): string
      {
          $allowedTags = $this->filterProhibitedTags($allowedTags);
          // The unique id lets the body element be found again in the serialized document.
          $wrapperElementId = uniqid();
          $domDocument = new \DOMDocument('1.0', 'UTF-8');

          $data = $this->prepareUnescapedCharacters($data);
          // NOTE(review): the 'HTML-ENTITIES' mbstring encoding is deprecated as of PHP 8.2 - confirm the
          // supported PHP range before relying on it further.
          $string = mb_convert_encoding($data, 'HTML-ENTITIES', 'UTF-8');

          // The throwing handler is scoped to loadHTML() only and restored exactly once, so the caller's
          // error handler is never popped and never left replaced, whatever happens during parsing.
          $loadError = null;
          set_error_handler(
              static function ($errorNumber, $errorString) {
                  throw new \Exception($errorString, $errorNumber);
              }
          );
          try {
              $domDocument->loadHTML(
                  '<html><body id="' . $wrapperElementId . '">' . $string . '</body></html>'
              );
          } catch (\Exception $e) {
              $loadError = $e;
          } finally {
              restore_error_handler();
          }
          if ($loadError !== null) {
              // Logged after the handler is restored, so the logger runs under the caller's handler.
              $this->getLogger()->critical($loadError);
          }

          $this->removeNotAllowedTags($domDocument, $allowedTags);
          $this->removeNotAllowedAttributes($domDocument);
          $this->escapeText($domDocument);
          $this->escapeAttributeValues($domDocument);

          $result = mb_convert_encoding($domDocument->saveHTML(), 'UTF-8', 'HTML-ENTITIES');
          preg_match('/<body id="' . $wrapperElementId . '">(.+)<\/body><\/html>$/si', $result, $matches);

          return !empty($matches) ? $matches[1] : '';
      }

      /**
       * Used to replace characters, that mb_convert_encoding will not process
       *
       * Every "&" becomes "&amp;". A plain byte replacement is used so that input which is not valid
       * UTF-8 is still processed instead of being dropped.
       *
       * @param string $data
       * @return string
       */
      private function prepareUnescapedCharacters(string $data): string
      {
          return \str_replace('&', '&amp;', $data);
      }

      /**
       * Remove not allowed tags
       *
       * Each node whose name is neither allowed nor "html"/"body" is replaced by a text node holding its
       * text content. Text and comment nodes are left in place.
       *
       * @param \DOMDocument $domDocument
       * @param string[] $allowedTags
       * @return void
       */
      private function removeNotAllowedTags(\DOMDocument $domDocument, array $allowedTags)
      {
          $keptNames = array_merge($allowedTags, ['html', 'body']);
          $expression = '//node()[name() != \'' . implode('\' and name() != \'', $keptNames) . '\']';

          $xpath = new \DOMXPath($domDocument);
          foreach ($xpath->query($expression) as $node) {
              if ($node->nodeName === '#text' || $node->nodeName === '#comment') {
                  continue;
              }
              $node->parentNode->replaceChild($domDocument->createTextNode($node->textContent), $node);
          }
      }

      /**
       * Remove not allowed attributes
       *
       * Removes every attribute whose name is not listed in $allowedAttributes.
       *
       * @param \DOMDocument $domDocument
       * @return void
       */
      private function removeNotAllowedAttributes(\DOMDocument $domDocument)
      {
          $expression = '//@*[name() != \'' . implode('\' and name() != \'', $this->allowedAttributes) . '\']';

          $xpath = new \DOMXPath($domDocument);
          foreach ($xpath->query($expression) as $attribute) {
              $attribute->parentNode->removeAttribute($attribute->nodeName);
          }
      }

      /**
       * Escape text
       *
       * Runs the content of every text node through escapeHtml().
       *
       * @param \DOMDocument $domDocument
       * @return void
       */
      private function escapeText(\DOMDocument $domDocument)
      {
          $xpath = new \DOMXPath($domDocument);
          foreach ($xpath->query('//text()') as $textNode) {
              $textNode->textContent = $this->escapeHtml($textNode->textContent);
          }
      }

      /**
       * Escape attribute values
       *
       * Rewrites every attribute in the document with its escaped value.
       *
       * @param \DOMDocument $domDocument
       * @return void
       */
      private function escapeAttributeValues(\DOMDocument $domDocument)
      {
          $xpath = new \DOMXPath($domDocument);
          foreach ($xpath->query('//@*') as $attribute) {
              $element = $attribute->parentNode;
              $name = $attribute->nodeName;
              $element->setAttribute($name, $this->escapeAttributeValue($name, $element->getAttribute($name)));
          }
      }

      /**
       * Escape attribute value using escapeHtml or escapeUrl
       *
       * Attributes listed in $escapeAsUrlAttributes are escaped as URLs, all others as HTML.
       *
       * @param string $name
       * @param string $value
       * @return string
       */
      private function escapeAttributeValue($name, $value)
      {
          if (in_array($name, $this->escapeAsUrlAttributes, true)) {
              return $this->escapeUrl($value);
          }

          return $this->escapeHtml($value);
      }

      /**
       * Escape a string for the HTML attribute context
       *
       * With $escapeSingleQuote enabled the work is delegated to ZendEscaper::escapeHtmlAttr(). Otherwise
       * htmlspecialchars() is used with ENT_COMPAT, which leaves single quotes untouched, and without
       * double encoding.
       *
       * @param string $string
       * @param boolean $escapeSingleQuote
       * @return string
       * @since 101.0.0
       */
      public function escapeHtmlAttr($string, $escapeSingleQuote = true)
      {
          if ($escapeSingleQuote) {
              return $this->getEscaper()->escapeHtmlAttr((string) $string);
          }

          return htmlspecialchars((string)$string, ENT_COMPAT, 'UTF-8', false);
      }

      /**
       * Escape URL
       *
       * Strips script-capable prefixes via escapeXssInUrl() and then escapes the result for HTML.
       *
       * @param string $string
       * @return string
       */
      public function escapeUrl($string)
      {
          return $this->escapeHtml($this->escapeXssInUrl($string));
      }

      /**
       * Encode URL
       *
       * Delegates to ZendEscaper::escapeUrl().
       *
       * @param string $string
       * @return string
       * @since 101.0.0
       */
      public function encodeUrlParam($string)
      {
          return $this->getEscaper()->escapeUrl($string);
      }

      /**
       * Escape string for the JavaScript context
       *
       * Every character other than a-z, A-Z, 0-9, comma, dot and underscore is replaced by a \uXXXX
       * escape. Characters outside the Basic Multilingual Plane are written as a UTF-16 surrogate pair
       * (\uXXXX\uXXXX), because a single escape with eight hex digits is not a valid JavaScript escape.
       * Empty and all-digit input is returned as is.
       *
       * @param string $string
       * @return string
       * @since 101.0.0
       */
      public function escapeJs($string)
      {
          // Normalise null and other scalars to string; arrays are still handed to preg_replace_callback()
          // untouched, exactly as before.
          if (!is_array($string)) {
              $string = (string)$string;
          }

          if ($string === '' || (is_string($string) && ctype_digit($string))) {
              return $string;
          }

          return preg_replace_callback(
              '/[^a-z0-9,\._]/iSu',
              static function ($matches) {
                  $chr = $matches[0];
                  if (strlen($chr) != 1) {
                      // Multi-byte character: use its UTF-16BE code units for the escape.
                      $chr = mb_convert_encoding($chr, 'UTF-16BE', 'UTF-8');
                      $chr = ($chr === false) ? '' : $chr;
                  }

                  $hex = strtoupper(bin2hex($chr));
                  if (strlen($hex) <= 4) {
                      return sprintf('\\u%04s', $hex);
                  }

                  // Two UTF-16 code units (high and low surrogate): one escape each.
                  return sprintf('\\u%04s\\u%04s', substr($hex, 0, 4), substr($hex, 4, 4));
              },
              $string
          );
      }

      /**
       * Escape string for the CSS context
       *
       * Delegates to ZendEscaper::escapeCss().
       *
       * @param string $string
       * @return string
       * @since 101.0.0
       */
      public function escapeCss($string)
      {
          return $this->getEscaper()->escapeCss($string);
      }

      /**
       * Escape quotes in java script
       *
       * Prefixes every occurrence of $quote with a backslash. Arrays are processed item by item and the
       * returned array is re-indexed.
       *
       * @param string|array $data
       * @param string $quote
       * @return string|array
       * @deprecated 101.0.0
       */
      public function escapeJsQuote($data, $quote = '\'')
      {
          if (!is_array($data)) {
              return str_replace($quote, '\\' . $quote, (string)$data);
          }

          $escapedItems = [];
          foreach ($data as $item) {
              $escapedItems[] = $this->escapeJsQuote($item, $quote);
          }

          return $escapedItems;
      }

      /**
       * Escape xss in urls
       *
       * Removes script-capable prefixes (see escapeScriptIdentifiers()) and converts the HTML special
       * characters of the result, leaving single quotes and existing entities untouched.
       *
       * @param string $data
       * @return string
       * @deprecated 101.0.0
       */
      public function escapeXssInUrl($data)
      {
          return htmlspecialchars(
              $this->escapeScriptIdentifiers((string)$data),
              ENT_COMPAT | ENT_HTML5 | ENT_HTML401,
              'UTF-8',
              false
          );
      }

      /**
       * Remove `javascript:`, `vbscript:`, `data:` words from the string.
       *
       * Each match of the filtration pattern is replaced by ":". The replacement is repeated until the
       * pattern no longer matches, so a prefix that is re-formed by an earlier removal is stripped too.
       *
       * @param string $data
       * @return string
       */
      private function escapeScriptIdentifiers(string $data): string
      {
          do {
              // preg_replace() yields null only on a PCRE error; "??" keeps falsy-but-valid results such as "0".
              $data = preg_replace(self::$xssFiltrationPattern, ':', $data) ?? '';
          } while (preg_match(self::$xssFiltrationPattern, $data));

          return $data;
      }

      /**
       * Escape quotes inside html attributes
       *
       * Use $addSlashes = false for escaping js that inside html attribute (onClick, onSubmit etc)
       *
       * @param string $data
       * @param bool $addSlashes
       * @return string
       * @deprecated 101.0.0
       */
      public function escapeQuote($data, $addSlashes = false)
      {
          $data = (string)$data;
          if ($addSlashes === true) {
              $data = addslashes($data);
          }

          // A null encoding makes htmlspecialchars() fall back to the configured default charset.
          return htmlspecialchars($data, ENT_QUOTES, null, false);
      }

      /**
       * Get escaper
       *
       * Fetches the ZendEscaper from the ObjectManager on first use and caches it.
       *
       * @return \Magento\Framework\ZendEscaper
       * @deprecated 101.0.0
       */
      private function getEscaper()
      {
          if ($this->escaper === null) {
              $this->escaper = \Magento\Framework\App\ObjectManager::getInstance()
                  ->get(\Magento\Framework\ZendEscaper::class);
          }

          return $this->escaper;
      }

      /**
       * Get logger
       *
       * Fetches the logger from the ObjectManager on first use and caches it.
       *
       * @return \Psr\Log\LoggerInterface
       * @deprecated 101.0.0
       */
      private function getLogger()
      {
          if ($this->logger === null) {
              $this->logger = \Magento\Framework\App\ObjectManager::getInstance()
                  ->get(\Psr\Log\LoggerInterface::class);
          }

          return $this->logger;
      }

      /**
       * Filter prohibited tags.
       *
       * Drops every entry whose lower-cased name is in $notAllowedTags and logs the dropped names once.
       * Detection and removal use the same case-insensitive comparison. Keys and order of the remaining
       * entries are preserved.
       *
       * @param string[] $allowedTags
       * @return string[]
       */
      private function filterProhibitedTags(array $allowedTags): array
      {
          $permitted = [];
          $prohibited = [];
          foreach ($allowedTags as $key => $tag) {
              $normalized = strtolower($tag);
              if (in_array($normalized, $this->notAllowedTags, true)) {
                  $prohibited[] = $normalized;
              } else {
                  $permitted[$key] = $tag;
              }
          }

          if ($prohibited !== []) {
              $this->getLogger()->critical(
                  'The following tag(s) are not allowed: ' . implode(', ', $prohibited)
              );
          }

          return $permitted;
      }
  }