Differ.php 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. <?php declare(strict_types=1);
  2. /*
  3. * This file is part of sebastian/diff.
  4. *
  5. * (c) Sebastian Bergmann <sebastian@phpunit.de>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace SebastianBergmann\Diff;
  11. use SebastianBergmann\Diff\Output\DiffOutputBuilderInterface;
  12. use SebastianBergmann\Diff\Output\UnifiedDiffOutputBuilder;
  13. /**
  14. * Diff implementation.
  15. */
  16. final class Differ
  17. {
  18. public const OLD = 0;
  19. public const ADDED = 1;
  20. public const REMOVED = 2;
  21. public const DIFF_LINE_END_WARNING = 3;
  22. public const NO_LINE_END_EOF_WARNING = 4;
  23. /**
  24. * @var DiffOutputBuilderInterface
  25. */
  26. private $outputBuilder;
  27. /**
  28. * @param DiffOutputBuilderInterface $outputBuilder
  29. *
  30. * @throws InvalidArgumentException
  31. */
  32. public function __construct($outputBuilder = null)
  33. {
  34. if ($outputBuilder instanceof DiffOutputBuilderInterface) {
  35. $this->outputBuilder = $outputBuilder;
  36. } elseif (null === $outputBuilder) {
  37. $this->outputBuilder = new UnifiedDiffOutputBuilder;
  38. } elseif (\is_string($outputBuilder)) {
  39. // PHPUnit 6.1.4, 6.2.0, 6.2.1, 6.2.2, and 6.2.3 support
  40. // @see https://github.com/sebastianbergmann/phpunit/issues/2734#issuecomment-314514056
  41. // @deprecated
  42. $this->outputBuilder = new UnifiedDiffOutputBuilder($outputBuilder);
  43. } else {
  44. throw new InvalidArgumentException(
  45. \sprintf(
  46. 'Expected builder to be an instance of DiffOutputBuilderInterface, <null> or a string, got %s.',
  47. \is_object($outputBuilder) ? 'instance of "' . \get_class($outputBuilder) . '"' : \gettype($outputBuilder) . ' "' . $outputBuilder . '"'
  48. )
  49. );
  50. }
  51. }
  52. /**
  53. * Returns the diff between two arrays or strings as string.
  54. *
  55. * @param array|string $from
  56. * @param array|string $to
  57. * @param null|LongestCommonSubsequenceCalculator $lcs
  58. *
  59. * @return string
  60. */
  61. public function diff($from, $to, LongestCommonSubsequenceCalculator $lcs = null): string
  62. {
  63. $diff = $this->diffToArray(
  64. $this->normalizeDiffInput($from),
  65. $this->normalizeDiffInput($to),
  66. $lcs
  67. );
  68. return $this->outputBuilder->getDiff($diff);
  69. }
  70. /**
  71. * Returns the diff between two arrays or strings as array.
  72. *
  73. * Each array element contains two elements:
  74. * - [0] => mixed $token
  75. * - [1] => 2|1|0
  76. *
  77. * - 2: REMOVED: $token was removed from $from
  78. * - 1: ADDED: $token was added to $from
  79. * - 0: OLD: $token is not changed in $to
  80. *
  81. * @param array|string $from
  82. * @param array|string $to
  83. * @param LongestCommonSubsequenceCalculator $lcs
  84. *
  85. * @return array
  86. */
  87. public function diffToArray($from, $to, LongestCommonSubsequenceCalculator $lcs = null): array
  88. {
  89. if (\is_string($from)) {
  90. $from = $this->splitStringByLines($from);
  91. } elseif (!\is_array($from)) {
  92. throw new InvalidArgumentException('"from" must be an array or string.');
  93. }
  94. if (\is_string($to)) {
  95. $to = $this->splitStringByLines($to);
  96. } elseif (!\is_array($to)) {
  97. throw new InvalidArgumentException('"to" must be an array or string.');
  98. }
  99. [$from, $to, $start, $end] = self::getArrayDiffParted($from, $to);
  100. if ($lcs === null) {
  101. $lcs = $this->selectLcsImplementation($from, $to);
  102. }
  103. $common = $lcs->calculate(\array_values($from), \array_values($to));
  104. $diff = [];
  105. foreach ($start as $token) {
  106. $diff[] = [$token, self::OLD];
  107. }
  108. \reset($from);
  109. \reset($to);
  110. foreach ($common as $token) {
  111. while (($fromToken = \reset($from)) !== $token) {
  112. $diff[] = [\array_shift($from), self::REMOVED];
  113. }
  114. while (($toToken = \reset($to)) !== $token) {
  115. $diff[] = [\array_shift($to), self::ADDED];
  116. }
  117. $diff[] = [$token, self::OLD];
  118. \array_shift($from);
  119. \array_shift($to);
  120. }
  121. while (($token = \array_shift($from)) !== null) {
  122. $diff[] = [$token, self::REMOVED];
  123. }
  124. while (($token = \array_shift($to)) !== null) {
  125. $diff[] = [$token, self::ADDED];
  126. }
  127. foreach ($end as $token) {
  128. $diff[] = [$token, self::OLD];
  129. }
  130. if ($this->detectUnmatchedLineEndings($diff)) {
  131. \array_unshift($diff, ["#Warning: Strings contain different line endings!\n", self::DIFF_LINE_END_WARNING]);
  132. }
  133. return $diff;
  134. }
  135. /**
  136. * Casts variable to string if it is not a string or array.
  137. *
  138. * @param mixed $input
  139. *
  140. * @return array|string
  141. */
  142. private function normalizeDiffInput($input)
  143. {
  144. if (!\is_array($input) && !\is_string($input)) {
  145. return (string) $input;
  146. }
  147. return $input;
  148. }
  149. /**
  150. * Checks if input is string, if so it will split it line-by-line.
  151. *
  152. * @param string $input
  153. *
  154. * @return array
  155. */
  156. private function splitStringByLines(string $input): array
  157. {
  158. return \preg_split('/(.*\R)/', $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  159. }
  160. /**
  161. * @param array $from
  162. * @param array $to
  163. *
  164. * @return LongestCommonSubsequenceCalculator
  165. */
  166. private function selectLcsImplementation(array $from, array $to): LongestCommonSubsequenceCalculator
  167. {
  168. // We do not want to use the time-efficient implementation if its memory
  169. // footprint will probably exceed this value. Note that the footprint
  170. // calculation is only an estimation for the matrix and the LCS method
  171. // will typically allocate a bit more memory than this.
  172. $memoryLimit = 100 * 1024 * 1024;
  173. if ($this->calculateEstimatedFootprint($from, $to) > $memoryLimit) {
  174. return new MemoryEfficientLongestCommonSubsequenceCalculator;
  175. }
  176. return new TimeEfficientLongestCommonSubsequenceCalculator;
  177. }
  178. /**
  179. * Calculates the estimated memory footprint for the DP-based method.
  180. *
  181. * @param array $from
  182. * @param array $to
  183. *
  184. * @return float|int
  185. */
  186. private function calculateEstimatedFootprint(array $from, array $to)
  187. {
  188. $itemSize = PHP_INT_SIZE === 4 ? 76 : 144;
  189. return $itemSize * \min(\count($from), \count($to)) ** 2;
  190. }
  191. /**
  192. * Returns true if line ends don't match in a diff.
  193. *
  194. * @param array $diff
  195. *
  196. * @return bool
  197. */
  198. private function detectUnmatchedLineEndings(array $diff): bool
  199. {
  200. $newLineBreaks = ['' => true];
  201. $oldLineBreaks = ['' => true];
  202. foreach ($diff as $entry) {
  203. if (self::OLD === $entry[1]) {
  204. $ln = $this->getLinebreak($entry[0]);
  205. $oldLineBreaks[$ln] = true;
  206. $newLineBreaks[$ln] = true;
  207. } elseif (self::ADDED === $entry[1]) {
  208. $newLineBreaks[$this->getLinebreak($entry[0])] = true;
  209. } elseif (self::REMOVED === $entry[1]) {
  210. $oldLineBreaks[$this->getLinebreak($entry[0])] = true;
  211. }
  212. }
  213. // if either input or output is a single line without breaks than no warning should be raised
  214. if (['' => true] === $newLineBreaks || ['' => true] === $oldLineBreaks) {
  215. return false;
  216. }
  217. // two way compare
  218. foreach ($newLineBreaks as $break => $set) {
  219. if (!isset($oldLineBreaks[$break])) {
  220. return true;
  221. }
  222. }
  223. foreach ($oldLineBreaks as $break => $set) {
  224. if (!isset($newLineBreaks[$break])) {
  225. return true;
  226. }
  227. }
  228. return false;
  229. }
  230. private function getLinebreak($line): string
  231. {
  232. if (!\is_string($line)) {
  233. return '';
  234. }
  235. $lc = \substr($line, -1);
  236. if ("\r" === $lc) {
  237. return "\r";
  238. }
  239. if ("\n" !== $lc) {
  240. return '';
  241. }
  242. if ("\r\n" === \substr($line, -2)) {
  243. return "\r\n";
  244. }
  245. return "\n";
  246. }
  247. private static function getArrayDiffParted(array &$from, array &$to): array
  248. {
  249. $start = [];
  250. $end = [];
  251. \reset($to);
  252. foreach ($from as $k => $v) {
  253. $toK = \key($to);
  254. if ($toK === $k && $v === $to[$k]) {
  255. $start[$k] = $v;
  256. unset($from[$k], $to[$k]);
  257. } else {
  258. break;
  259. }
  260. }
  261. \end($from);
  262. \end($to);
  263. do {
  264. $fromK = \key($from);
  265. $toK = \key($to);
  266. if (null === $fromK || null === $toK || \current($from) !== \current($to)) {
  267. break;
  268. }
  269. \prev($from);
  270. \prev($to);
  271. $end = [$fromK => $from[$fromK]] + $end;
  272. unset($from[$fromK], $to[$toK]);
  273. } while (true);
  274. return [$from, $to, $start, $end];
  275. }
  276. }