Algorithm.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. <?php
  2. /**
  3. * Copyright © Magento, Inc. All rights reserved.
  4. * See COPYING.txt for license details.
  5. */
  6. namespace Magento\Framework\Search\Dynamic;
  7. /**
  8. * Algorithm for layer value filter
  9. *
  10. * @author Magento Core Team <core@magentocommerce.com>
  11. * @api
  12. * @since 100.0.2
  13. */
  14. class Algorithm
  15. {
  16. /**
  17. * Minimal possible value
  18. */
  19. const MIN_POSSIBLE_VALUE = .01;
  20. /**
  21. * Rounding factor coefficient
  22. */
  23. const TEN_POWER_ROUNDING_FACTOR = 4;
  24. /**
  25. * Interval deflection coefficient
  26. */
  27. const INTERVAL_DEFLECTION_LIMIT = .3;
  28. /**
  29. * Standard normal distribution's a/2 quantile
  30. * Depends on predefined a. In case of a=0.05
  31. */
  32. const STANDARD_NORMAL_DISTRIBUTION = 1.96;
  33. /**
  34. * Min and Max number of intervals
  35. */
  36. const MIN_INTERVALS_NUMBER = 2;
  37. const MAX_INTERVALS_NUMBER = 10;
  38. /**
  39. * Upper values limit
  40. *
  41. * @var null|float
  42. */
  43. protected $_upperLimit = null;
  44. /**
  45. * Lower values limit
  46. *
  47. * @var null|float
  48. */
  49. protected $_lowerLimit = null;
  50. /**
  51. * Number of segmentation intervals
  52. *
  53. * @var null|int
  54. */
  55. protected $_intervalsNumber = null;
  56. /**
  57. * Upper limits of skipped quantiles
  58. *
  59. * @var array
  60. */
  61. protected $_skippedQuantilesUpperLimits = [];
  62. /**
  63. * Total count of values
  64. *
  65. * @var int
  66. */
  67. protected $_count = 0;
  68. /**
  69. * Current quantile interval
  70. *
  71. * @var array [from, to]
  72. */
  73. protected $_quantileInterval = [0, 0];
  74. /**
  75. * Values of current quantile
  76. *
  77. * @var array
  78. */
  79. protected $_values = [];
  80. /**
  81. * Max value
  82. *
  83. * @var float
  84. */
  85. protected $_maxValue = 0;
  86. /**
  87. * Min value
  88. *
  89. * @var float
  90. */
  91. protected $_minValue = 0;
  92. /**
  93. * Last value query limiter
  94. *
  95. * @var array [index, value]
  96. */
  97. protected $_lastValueLimiter = [null, 0];
  98. /**
  99. * Set lower and upper limit for algorithm
  100. *
  101. * @param null|float $lowerLimit
  102. * @param null|float $upperLimit
  103. * @return \Magento\Framework\Search\Dynamic\Algorithm
  104. */
  105. public function setLimits($lowerLimit = null, $upperLimit = null)
  106. {
  107. $this->_lowerLimit = empty($lowerLimit) ? null : (double)$lowerLimit;
  108. $this->_upperLimit = empty($upperLimit) ? null : (double)$upperLimit;
  109. return $this;
  110. }
  111. /**
  112. * Set statistics
  113. *
  114. * @param float $min
  115. * @param float $max
  116. * @param float $standardDeviation
  117. * @param int $count
  118. * @return $this
  119. */
  120. public function setStatistics($min, $max, $standardDeviation, $count)
  121. {
  122. $this->_count = $count;
  123. $this->_minValue = $min;
  124. $this->_maxValue = $max;
  125. $valueRange = $max - $min;
  126. if ($count < 2 || $valueRange <= 0) {
  127. //Same value couldn't be separated with several intervals
  128. $this->_intervalsNumber = 1;
  129. return $this;
  130. }
  131. if ($standardDeviation <= 0) {
  132. $intervalsNumber = pow(10, self::TEN_POWER_ROUNDING_FACTOR);
  133. } else {
  134. $intervalsNumber = $valueRange * pow($count, 1 / 3) / (3.5 * $standardDeviation);
  135. }
  136. $this->_intervalsNumber = max(ceil($intervalsNumber), self::MIN_INTERVALS_NUMBER);
  137. $this->_intervalsNumber = (int)min($this->_intervalsNumber, self::MAX_INTERVALS_NUMBER);
  138. return $this;
  139. }
  140. /**
  141. * Calculate separators, each contains 'from', 'to' and 'count'
  142. *
  143. * @param IntervalInterface $interval
  144. * @return array
  145. * @SuppressWarnings(PHPMD.CyclomaticComplexity)
  146. * @SuppressWarnings(PHPMD.NPathComplexity)
  147. */
  148. public function calculateSeparators(IntervalInterface $interval)
  149. {
  150. $result = [];
  151. $lastCount = 0;
  152. $intervalFirstValue = $this->_minValue;
  153. $lastSeparator = $this->_lowerLimit === null ? 0 : $this->_lowerLimit;
  154. for ($intervalNumber = 1; $intervalNumber < $this->getIntervalsNumber(); ++$intervalNumber) {
  155. $separator = $this->_findValueSeparator($intervalNumber, $interval);
  156. if (empty($separator)) {
  157. continue;
  158. }
  159. if ($this->_quantileInterval[0] == 0) {
  160. $intervalFirstValue = $this->_values[0];
  161. }
  162. $separatorCandidate = false;
  163. $newIntervalFirstValue = $intervalFirstValue;
  164. $newLastSeparator = $lastSeparator;
  165. $valuesPerInterval = $this->_count / $this->_getCalculatedIntervalsNumber();
  166. while (!empty($separator) && !array_key_exists($intervalNumber, $result)) {
  167. $separatorsPortion = array_shift($separator);
  168. $bestSeparator = $this->_findBestSeparator($intervalNumber, $separatorsPortion);
  169. if ($bestSeparator && $bestSeparator[2] > 0) {
  170. $isEqualValue = $intervalFirstValue ==
  171. $this->_values[$bestSeparator[2] - 1] ? $this->_values[0] : false;
  172. $count = $bestSeparator[2] + $this->_quantileInterval[0] - $lastCount;
  173. $separatorData = [
  174. 'from' => $isEqualValue !== false ? $isEqualValue : $lastSeparator,
  175. 'to' => $isEqualValue !== false ? $isEqualValue : $bestSeparator[1],
  176. 'count' => $count,
  177. ];
  178. if (abs(1 - $count / $valuesPerInterval) <= self::INTERVAL_DEFLECTION_LIMIT) {
  179. $newLastSeparator = $bestSeparator[1];
  180. $newIntervalFirstValue = $this->_values[$bestSeparator[2]];
  181. $result[$intervalNumber] = $separatorData;
  182. } elseif (!$separatorCandidate || $bestSeparator[0] < $separatorCandidate[0]) {
  183. $separatorCandidate = [
  184. $bestSeparator[0],
  185. $separatorData,
  186. $bestSeparator[1],
  187. $this->_values[$bestSeparator[2]],
  188. ];
  189. }
  190. }
  191. }
  192. if (!array_key_exists($intervalNumber, $result) && $separatorCandidate) {
  193. $newLastSeparator = $separatorCandidate[2];
  194. $newIntervalFirstValue = $separatorCandidate[3];
  195. $result[$intervalNumber] = $separatorCandidate[1];
  196. }
  197. if (array_key_exists($intervalNumber, $result)) {
  198. $lastSeparator = $newLastSeparator;
  199. $intervalFirstValue = $newIntervalFirstValue;
  200. $valueIndex = $this->_binarySearch($lastSeparator);
  201. $lastCount += $result[$intervalNumber]['count'];
  202. if ($valueIndex != -1 && $lastSeparator > $this->_lastValueLimiter[1]) {
  203. $this->_lastValueLimiter = [$valueIndex + $this->_quantileInterval[0], $lastSeparator];
  204. }
  205. }
  206. }
  207. if ($this->_lastValueLimiter[0] < $this->_count) {
  208. $isEqualValue = $intervalFirstValue == $this->_maxValue ? $intervalFirstValue : false;
  209. $result[$this->getIntervalsNumber()] = [
  210. 'from' => $isEqualValue ? $isEqualValue : $lastSeparator,
  211. 'to' => $isEqualValue ? $isEqualValue : ($this->_upperLimit === null ? '' : $this->_upperLimit),
  212. 'count' => $this->_count - $lastCount,
  213. ];
  214. }
  215. return array_values($result);
  216. }
  217. /**
  218. * Get amount of segmentation intervals
  219. *
  220. * @return int
  221. */
  222. protected function getIntervalsNumber()
  223. {
  224. if ($this->_intervalsNumber !== null) {
  225. return $this->_intervalsNumber;
  226. }
  227. return 1;
  228. }
  229. /**
  230. * Find value separator for the quantile
  231. *
  232. * @param int $quantileNumber should be from 1 to n-1 where n is number of intervals
  233. * @param IntervalInterface $interval
  234. * @return array|null
  235. * @SuppressWarnings(PHPMD.CyclomaticComplexity)
  236. * @SuppressWarnings(PHPMD.NPathComplexity)
  237. * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
  238. */
  239. protected function _findValueSeparator($quantileNumber, IntervalInterface $interval)
  240. {
  241. if ($quantileNumber < 1 || $quantileNumber >= $this->getIntervalsNumber()) {
  242. return null;
  243. }
  244. $values = [];
  245. $quantileInterval = $this->_getQuantileInterval($quantileNumber);
  246. $intervalValuesCount = $quantileInterval[1] - $quantileInterval[0] + 1;
  247. $offset = $quantileInterval[0];
  248. if ($this->_lastValueLimiter[0] !== null) {
  249. $offset -= $this->_lastValueLimiter[0];
  250. }
  251. if ($offset < 0) {
  252. $intervalValuesCount += $offset;
  253. $values = array_slice(
  254. $this->_values,
  255. $this->_lastValueLimiter[0] + $offset - $this->_quantileInterval[0],
  256. -$offset
  257. );
  258. $offset = 0;
  259. }
  260. $lowerValue = $this->_lastValueLimiter[1];
  261. if ($this->_lowerLimit !== null) {
  262. $lowerValue = max($lowerValue, $this->_lowerLimit);
  263. }
  264. if ($intervalValuesCount >= 0) {
  265. $values = array_merge(
  266. $values,
  267. $interval->load($intervalValuesCount + 1, $offset, $lowerValue, $this->_upperLimit)
  268. );
  269. }
  270. $lastValue = $values[$intervalValuesCount - 1];
  271. $bestRoundValue = [];
  272. if ($lastValue == $values[0]) {
  273. if ($quantileNumber == 1 && $offset) {
  274. $additionalValues = $interval->loadPrevious($lastValue, $quantileInterval[0], $this->_lowerLimit);
  275. if ($additionalValues) {
  276. $quantileInterval[0] -= count($additionalValues);
  277. $values = array_merge($additionalValues, $values);
  278. $bestRoundValue = $this->_findRoundValue(
  279. $values[0] + self::MIN_POSSIBLE_VALUE / 10,
  280. $lastValue,
  281. false
  282. );
  283. }
  284. }
  285. if ($quantileNumber == $this->getIntervalsNumber() - 1) {
  286. $valuesCount = count($values);
  287. if ($values[$valuesCount - 1] > $lastValue) {
  288. $additionalValues = [$values[$valuesCount - 1]];
  289. } else {
  290. $additionalValues = $interval->loadNext(
  291. $lastValue,
  292. $this->_count - $quantileInterval[0] - count($values),
  293. $this->_upperLimit
  294. );
  295. }
  296. if ($additionalValues) {
  297. $quantileInterval[1] = $quantileInterval[0] + count($values) - 1;
  298. if ($values[$valuesCount - 1] <= $lastValue) {
  299. $quantileInterval[1] += count($additionalValues);
  300. $values = array_merge($values, $additionalValues);
  301. }
  302. $upperBestRoundValue = $this->_findRoundValue(
  303. $lastValue + self::MIN_POSSIBLE_VALUE / 10,
  304. $values[count($values) - 1],
  305. false
  306. );
  307. $this->_mergeRoundValues($bestRoundValue, $upperBestRoundValue);
  308. }
  309. }
  310. } else {
  311. $bestRoundValue = $this->_findRoundValue(
  312. $values[0] + self::MIN_POSSIBLE_VALUE / 10,
  313. $lastValue
  314. );
  315. }
  316. $this->_quantileInterval = $quantileInterval;
  317. $this->_values = $values;
  318. if (empty($bestRoundValue)) {
  319. $this->_skippedQuantilesUpperLimits[$quantileNumber] = $quantileInterval[1];
  320. return $bestRoundValue;
  321. }
  322. $valuesCount = count($values);
  323. if ($values[$valuesCount - 1] > $lastValue) {
  324. $this->_lastValueLimiter = [$quantileInterval[0] + $valuesCount - 1, $values[$valuesCount - 1]];
  325. }
  326. ksort($bestRoundValue, SORT_NUMERIC);
  327. foreach ($bestRoundValue as $index => &$bestRoundValueValues) {
  328. if (empty($bestRoundValueValues)) {
  329. unset($bestRoundValue[$index]);
  330. } else {
  331. sort($bestRoundValueValues);
  332. }
  333. }
  334. return array_reverse($bestRoundValue);
  335. }
  336. /**
  337. * Get quantile interval
  338. *
  339. * @param int $quantileNumber should be from 1 to n-1 where n is number of intervals
  340. * @return null|float[] [floatMin,floatMax]
  341. */
  342. protected function _getQuantileInterval($quantileNumber)
  343. {
  344. if ($quantileNumber < 1 || $quantileNumber >= $this->getIntervalsNumber()) {
  345. return null;
  346. }
  347. $quantile = $this->_getQuantile($quantileNumber);
  348. $deflectionLimit = floor($this->_count / 2 / $this->getIntervalsNumber());
  349. $limits = [
  350. min(floor($quantile - $deflectionLimit), floor($quantile)),
  351. max(ceil($quantile + $deflectionLimit - 1), ceil($quantile)),
  352. ];
  353. $sqrtParam = $this->_count * $quantileNumber * ($this->getIntervalsNumber() - $quantileNumber);
  354. $deflection = self::STANDARD_NORMAL_DISTRIBUTION * sqrt($sqrtParam) / $this->getIntervalsNumber();
  355. $left = max(floor($quantile - $deflection - 1), $limits[0], 0);
  356. if (array_key_exists($quantileNumber - 1, $this->_skippedQuantilesUpperLimits)
  357. && $left > $this->_skippedQuantilesUpperLimits[$quantileNumber - 1]
  358. ) {
  359. $left = $this->_skippedQuantilesUpperLimits[$quantileNumber - 1];
  360. }
  361. $right = min(ceil($quantile + $deflection), $limits[1], $this->_count - 1);
  362. return [$left, $right];
  363. }
  364. /**
  365. * Get quantile
  366. *
  367. * @param int $quantileNumber should be from 1 to n-1 where n is number of intervals
  368. * @return float|null
  369. */
  370. protected function _getQuantile($quantileNumber)
  371. {
  372. if ($quantileNumber < 1 || $quantileNumber >= $this->getIntervalsNumber()) {
  373. return 0;
  374. }
  375. return $quantileNumber * $this->_count / $this->getIntervalsNumber() - .5;
  376. }
  377. /**
  378. * Find max rounding factor with given value range
  379. *
  380. * @param float $lowerValue
  381. * @param float $upperValue
  382. * @param bool $returnEmpty whether empty result is acceptable
  383. * @param null|float $roundingFactor if given, checks for range to contain the factor
  384. * @return false|array
  385. * @SuppressWarnings(PHPMD.CyclomaticComplexity)
  386. * @SuppressWarnings(PHPMD.NPathComplexity)
  387. */
  388. protected function _findRoundValue($lowerValue, $upperValue, $returnEmpty = true, $roundingFactor = null)
  389. {
  390. $lowerValue = round($lowerValue, 3);
  391. $upperValue = round($upperValue, 3);
  392. if ($roundingFactor !== null) {
  393. // Can't separate if values are equal
  394. if ($lowerValue >= $upperValue) {
  395. if ($lowerValue > $upperValue || $returnEmpty) {
  396. return false;
  397. }
  398. }
  399. // round is used for such examples: (1194.32 / 0.02) or (5 / 100000)
  400. $lowerDivision = ceil(round($lowerValue / $roundingFactor, self::TEN_POWER_ROUNDING_FACTOR + 3));
  401. $upperDivision = floor(round($upperValue / $roundingFactor, self::TEN_POWER_ROUNDING_FACTOR + 3));
  402. $result = [];
  403. if ($upperDivision <= 0 || $upperDivision - $lowerDivision > 10) {
  404. return $result;
  405. }
  406. for ($i = $lowerDivision; $i <= $upperDivision; ++$i) {
  407. $result[] = round($i * $roundingFactor, 2);
  408. }
  409. return $result;
  410. }
  411. $result = [];
  412. $tenPower = pow(10, self::TEN_POWER_ROUNDING_FACTOR);
  413. $roundingFactorCoefficients = [10, 5, 2];
  414. while ($tenPower >= self::MIN_POSSIBLE_VALUE) {
  415. if ($tenPower == self::MIN_POSSIBLE_VALUE) {
  416. $roundingFactorCoefficients[] = 1;
  417. }
  418. foreach ($roundingFactorCoefficients as $roundingFactorCoefficient) {
  419. $roundingFactorCoefficient *= $tenPower;
  420. $roundValues = $this->_findRoundValue(
  421. $lowerValue,
  422. $upperValue,
  423. $returnEmpty,
  424. $roundingFactorCoefficient
  425. );
  426. if ($roundValues) {
  427. $index = round(
  428. $roundingFactorCoefficient /
  429. self::MIN_POSSIBLE_VALUE
  430. );
  431. $result[$index] = $roundValues;
  432. }
  433. }
  434. $tenPower /= 10;
  435. }
  436. return empty($result) ? [1 => []] : $result;
  437. }
  438. /**
  439. * Merge new round values with old ones
  440. *
  441. * @param array &$oldRoundValues
  442. * @param array &$newRoundValues
  443. * @return void
  444. */
  445. protected function _mergeRoundValues(&$oldRoundValues, &$newRoundValues)
  446. {
  447. foreach ($newRoundValues as $roundingFactor => $roundValueValues) {
  448. if (array_key_exists($roundingFactor, $oldRoundValues)) {
  449. $oldRoundValues[$roundingFactor] = array_unique(
  450. array_merge($oldRoundValues[$roundingFactor], $roundValueValues)
  451. );
  452. } else {
  453. $oldRoundValues[$roundingFactor] = $roundValueValues;
  454. }
  455. }
  456. }
  457. /**
  458. * Get intervals number with checking skipped quantiles
  459. *
  460. * @return int
  461. */
  462. protected function _getCalculatedIntervalsNumber()
  463. {
  464. return max(1, $this->getIntervalsNumber() - count($this->_skippedQuantilesUpperLimits));
  465. }
  466. /**
  467. * Get separator nearest to quantile among the separators
  468. *
  469. * @param int $quantileNumber
  470. * @param array $separators
  471. * @return array|false [deflection, separatorValue, $valueIndex]
  472. */
  473. protected function _findBestSeparator($quantileNumber, $separators)
  474. {
  475. $result = false;
  476. $i = 0;
  477. $valuesCount = count($this->_values);
  478. while ($i < $valuesCount && !empty($separators)) {
  479. $i = $this->_binarySearch($separators[0], [$i]);
  480. if ($i == -1) {
  481. break;
  482. }
  483. $separator = array_shift($separators);
  484. $deflection = abs(
  485. $quantileNumber * $this->_count -
  486. ($this->_quantileInterval[0] +
  487. $i) * $this->_getCalculatedIntervalsNumber()
  488. );
  489. if (!$result || $deflection < $result[0]) {
  490. $result = [$deflection, $separator, $i];
  491. }
  492. }
  493. return $result ? $result : false;
  494. }
  495. /**
  496. * Search first index of value, that satisfy conditions to be 'greater or equal' than $value
  497. * Returns -1 if index was not found
  498. *
  499. * @param float $value
  500. * @param null|float[] $limits search [from, to]
  501. * @return int
  502. * @SuppressWarnings(PHPMD.CyclomaticComplexity)
  503. * @SuppressWarnings(PHPMD.NPathComplexity)
  504. */
  505. protected function _binarySearch($value, $limits = null)
  506. {
  507. if (empty($this->_values)) {
  508. return -1;
  509. }
  510. if (!is_array($limits)) {
  511. $limits = [];
  512. }
  513. if (!isset($limits[0])) {
  514. $limits[0] = 0;
  515. }
  516. if (!isset($limits[1])) {
  517. $limits[1] = count($this->_values) - 1;
  518. }
  519. if ($limits[0] > $limits[1] || $this->_values[$limits[1]] < $value) {
  520. return -1;
  521. }
  522. if ($limits[1] - $limits[0] <= 1) {
  523. return $this->_values[$limits[0]] < $value ? $limits[1] : $limits[0];
  524. }
  525. $separator = floor(($limits[0] + $limits[1]) / 2);
  526. if ($this->_values[$separator] < $value) {
  527. $limits[0] = $separator + 1;
  528. } else {
  529. $limits[1] = $separator;
  530. }
  531. return $this->_binarySearch($value, [$limits[0], $limits[1]]);
  532. }
  533. }