123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596 |
- <?php
- /**
- * Copyright © Magento, Inc. All rights reserved.
- * See COPYING.txt for license details.
- */
- namespace Magento\Framework\Search\Dynamic;
- /**
- * Algorithm for layer value filter
- *
- * @author Magento Core Team <core@magentocommerce.com>
- * @api
- * @since 100.0.2
- */
- class Algorithm
- {
- /**
- * Minimal possible value
- */
- const MIN_POSSIBLE_VALUE = .01;
- /**
- * Rounding factor coefficient
- */
- const TEN_POWER_ROUNDING_FACTOR = 4;
- /**
- * Interval deflection coefficient
- */
- const INTERVAL_DEFLECTION_LIMIT = .3;
- /**
- * Standard normal distribution's a/2 quantile
- * Depends on predefined a. In case of a=0.05
- */
- const STANDARD_NORMAL_DISTRIBUTION = 1.96;
- /**
- * Min and Max number of intervals
- */
- const MIN_INTERVALS_NUMBER = 2;
- const MAX_INTERVALS_NUMBER = 10;
- /**
- * Upper values limit
- *
- * @var null|float
- */
- protected $_upperLimit = null;
- /**
- * Lower values limit
- *
- * @var null|float
- */
- protected $_lowerLimit = null;
- /**
- * Number of segmentation intervals
- *
- * @var null|int
- */
- protected $_intervalsNumber = null;
- /**
- * Upper limits of skipped quantiles
- *
- * @var array
- */
- protected $_skippedQuantilesUpperLimits = [];
- /**
- * Total count of values
- *
- * @var int
- */
- protected $_count = 0;
- /**
- * Current quantile interval
- *
- * @var array [from, to]
- */
- protected $_quantileInterval = [0, 0];
- /**
- * Values of current quantile
- *
- * @var array
- */
- protected $_values = [];
- /**
- * Max value
- *
- * @var float
- */
- protected $_maxValue = 0;
- /**
- * Min value
- *
- * @var float
- */
- protected $_minValue = 0;
- /**
- * Last value query limiter
- *
- * @var array [index, value]
- */
- protected $_lastValueLimiter = [null, 0];
- /**
- * Set lower and upper limit for algorithm
- *
- * @param null|float $lowerLimit
- * @param null|float $upperLimit
- * @return \Magento\Framework\Search\Dynamic\Algorithm
- */
- public function setLimits($lowerLimit = null, $upperLimit = null)
- {
- $this->_lowerLimit = empty($lowerLimit) ? null : (double)$lowerLimit;
- $this->_upperLimit = empty($upperLimit) ? null : (double)$upperLimit;
- return $this;
- }
- /**
- * Set statistics
- *
- * @param float $min
- * @param float $max
- * @param float $standardDeviation
- * @param int $count
- * @return $this
- */
- public function setStatistics($min, $max, $standardDeviation, $count)
- {
- $this->_count = $count;
- $this->_minValue = $min;
- $this->_maxValue = $max;
- $valueRange = $max - $min;
- if ($count < 2 || $valueRange <= 0) {
- //Same value couldn't be separated with several intervals
- $this->_intervalsNumber = 1;
- return $this;
- }
- if ($standardDeviation <= 0) {
- $intervalsNumber = pow(10, self::TEN_POWER_ROUNDING_FACTOR);
- } else {
- $intervalsNumber = $valueRange * pow($count, 1 / 3) / (3.5 * $standardDeviation);
- }
- $this->_intervalsNumber = max(ceil($intervalsNumber), self::MIN_INTERVALS_NUMBER);
- $this->_intervalsNumber = (int)min($this->_intervalsNumber, self::MAX_INTERVALS_NUMBER);
- return $this;
- }
- /**
- * Calculate separators, each contains 'from', 'to' and 'count'
- *
- * @param IntervalInterface $interval
- * @return array
- * @SuppressWarnings(PHPMD.CyclomaticComplexity)
- * @SuppressWarnings(PHPMD.NPathComplexity)
- */
- public function calculateSeparators(IntervalInterface $interval)
- {
- $result = [];
- $lastCount = 0;
- $intervalFirstValue = $this->_minValue;
- $lastSeparator = $this->_lowerLimit === null ? 0 : $this->_lowerLimit;
- for ($intervalNumber = 1; $intervalNumber < $this->getIntervalsNumber(); ++$intervalNumber) {
- $separator = $this->_findValueSeparator($intervalNumber, $interval);
- if (empty($separator)) {
- continue;
- }
- if ($this->_quantileInterval[0] == 0) {
- $intervalFirstValue = $this->_values[0];
- }
- $separatorCandidate = false;
- $newIntervalFirstValue = $intervalFirstValue;
- $newLastSeparator = $lastSeparator;
- $valuesPerInterval = $this->_count / $this->_getCalculatedIntervalsNumber();
- while (!empty($separator) && !array_key_exists($intervalNumber, $result)) {
- $separatorsPortion = array_shift($separator);
- $bestSeparator = $this->_findBestSeparator($intervalNumber, $separatorsPortion);
- if ($bestSeparator && $bestSeparator[2] > 0) {
- $isEqualValue = $intervalFirstValue ==
- $this->_values[$bestSeparator[2] - 1] ? $this->_values[0] : false;
- $count = $bestSeparator[2] + $this->_quantileInterval[0] - $lastCount;
- $separatorData = [
- 'from' => $isEqualValue !== false ? $isEqualValue : $lastSeparator,
- 'to' => $isEqualValue !== false ? $isEqualValue : $bestSeparator[1],
- 'count' => $count,
- ];
- if (abs(1 - $count / $valuesPerInterval) <= self::INTERVAL_DEFLECTION_LIMIT) {
- $newLastSeparator = $bestSeparator[1];
- $newIntervalFirstValue = $this->_values[$bestSeparator[2]];
- $result[$intervalNumber] = $separatorData;
- } elseif (!$separatorCandidate || $bestSeparator[0] < $separatorCandidate[0]) {
- $separatorCandidate = [
- $bestSeparator[0],
- $separatorData,
- $bestSeparator[1],
- $this->_values[$bestSeparator[2]],
- ];
- }
- }
- }
- if (!array_key_exists($intervalNumber, $result) && $separatorCandidate) {
- $newLastSeparator = $separatorCandidate[2];
- $newIntervalFirstValue = $separatorCandidate[3];
- $result[$intervalNumber] = $separatorCandidate[1];
- }
- if (array_key_exists($intervalNumber, $result)) {
- $lastSeparator = $newLastSeparator;
- $intervalFirstValue = $newIntervalFirstValue;
- $valueIndex = $this->_binarySearch($lastSeparator);
- $lastCount += $result[$intervalNumber]['count'];
- if ($valueIndex != -1 && $lastSeparator > $this->_lastValueLimiter[1]) {
- $this->_lastValueLimiter = [$valueIndex + $this->_quantileInterval[0], $lastSeparator];
- }
- }
- }
- if ($this->_lastValueLimiter[0] < $this->_count) {
- $isEqualValue = $intervalFirstValue == $this->_maxValue ? $intervalFirstValue : false;
- $result[$this->getIntervalsNumber()] = [
- 'from' => $isEqualValue ? $isEqualValue : $lastSeparator,
- 'to' => $isEqualValue ? $isEqualValue : ($this->_upperLimit === null ? '' : $this->_upperLimit),
- 'count' => $this->_count - $lastCount,
- ];
- }
- return array_values($result);
- }
- /**
- * Get amount of segmentation intervals
- *
- * @return int
- */
- protected function getIntervalsNumber()
- {
- if ($this->_intervalsNumber !== null) {
- return $this->_intervalsNumber;
- }
- return 1;
- }
- /**
- * Find value separator for the quantile
- *
- * @param int $quantileNumber should be from 1 to n-1 where n is number of intervals
- * @param IntervalInterface $interval
- * @return array|null
- * @SuppressWarnings(PHPMD.CyclomaticComplexity)
- * @SuppressWarnings(PHPMD.NPathComplexity)
- * @SuppressWarnings(PHPMD.ExcessiveMethodLength)
- */
- protected function _findValueSeparator($quantileNumber, IntervalInterface $interval)
- {
- if ($quantileNumber < 1 || $quantileNumber >= $this->getIntervalsNumber()) {
- return null;
- }
- $values = [];
- $quantileInterval = $this->_getQuantileInterval($quantileNumber);
- $intervalValuesCount = $quantileInterval[1] - $quantileInterval[0] + 1;
- $offset = $quantileInterval[0];
- if ($this->_lastValueLimiter[0] !== null) {
- $offset -= $this->_lastValueLimiter[0];
- }
- if ($offset < 0) {
- $intervalValuesCount += $offset;
- $values = array_slice(
- $this->_values,
- $this->_lastValueLimiter[0] + $offset - $this->_quantileInterval[0],
- -$offset
- );
- $offset = 0;
- }
- $lowerValue = $this->_lastValueLimiter[1];
- if ($this->_lowerLimit !== null) {
- $lowerValue = max($lowerValue, $this->_lowerLimit);
- }
- if ($intervalValuesCount >= 0) {
- $values = array_merge(
- $values,
- $interval->load($intervalValuesCount + 1, $offset, $lowerValue, $this->_upperLimit)
- );
- }
- $lastValue = $values[$intervalValuesCount - 1];
- $bestRoundValue = [];
- if ($lastValue == $values[0]) {
- if ($quantileNumber == 1 && $offset) {
- $additionalValues = $interval->loadPrevious($lastValue, $quantileInterval[0], $this->_lowerLimit);
- if ($additionalValues) {
- $quantileInterval[0] -= count($additionalValues);
- $values = array_merge($additionalValues, $values);
- $bestRoundValue = $this->_findRoundValue(
- $values[0] + self::MIN_POSSIBLE_VALUE / 10,
- $lastValue,
- false
- );
- }
- }
- if ($quantileNumber == $this->getIntervalsNumber() - 1) {
- $valuesCount = count($values);
- if ($values[$valuesCount - 1] > $lastValue) {
- $additionalValues = [$values[$valuesCount - 1]];
- } else {
- $additionalValues = $interval->loadNext(
- $lastValue,
- $this->_count - $quantileInterval[0] - count($values),
- $this->_upperLimit
- );
- }
- if ($additionalValues) {
- $quantileInterval[1] = $quantileInterval[0] + count($values) - 1;
- if ($values[$valuesCount - 1] <= $lastValue) {
- $quantileInterval[1] += count($additionalValues);
- $values = array_merge($values, $additionalValues);
- }
- $upperBestRoundValue = $this->_findRoundValue(
- $lastValue + self::MIN_POSSIBLE_VALUE / 10,
- $values[count($values) - 1],
- false
- );
- $this->_mergeRoundValues($bestRoundValue, $upperBestRoundValue);
- }
- }
- } else {
- $bestRoundValue = $this->_findRoundValue(
- $values[0] + self::MIN_POSSIBLE_VALUE / 10,
- $lastValue
- );
- }
- $this->_quantileInterval = $quantileInterval;
- $this->_values = $values;
- if (empty($bestRoundValue)) {
- $this->_skippedQuantilesUpperLimits[$quantileNumber] = $quantileInterval[1];
- return $bestRoundValue;
- }
- $valuesCount = count($values);
- if ($values[$valuesCount - 1] > $lastValue) {
- $this->_lastValueLimiter = [$quantileInterval[0] + $valuesCount - 1, $values[$valuesCount - 1]];
- }
- ksort($bestRoundValue, SORT_NUMERIC);
- foreach ($bestRoundValue as $index => &$bestRoundValueValues) {
- if (empty($bestRoundValueValues)) {
- unset($bestRoundValue[$index]);
- } else {
- sort($bestRoundValueValues);
- }
- }
- return array_reverse($bestRoundValue);
- }
- /**
- * Get quantile interval
- *
- * @param int $quantileNumber should be from 1 to n-1 where n is number of intervals
- * @return null|float[] [floatMin,floatMax]
- */
- protected function _getQuantileInterval($quantileNumber)
- {
- if ($quantileNumber < 1 || $quantileNumber >= $this->getIntervalsNumber()) {
- return null;
- }
- $quantile = $this->_getQuantile($quantileNumber);
- $deflectionLimit = floor($this->_count / 2 / $this->getIntervalsNumber());
- $limits = [
- min(floor($quantile - $deflectionLimit), floor($quantile)),
- max(ceil($quantile + $deflectionLimit - 1), ceil($quantile)),
- ];
- $sqrtParam = $this->_count * $quantileNumber * ($this->getIntervalsNumber() - $quantileNumber);
- $deflection = self::STANDARD_NORMAL_DISTRIBUTION * sqrt($sqrtParam) / $this->getIntervalsNumber();
- $left = max(floor($quantile - $deflection - 1), $limits[0], 0);
- if (array_key_exists($quantileNumber - 1, $this->_skippedQuantilesUpperLimits)
- && $left > $this->_skippedQuantilesUpperLimits[$quantileNumber - 1]
- ) {
- $left = $this->_skippedQuantilesUpperLimits[$quantileNumber - 1];
- }
- $right = min(ceil($quantile + $deflection), $limits[1], $this->_count - 1);
- return [$left, $right];
- }
- /**
- * Get quantile
- *
- * @param int $quantileNumber should be from 1 to n-1 where n is number of intervals
- * @return float|null
- */
- protected function _getQuantile($quantileNumber)
- {
- if ($quantileNumber < 1 || $quantileNumber >= $this->getIntervalsNumber()) {
- return 0;
- }
- return $quantileNumber * $this->_count / $this->getIntervalsNumber() - .5;
- }
- /**
- * Find max rounding factor with given value range
- *
- * @param float $lowerValue
- * @param float $upperValue
- * @param bool $returnEmpty whether empty result is acceptable
- * @param null|float $roundingFactor if given, checks for range to contain the factor
- * @return false|array
- * @SuppressWarnings(PHPMD.CyclomaticComplexity)
- * @SuppressWarnings(PHPMD.NPathComplexity)
- */
- protected function _findRoundValue($lowerValue, $upperValue, $returnEmpty = true, $roundingFactor = null)
- {
- $lowerValue = round($lowerValue, 3);
- $upperValue = round($upperValue, 3);
- if ($roundingFactor !== null) {
- // Can't separate if values are equal
- if ($lowerValue >= $upperValue) {
- if ($lowerValue > $upperValue || $returnEmpty) {
- return false;
- }
- }
- // round is used for such examples: (1194.32 / 0.02) or (5 / 100000)
- $lowerDivision = ceil(round($lowerValue / $roundingFactor, self::TEN_POWER_ROUNDING_FACTOR + 3));
- $upperDivision = floor(round($upperValue / $roundingFactor, self::TEN_POWER_ROUNDING_FACTOR + 3));
- $result = [];
- if ($upperDivision <= 0 || $upperDivision - $lowerDivision > 10) {
- return $result;
- }
- for ($i = $lowerDivision; $i <= $upperDivision; ++$i) {
- $result[] = round($i * $roundingFactor, 2);
- }
- return $result;
- }
- $result = [];
- $tenPower = pow(10, self::TEN_POWER_ROUNDING_FACTOR);
- $roundingFactorCoefficients = [10, 5, 2];
- while ($tenPower >= self::MIN_POSSIBLE_VALUE) {
- if ($tenPower == self::MIN_POSSIBLE_VALUE) {
- $roundingFactorCoefficients[] = 1;
- }
- foreach ($roundingFactorCoefficients as $roundingFactorCoefficient) {
- $roundingFactorCoefficient *= $tenPower;
- $roundValues = $this->_findRoundValue(
- $lowerValue,
- $upperValue,
- $returnEmpty,
- $roundingFactorCoefficient
- );
- if ($roundValues) {
- $index = round(
- $roundingFactorCoefficient /
- self::MIN_POSSIBLE_VALUE
- );
- $result[$index] = $roundValues;
- }
- }
- $tenPower /= 10;
- }
- return empty($result) ? [1 => []] : $result;
- }
- /**
- * Merge new round values with old ones
- *
- * @param array &$oldRoundValues
- * @param array &$newRoundValues
- * @return void
- */
- protected function _mergeRoundValues(&$oldRoundValues, &$newRoundValues)
- {
- foreach ($newRoundValues as $roundingFactor => $roundValueValues) {
- if (array_key_exists($roundingFactor, $oldRoundValues)) {
- $oldRoundValues[$roundingFactor] = array_unique(
- array_merge($oldRoundValues[$roundingFactor], $roundValueValues)
- );
- } else {
- $oldRoundValues[$roundingFactor] = $roundValueValues;
- }
- }
- }
- /**
- * Get intervals number with checking skipped quantiles
- *
- * @return int
- */
- protected function _getCalculatedIntervalsNumber()
- {
- return max(1, $this->getIntervalsNumber() - count($this->_skippedQuantilesUpperLimits));
- }
- /**
- * Get separator nearest to quantile among the separators
- *
- * @param int $quantileNumber
- * @param array $separators
- * @return array|false [deflection, separatorValue, $valueIndex]
- */
- protected function _findBestSeparator($quantileNumber, $separators)
- {
- $result = false;
- $i = 0;
- $valuesCount = count($this->_values);
- while ($i < $valuesCount && !empty($separators)) {
- $i = $this->_binarySearch($separators[0], [$i]);
- if ($i == -1) {
- break;
- }
- $separator = array_shift($separators);
- $deflection = abs(
- $quantileNumber * $this->_count -
- ($this->_quantileInterval[0] +
- $i) * $this->_getCalculatedIntervalsNumber()
- );
- if (!$result || $deflection < $result[0]) {
- $result = [$deflection, $separator, $i];
- }
- }
- return $result ? $result : false;
- }
- /**
- * Search first index of value, that satisfy conditions to be 'greater or equal' than $value
- * Returns -1 if index was not found
- *
- * @param float $value
- * @param null|float[] $limits search [from, to]
- * @return int
- * @SuppressWarnings(PHPMD.CyclomaticComplexity)
- * @SuppressWarnings(PHPMD.NPathComplexity)
- */
- protected function _binarySearch($value, $limits = null)
- {
- if (empty($this->_values)) {
- return -1;
- }
- if (!is_array($limits)) {
- $limits = [];
- }
- if (!isset($limits[0])) {
- $limits[0] = 0;
- }
- if (!isset($limits[1])) {
- $limits[1] = count($this->_values) - 1;
- }
- if ($limits[0] > $limits[1] || $this->_values[$limits[1]] < $value) {
- return -1;
- }
- if ($limits[1] - $limits[0] <= 1) {
- return $this->_values[$limits[0]] < $value ? $limits[1] : $limits[0];
- }
- $separator = floor(($limits[0] + $limits[1]) / 2);
- if ($this->_values[$separator] < $value) {
- $limits[0] = $separator + 1;
- } else {
- $limits[1] = $separator;
- }
- return $this->_binarySearch($value, [$limits[0], $limits[1]]);
- }
- }
|