| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623 | <?phpnamespace PhpOffice\PhpSpreadsheet\Calculation;/** * PARTLY BASED ON: * Copyright (c) 2007 E. W. Bachtal, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy of this software * and associated documentation files (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all copies or substantial * portions of the Software. * * The software is provided "as is", without warranty of any kind, express or implied, including but not * limited to the warranties of merchantability, fitness for a particular purpose and noninfringement. In * no event shall the authors or copyright holders be liable for any claim, damages or other liability, * whether in an action of contract, tort or otherwise, arising from, out of or in connection with the * software or the use or other dealings in the software. * * https://ewbi.blogs.com/develops/2007/03/excel_formula_p.html * https://ewbi.blogs.com/develops/2004/12/excel_formula_p.html */class FormulaParser{    // Character constants    const QUOTE_DOUBLE = '"';    const QUOTE_SINGLE = '\'';    const BRACKET_CLOSE = ']';    const BRACKET_OPEN = '[';    const BRACE_OPEN = '{';    const BRACE_CLOSE = '}';    const PAREN_OPEN = '(';    const PAREN_CLOSE = ')';    const SEMICOLON = ';';    const WHITESPACE = ' ';    const COMMA = ',';    const ERROR_START = '#';    const OPERATORS_SN = '+-';    const OPERATORS_INFIX = '+-*/^&=><';    const OPERATORS_POSTFIX = '%';    /**     * Formula.     *     * @var string     */    private $formula;    /**     * Tokens.     *     * @var FormulaToken[]     */    private $tokens = [];    /**     * Create a new FormulaParser.     *     * @param string $pFormula Formula to parse     *     * @throws Exception     */    public function __construct($pFormula = '')    {        // Check parameters        if ($pFormula === null) {            throw new Exception('Invalid parameter passed: formula');        }        // Initialise values        $this->formula = trim($pFormula);        // Parse!        $this->parseToTokens();    }    /**     * Get Formula.     *     * @return string     */    public function getFormula()    {        return $this->formula;    }    /**     * Get Token.     *     * @param int $pId Token id     *     * @throws Exception     *     * @return string     */    public function getToken($pId = 0)    {        if (isset($this->tokens[$pId])) {            return $this->tokens[$pId];        }        throw new Exception("Token with id $pId does not exist.");    }    /**     * Get Token count.     *     * @return int     */    public function getTokenCount()    {        return count($this->tokens);    }    /**     * Get Tokens.     *     * @return FormulaToken[]     */    public function getTokens()    {        return $this->tokens;    }    /**     * Parse to tokens.     */    private function parseToTokens()    {        // No attempt is made to verify formulas; assumes formulas are derived from Excel, where        // they can only exist if valid; stack overflows/underflows sunk as nulls without exceptions.        // Check if the formula has a valid starting =        $formulaLength = strlen($this->formula);        if ($formulaLength < 2 || $this->formula[0] != '=') {            return;        }        // Helper variables        $tokens1 = $tokens2 = $stack = [];        $inString = $inPath = $inRange = $inError = false;        $token = $previousToken = $nextToken = null;        $index = 1;        $value = '';        $ERRORS = ['#NULL!', '#DIV/0!', '#VALUE!', '#REF!', '#NAME?', '#NUM!', '#N/A'];        $COMPARATORS_MULTI = ['>=', '<=', '<>'];        while ($index < $formulaLength) {            // state-dependent character evaluation (order is important)            // double-quoted strings            // embeds are doubled            // end marks token            if ($inString) {                if ($this->formula[$index] == self::QUOTE_DOUBLE) {                    if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_DOUBLE)) {                        $value .= self::QUOTE_DOUBLE;                        ++$index;                    } else {                        $inString = false;                        $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_TEXT);                        $value = '';                    }                } else {                    $value .= $this->formula[$index];                }                ++$index;                continue;            }            // single-quoted strings (links)            // embeds are double            // end does not mark a token            if ($inPath) {                if ($this->formula[$index] == self::QUOTE_SINGLE) {                    if ((($index + 2) <= $formulaLength) && ($this->formula[$index + 1] == self::QUOTE_SINGLE)) {                        $value .= self::QUOTE_SINGLE;                        ++$index;                    } else {                        $inPath = false;                    }                } else {                    $value .= $this->formula[$index];                }                ++$index;                continue;            }            // bracked strings (R1C1 range index or linked workbook name)            // no embeds (changed to "()" by Excel)            // end does not mark a token            if ($inRange) {                if ($this->formula[$index] == self::BRACKET_CLOSE) {                    $inRange = false;                }                $value .= $this->formula[$index];                ++$index;                continue;            }            // error values            // end marks a token, determined from absolute list of values            if ($inError) {                $value .= $this->formula[$index];                ++$index;                if (in_array($value, $ERRORS)) {                    $inError = false;                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND, FormulaToken::TOKEN_SUBTYPE_ERROR);                    $value = '';                }                continue;            }            // scientific notation check            if (strpos(self::OPERATORS_SN, $this->formula[$index]) !== false) {                if (strlen($value) > 1) {                    if (preg_match('/^[1-9]{1}(\\.\\d+)?E{1}$/', $this->formula[$index]) != 0) {                        $value .= $this->formula[$index];                        ++$index;                        continue;                    }                }            }            // independent character evaluation (order not important)            // establish state-dependent character evaluations            if ($this->formula[$index] == self::QUOTE_DOUBLE) {                if (strlen($value) > 0) {                    // unexpected                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);                    $value = '';                }                $inString = true;                ++$index;                continue;            }            if ($this->formula[$index] == self::QUOTE_SINGLE) {                if (strlen($value) > 0) {                    // unexpected                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);                    $value = '';                }                $inPath = true;                ++$index;                continue;            }            if ($this->formula[$index] == self::BRACKET_OPEN) {                $inRange = true;                $value .= self::BRACKET_OPEN;                ++$index;                continue;            }            if ($this->formula[$index] == self::ERROR_START) {                if (strlen($value) > 0) {                    // unexpected                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);                    $value = '';                }                $inError = true;                $value .= self::ERROR_START;                ++$index;                continue;            }            // mark start and end of arrays and array rows            if ($this->formula[$index] == self::BRACE_OPEN) {                if (strlen($value) > 0) {                    // unexpected                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_UNKNOWN);                    $value = '';                }                $tmp = new FormulaToken('ARRAY', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);                $tokens1[] = $tmp;                $stack[] = clone $tmp;                $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);                $tokens1[] = $tmp;                $stack[] = clone $tmp;                ++$index;                continue;            }            if ($this->formula[$index] == self::SEMICOLON) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tmp = array_pop($stack);                $tmp->setValue('');                $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);                $tokens1[] = $tmp;                $tmp = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT);                $tokens1[] = $tmp;                $tmp = new FormulaToken('ARRAYROW', FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);                $tokens1[] = $tmp;                $stack[] = clone $tmp;                ++$index;                continue;            }            if ($this->formula[$index] == self::BRACE_CLOSE) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tmp = array_pop($stack);                $tmp->setValue('');                $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);                $tokens1[] = $tmp;                $tmp = array_pop($stack);                $tmp->setValue('');                $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);                $tokens1[] = $tmp;                ++$index;                continue;            }            // trim white-space            if ($this->formula[$index] == self::WHITESPACE) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tokens1[] = new FormulaToken('', FormulaToken::TOKEN_TYPE_WHITESPACE);                ++$index;                while (($this->formula[$index] == self::WHITESPACE) && ($index < $formulaLength)) {                    ++$index;                }                continue;            }            // multi-character comparators            if (($index + 2) <= $formulaLength) {                if (in_array(substr($this->formula, $index, 2), $COMPARATORS_MULTI)) {                    if (strlen($value) > 0) {                        $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                        $value = '';                    }                    $tokens1[] = new FormulaToken(substr($this->formula, $index, 2), FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_LOGICAL);                    $index += 2;                    continue;                }            }            // standard infix operators            if (strpos(self::OPERATORS_INFIX, $this->formula[$index]) !== false) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORINFIX);                ++$index;                continue;            }            // standard postfix operators (only one)            if (strpos(self::OPERATORS_POSTFIX, $this->formula[$index]) !== false) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tokens1[] = new FormulaToken($this->formula[$index], FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX);                ++$index;                continue;            }            // start subexpression or function            if ($this->formula[$index] == self::PAREN_OPEN) {                if (strlen($value) > 0) {                    $tmp = new FormulaToken($value, FormulaToken::TOKEN_TYPE_FUNCTION, FormulaToken::TOKEN_SUBTYPE_START);                    $tokens1[] = $tmp;                    $stack[] = clone $tmp;                    $value = '';                } else {                    $tmp = new FormulaToken('', FormulaToken::TOKEN_TYPE_SUBEXPRESSION, FormulaToken::TOKEN_SUBTYPE_START);                    $tokens1[] = $tmp;                    $stack[] = clone $tmp;                }                ++$index;                continue;            }            // function, subexpression, or array parameters, or operand unions            if ($this->formula[$index] == self::COMMA) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tmp = array_pop($stack);                $tmp->setValue('');                $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);                $stack[] = $tmp;                if ($tmp->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {                    $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_UNION);                } else {                    $tokens1[] = new FormulaToken(',', FormulaToken::TOKEN_TYPE_ARGUMENT);                }                ++$index;                continue;            }            // stop subexpression            if ($this->formula[$index] == self::PAREN_CLOSE) {                if (strlen($value) > 0) {                    $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);                    $value = '';                }                $tmp = array_pop($stack);                $tmp->setValue('');                $tmp->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_STOP);                $tokens1[] = $tmp;                ++$index;                continue;            }            // token accumulation            $value .= $this->formula[$index];            ++$index;        }        // dump remaining accumulation        if (strlen($value) > 0) {            $tokens1[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERAND);        }        // move tokenList to new set, excluding unnecessary white-space tokens and converting necessary ones to intersections        $tokenCount = count($tokens1);        for ($i = 0; $i < $tokenCount; ++$i) {            $token = $tokens1[$i];            if (isset($tokens1[$i - 1])) {                $previousToken = $tokens1[$i - 1];            } else {                $previousToken = null;            }            if (isset($tokens1[$i + 1])) {                $nextToken = $tokens1[$i + 1];            } else {                $nextToken = null;            }            if ($token === null) {                continue;            }            if ($token->getTokenType() != FormulaToken::TOKEN_TYPE_WHITESPACE) {                $tokens2[] = $token;                continue;            }            if ($previousToken === null) {                continue;            }            if (!(                (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||                (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||                ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)            )) {                continue;            }            if ($nextToken === null) {                continue;            }            if (!(                (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||                (($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) && ($nextToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_START)) ||                ($nextToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)            )) {                continue;            }            $tokens2[] = new FormulaToken($value, FormulaToken::TOKEN_TYPE_OPERATORINFIX, FormulaToken::TOKEN_SUBTYPE_INTERSECTION);        }        // move tokens to final list, switching infix "-" operators to prefix when appropriate, switching infix "+" operators        // to noop when appropriate, identifying operand and infix-operator subtypes, and pulling "@" from function names        $this->tokens = [];        $tokenCount = count($tokens2);        for ($i = 0; $i < $tokenCount; ++$i) {            $token = $tokens2[$i];            if (isset($tokens2[$i - 1])) {                $previousToken = $tokens2[$i - 1];            } else {                $previousToken = null;            }            if (isset($tokens2[$i + 1])) {                $nextToken = $tokens2[$i + 1];            } else {                $nextToken = null;            }            if ($token === null) {                continue;            }            if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '-') {                if ($i == 0) {                    $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX);                } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&                        ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||                    (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&                        ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||                    ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||                    ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) {                    $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);                } else {                    $token->setTokenType(FormulaToken::TOKEN_TYPE_OPERATORPREFIX);                }                $this->tokens[] = $token;                continue;            }            if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX && $token->getValue() == '+') {                if ($i == 0) {                    continue;                } elseif ((($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) &&                        ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||                    (($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_SUBEXPRESSION) &&                        ($previousToken->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_STOP)) ||                    ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORPOSTFIX) ||                    ($previousToken->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND)) {                    $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);                } else {                    continue;                }                $this->tokens[] = $token;                continue;            }            if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERATORINFIX &&                $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) {                if (strpos('<>=', substr($token->getValue(), 0, 1)) !== false) {                    $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL);                } elseif ($token->getValue() == '&') {                    $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_CONCATENATION);                } else {                    $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_MATH);                }                $this->tokens[] = $token;                continue;            }            if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_OPERAND &&                $token->getTokenSubType() == FormulaToken::TOKEN_SUBTYPE_NOTHING) {                if (!is_numeric($token->getValue())) {                    if (strtoupper($token->getValue()) == 'TRUE' || strtoupper($token->getValue()) == 'FALSE') {                        $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_LOGICAL);                    } else {                        $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_RANGE);                    }                } else {                    $token->setTokenSubType(FormulaToken::TOKEN_SUBTYPE_NUMBER);                }                $this->tokens[] = $token;                continue;            }            if ($token->getTokenType() == FormulaToken::TOKEN_TYPE_FUNCTION) {                if (strlen($token->getValue()) > 0) {                    if (substr($token->getValue(), 0, 1) == '@') {                        $token->setValue(substr($token->getValue(), 1));                    }                }            }            $this->tokens[] = $token;        }    }}
 |