<?php

/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes unrecognized
 * tokens. If a token is not recognized but a TagTransform is defined for
 * that element, the element will be transformed accordingly.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the tokens this strategy lets through.
     *
     * Per token, in order:
     *  - Tag tokens are first passed through a registered tag transform, if
     *    the definition has one for that tag name.
     *  - Tags present in the HTML definition are kept. Start/empty tags of
     *    elements with required attributes are validated immediately and
     *    dropped when a required attribute is still missing afterwards
     *    (for img this only happens when Core.RemoveInvalidImg is on).
     *  - Tags absent from the definition are turned into text tokens when
     *    Core.EscapeInvalidTags is on; otherwise they are dropped, and for
     *    names listed in the hidden elements every following token up to
     *    the closing tag of the same name is dropped as well.
     *  - Comments become text tokens while inside a recognized hidden
     *    element; otherwise they are kept (with hyphens cleaned up) only
     *    when HTML.Trusted is on or the comment matches
     *    HTML.AllowedComments / HTML.AllowedCommentsRegexp.
     *  - Text tokens are kept unchanged; anything else is dropped.
     *
     * Notices, warnings and errors are sent to the ErrorCollector from the
     * context when Core.CollectErrors is enabled.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $result = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // currently only used to determine if comments should be kept
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements = $config->get('Core.HiddenElements');

        // remove script contents compatibility: an explicit true/false in
        // Core.RemoveScriptContents overrides the 'script' entry of the
        // hidden elements; any other value leaves the list untouched
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // removes tokens until it reaches a closing tag with its value
        $remove_until = false;

        // converts comments into text tokens when this is equal to a tag name
        $textify_comments = false;

        // $token is registered with the context by reference, so the loop
        // variable below must keep this exact name
        $token = false;
        $context->register('CurrentToken', $token);

        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($remove_until) {
                // while skipping hidden contents, only tags carrying the
                // awaited name are allowed through to the logic below
                if (empty($token->is_tag) || $token->name !== $remove_until) {
                    continue;
                }
            }

            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // before any processing, try to transform the element
                if (isset($definition->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    // there is a transformation for this tag
                    // DEFINITION CALL
                    $token = $definition->
                        info_tag_transform[$token->name]->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // mostly everything's good, but
                    // we need to make sure required attributes are in order
                    if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name !== 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $ok = true;
                        foreach ($definition->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                // $name is left pointing at the first
                                // missing attribute for the error below
                                $ok = false;
                                break;
                            }
                        }
                        if (!$ok) {
                            if ($e) {
                                $e->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $name
                                );
                            }
                            continue;
                        }
                        // flag the token as already validated
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track whether we are inside a recognized hidden
                    // element, where comments are converted to text
                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_comments = $token->name;
                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                        $textify_comments = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // invalid tag, generate HTML representation and insert in
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // check if we need to destroy all of the tag's children
                    // CAN BE GENERICIZED
                    if (isset($hidden_elements[$token->name])) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $remove_until = $token->name;
                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                            // do nothing: we're still looking
                        } else {
                            $remove_until = false;
                        }
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    } else {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                // textify comments in script tags when they are allowed
                if ($textify_comments !== false) {
                    $data = $token->data;
                    $token = new HTMLPurifier_Token_Text($data);
                } elseif ($trusted || $check_comments) {
                    // always cleanup comments
                    $trailing_hyphen = false;
                    if ($e) {
                        // perform check whether or not there's a trailing hyphen
                        // (only needed for error reporting)
                        if (substr($token->data, -1) === '-') {
                            $trailing_hyphen = true;
                        }
                    }
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = false;
                    // repeat until every run of hyphens is a single hyphen
                    while (strpos($token->data, '--') !== false) {
                        $found_double_hyphen = true;
                        $token->data = str_replace('--', '-', $token->data);
                    }
                    // the allow-list and the regexp are both matched
                    // against the whitespace-trimmed, cleaned-up comment
                    $trimmed_data = trim($token->data);
                    if ($trusted || !empty($comment_lookup[$trimmed_data]) ||
                        ($comment_regexp !== null && preg_match($comment_regexp, $trimmed_data))) {
                        // OK good
                        if ($e) {
                            if ($trailing_hyphen) {
                                $e->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($found_double_hyphen) {
                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    } else {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                } else {
                    // strip comments
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: drop silently
                // (text tokens fall through and are kept as-is)
                continue;
            }
            $result[] = $token;
        }
        if ($remove_until && $e) {
            // we removed tokens until the end, throw error
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }
        $context->destroy('CurrentToken');
        return $result;
    }
}

// vim: et sw=4 sts=4