Minor dependency updates
[yaffs-website] / vendor / ezyang / htmlpurifier / library / HTMLPurifier / Strategy / RemoveForeignElements.php
<?php

/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes unrecognized
 * tokens. If a token is not recognized but a TagTransform is defined for
 * that element, the element will be transformed accordingly.
 */

class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the tokens permitted by the HTML definition.
     *
     * Handling per token:
     *  - Tag tokens that have an entry in the definition's info_tag_transform
     *    are transformed before anything else is checked.
     *  - Tags known to the definition are kept. Start/empty tags whose
     *    element declares required attributes are validated first and
     *    dropped if a required attribute is still missing ("img" is only
     *    checked when Core.RemoveInvalidImg is enabled).
     *  - Unknown tags are converted to text when Core.EscapeInvalidTags is
     *    enabled, otherwise dropped. When a dropped start tag is listed in
     *    the hidden elements, subsequent tokens are dropped as well until a
     *    tag with the same name is seen again.
     *  - Comments are converted to text while inside a known hidden element,
     *    kept (with hyphens cleaned up) when HTML.Trusted is on or the
     *    comment matches HTML.AllowedComments / HTML.AllowedCommentsRegexp,
     *    and dropped otherwise.
     *  - Text tokens are kept; any other token type is dropped.
     *
     * Notices, warnings and errors are sent to the 'ErrorCollector' context
     * entry when Core.CollectErrors is enabled.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $result = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // currently only used to determine if comments should be kept
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $check_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements = $config->get('Core.HiddenElements');

        // remove script contents compatibility: an explicit true/false in
        // Core.RemoveScriptContents overrides the 'script' entry of
        // Core.HiddenElements; any other value leaves the list untouched
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) {
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // removes tokens until it reaches a closing tag with its value
        $remove_until = false;

        // converts comments into text tokens when this is equal to a tag name
        $textify_comments = false;

        // The loop variable itself is what gets registered as 'CurrentToken',
        // so it must keep this name and be declared before the loop.
        // NOTE(review): presumably register() binds by reference so the
        // context always sees the token being processed -- confirm against
        // HTMLPurifier_Context.
        $token = false;
        $context->register('CurrentToken', $token);

        // error collector, or false when error collection is disabled
        $e = false;
        if ($config->get('Core.CollectErrors')) {
            $e =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            if ($remove_until) {
                // inside a removed hidden element: skip everything except a
                // tag carrying the same name, which is processed below
                if (empty($token->is_tag) || $token->name !== $remove_until) {
                    continue;
                }
            }
            if (!empty($token->is_tag)) {
                // DEFINITION CALL

                // before any processing, try to transform the element
                if (isset($definition->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    // there is a transformation for this tag
                    // DEFINITION CALL
                    $token = $definition->
                        info_tag_transform[$token->name]->transform($token, $config, $context);
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // mostly everything's good, but
                    // we need to make sure required attributes are in order
                    if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $definition->info[$token->name]->required_attr &&
                        ($token->name !== 'img' || $remove_invalid_img) // ensure config option still works
                    ) {
                        $attr_validator->validateToken($token, $config, $context);
                        $ok = true;
                        // $name is deliberately left pointing at the first
                        // missing attribute; it is reported below
                        foreach ($definition->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                $ok = false;
                                break;
                            }
                        }
                        if (!$ok) {
                            if ($e) {
                                $e->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $name
                                );
                            }
                            continue;
                        }
                        // attributes were validated above; flag the token so
                        // this is recorded on it
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // track whether we are inside a known hidden element so
                    // that comments within it are turned into text
                    if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                        $textify_comments = $token->name;
                    } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) {
                        $textify_comments = false;
                    }

                } elseif ($escape_invalid_tags) {
                    // invalid tag, generate HTML representation and insert in
                    if ($e) {
                        $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // check if we need to destroy all of the tag's children
                    // CAN BE GENERICIZED
                    if (isset($hidden_elements[$token->name])) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $remove_until = $token->name;
                        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
                            // do nothing: we're still looking
                        } else {
                            $remove_until = false;
                        }
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    } else {
                        if ($e) {
                            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                // textify comments in script tags when they are allowed
                if ($textify_comments !== false) {
                    $data = $token->data;
                    $token = new HTMLPurifier_Token_Text($data);
                } elseif ($trusted || $check_comments) {
                    // always cleanup comments
                    $trailing_hyphen = false;
                    if ($e) {
                        // perform check whether or not there's a trailing hyphen
                        // (only needed for the notice sent further down)
                        if (substr($token->data, -1) === '-') {
                            $trailing_hyphen = true;
                        }
                    }
                    $token->data = rtrim($token->data, '-');
                    $found_double_hyphen = false;
                    // repeat until stable: a single pass turns '---' into '--'
                    while (strpos($token->data, '--') !== false) {
                        $found_double_hyphen = true;
                        $token->data = str_replace('--', '-', $token->data);
                    }
                    // the allow-list and the regexp are both matched against
                    // the cleaned, whitespace-trimmed comment text
                    $comment_text = trim($token->data);
                    if ($trusted || !empty($comment_lookup[$comment_text]) ||
                        ($comment_regexp !== null && preg_match($comment_regexp, $comment_text))) {
                        // OK good
                        if ($e) {
                            if ($trailing_hyphen) {
                                $e->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($found_double_hyphen) {
                                $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                            }
                        }
                    } else {
                        if ($e) {
                            $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                } else {
                    // strip comments
                    if ($e) {
                        $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // neither a tag, a comment nor text: drop it
                continue;
            }
            $result[] = $token;
        }
        if ($remove_until && $e) {
            // we removed tokens until the end, throw error
            $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until);
        }
        $context->destroy('CurrentToken');
        return $result;
    }
}

// vim: et sw=4 sts=4