]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/htmlpurifier/HTMLPurifier/Strategy/RemoveForeignElements.php
[add] HTML Purifier added to clean code
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / Strategy / RemoveForeignElements.php
1 <?php
2
3 /**
4 * Removes all unrecognized tags from the list of tokens.
5 *
6 * This strategy iterates through all the tokens and removes unrecognized
7 * tokens. If a token is not recognized but a TagTransform is defined for
8 * that element, the element will be transformed accordingly.
9 */
10
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the tokens this strategy accepts.
     *
     * Tags are first run through any tag transform registered for their name.
     * A tag known to the HTML definition is kept (unless a required attribute
     * is missing); an unknown tag is either turned into a text token or
     * dropped, depending on Core.EscapeInvalidTags. Comments are textified,
     * cleaned up and kept, or dropped. Text tokens pass through untouched and
     * every other kind of token is discarded.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $html_def = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // these three settings only influence whether comments survive
        $trusted = $config->get('HTML.Trusted');
        $allowed_comments = $config->get('HTML.AllowedComments');
        $allowed_comments_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $check_comments = !($allowed_comments === array() && $allowed_comments_regexp === null);

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden_elements = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents compatibility: an explicit true/false
        // overrides whatever Core.HiddenElements says about 'script'
        if ($remove_script_contents === true) {
            $hidden_elements['script'] = true;
        } elseif ($remove_script_contents === false) {
            // unset() on a missing key is a no-op, so no isset() guard needed
            unset($hidden_elements['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // name of the element whose closing tag ends the current skip run,
        // or false when nothing is being skipped
        $skip_until = false;

        // name of the hidden element we are inside of (comments become text
        // there), or false when outside of one
        $textify_inside = false;

        // NOTE(review): presumably registered by reference so the context
        // follows reassignments of $current - confirm against
        // HTMLPurifier_Context::register()
        $current = false;
        $context->register('CurrentToken', $current);

        $collector = false;
        if ($config->get('Core.CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $current) {
            // while skipping, only a tag carrying the awaited name gets through
            if ($skip_until && (empty($current->is_tag) || $current->name !== $skip_until)) {
                continue;
            }

            if (!empty($current->is_tag)) {
                // DEFINITION CALL
                // give a registered tag transform the first shot at the element
                if (isset($html_def->info_tag_transform[$current->name])) {
                    $name_before = $current->name;
                    $transform = $html_def->info_tag_transform[$name_before];
                    $current = $transform->transform($current, $config, $context);
                    if ($collector) {
                        $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
                    }
                }

                if (!isset($html_def->info[$current->name])) {
                    if (!$escape_invalid_tags) {
                        // unknown element that is to be dropped; a hidden
                        // element additionally takes its children with it
                        // CAN BE GENERICIZED
                        $is_hidden = isset($hidden_elements[$current->name]);
                        if ($is_hidden) {
                            if ($current instanceof HTMLPurifier_Token_Start) {
                                $skip_until = $current->name;
                            } elseif (!($current instanceof HTMLPurifier_Token_Empty)) {
                                // anything but an empty tag stops the skip run;
                                // an empty tag leaves the state untouched
                                $skip_until = false;
                            }
                        }
                        if ($collector) {
                            $collector->send(
                                E_ERROR,
                                $is_hidden
                                    ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                    : 'Strategy_RemoveForeignElements: Foreign element removed'
                            );
                        }
                        continue;
                    }

                    // unknown element that is to be shown as literal markup
                    if ($collector) {
                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $markup = $generator->generateFromToken($current);
                    $current = new HTMLPurifier_Token_Text($markup);
                } else {
                    // known element: still has to carry its required attributes
                    $element = $html_def->info[$current->name];
                    $opens_element = $current instanceof HTMLPurifier_Token_Start
                        || $current instanceof HTMLPurifier_Token_Empty;

                    if ($opens_element &&
                        $element->required_attr &&
                        ($remove_invalid_img || $current->name != 'img') // ensure config option still works
                    ) {
                        $attr_validator->validateToken($current, $config, $context);

                        $attr_missing = false;
                        foreach ($element->required_attr as $required) {
                            if (!isset($current->attr[$required])) {
                                $attr_missing = true;
                                break;
                            }
                        }

                        if ($attr_missing) {
                            if ($collector) {
                                // $required still names the attribute that was not found
                                $collector->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $required
                                );
                            }
                            continue;
                        }
                        // attributes were validated just above; flag the token accordingly
                        $current->armor['ValidateAttributes'] = true;
                    }

                    // keep track of whether we are inside a hidden element
                    if ($current instanceof HTMLPurifier_Token_Start && isset($hidden_elements[$current->name])) {
                        $textify_inside = $current->name;
                    } elseif ($current instanceof HTMLPurifier_Token_End && $current->name === $textify_inside) {
                        $textify_inside = false;
                    }
                }
            } elseif ($current instanceof HTMLPurifier_Token_Comment) {
                if ($textify_inside !== false) {
                    // comments inside an allowed hidden element become plain text
                    $current = new HTMLPurifier_Token_Text($current->data);
                } else {
                    $keep_comment = false;

                    if ($trusted || $check_comments) {
                        // the trailing-hyphen check only matters for error reporting
                        $had_trailing_hyphen = $collector && substr($current->data, -1) == '-';

                        // always clean up: strip trailing hyphens, then
                        // collapse every run of hyphens to a single one
                        $text = rtrim($current->data, '-');
                        $collapsed_hyphens = false;
                        while (strpos($text, '--') !== false) {
                            $collapsed_hyphens = true;
                            $text = str_replace('--', '-', $text);
                        }
                        $current->data = $text;

                        $trimmed = trim($text);
                        $keep_comment = $trusted
                            || !empty($allowed_comments[$trimmed])
                            || ($allowed_comments_regexp !== null && preg_match($allowed_comments_regexp, $trimmed));

                        if ($keep_comment && $collector) {
                            if ($had_trailing_hyphen) {
                                $collector->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                                );
                            }
                            if ($collapsed_hyphens) {
                                $collector->send(
                                    E_NOTICE,
                                    'Strategy_RemoveForeignElements: Hyphens in comment collapsed'
                                );
                            }
                        }
                    }

                    if (!$keep_comment) {
                        // either comments are stripped wholesale or this one is not allowed
                        if ($collector) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }
                }
            } elseif (!($current instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: discard
                continue;
            }

            $kept[] = $current;
        }

        if ($skip_until && $collector) {
            // the awaited closing tag never showed up, so everything up to
            // the end of the input was removed
            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}
206
207 // vim: et sw=4 sts=4