]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/htmlpurifier/HTMLPurifier/AttrDef.php
[add] HTML Purifier added to clean code
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / AttrDef.php
1 <?php
2
3 /**
4 * Base class for all validating attribute definitions.
5 *
6 * This family of classes forms the core for not only HTML attribute validation,
7 * but also any sort of string that needs to be validated or cleaned (which
8 * means CSS properties and composite definitions are defined here too).
9 * Besides defining (through code) what precisely makes the string valid,
10 * subclasses are also responsible for cleaning the code if possible.
11 */
12
abstract class HTMLPurifier_AttrDef
{

    /**
     * Tells us whether or not an HTML attribute is minimized.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $minimized = false;

    /**
     * Tells us whether or not an HTML attribute is required.
     * Has no meaning in other contexts.
     * @type bool
     */
    public $required = false;

    /**
     * Validates and cleans passed string according to a definition.
     *
     * Every concrete attribute definition must implement this method; the
     * base class only fixes the signature.
     *
     * @param string $string String to be validated and cleaned.
     * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
     * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
     * @note The return contract is defined by the subclasses and is not
     *       visible here (presumably the cleaned string, or a failure
     *       marker -- confirm against the implementations).
     */
    abstract public function validate($string, $config, $context);

    /**
     * Convenience method that parses a string as if it were CDATA.
     *
     * This method processes a string roughly in the manner specified at
     * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
     * whitespace is removed, and every remaining line feed, carriage
     * return and tab is replaced with a single space. While most useful
     * for HTML attributes specified as CDATA, it can also be applied to
     * most CSS values.
     *
     * @note The HTML 4 text says line feeds should be ignored; this
     *       implementation replaces them with a space instead, exactly
     *       like carriage returns and tabs.
     *
     * @note This method is not entirely standards compliant, as trim() removes
     *       more types of whitespace than specified in the spec. In practice,
     *       this is rarely a problem, as those extra characters usually have
     *       already been removed by HTMLPurifier_Encoder.
     *
     * @warning This processing is inconsistent with XML's whitespace handling
     *          as specified by section 3.3.3 and referenced XHTML 1.0 section
     *          4.7.  However, note that we are NOT necessarily
     *          parsing XML, thus, this behavior may still be correct. We
     *          assume that newlines have been normalized.
     *
     * @param string $string Raw attribute or property value.
     * @return string The trimmed value with inner newlines/tabs turned into spaces.
     */
    public function parseCDATA($string)
    {
        $string = trim($string);
        $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
        return $string;
    }

    /**
     * Factory method for creating this class from a string.
     * @param string $string String construction info
     * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
     */
    public function make($string)
    {
        // Default implementation: $string is ignored and this very object
        // is handed back as a flyweight. If $string has an effect on the
        // returned object (i.e. you need to overload this method), it is
        // best to clone or instantiate new copies. (Instantiation is safer.)
        return $this;
    }

    /**
     * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
     * properly. THIS IS A HACK!
     *
     * Whitespace is stripped around every component, including directly
     * inside the parentheses, and each component may be an integer, a
     * decimal number, or a percentage. Text that does not contain a
     * three-component rgb() expression is returned unchanged.
     *
     * @param string $string a CSS colour definition
     * @return string The same definition with rgb() triples compacted.
     */
    protected function mungeRgb($string)
    {
        // One colour component: a number with an optional fraction and an
        // optional percent sign, surrounded by any amount of whitespace.
        // The inner group is non-capturing so \1..\3 stay the components.
        $component = '\s*(\d+(?:\.\d+)?%?)\s*';
        return preg_replace(
            '/rgb\(' . $component . ',' . $component . ',' . $component . '\)/',
            'rgb(\1,\2,\3)',
            $string
        );
    }

    /**
     * Parses a possibly escaped CSS string and returns the "pure"
     * version of it.
     *
     * Handles three kinds of backslash sequence: a backslash followed by
     * one to six hex digits (decoded to a character, with one optional
     * trailing whitespace character consumed as the terminator), a
     * backslash followed by a line feed (both removed), and a backslash
     * followed by any other character (the character is kept literally).
     * A lone backslash at the very end of the input is kept as is.
     *
     * @param string $string CSS text that may contain backslash escapes.
     * @return string The text with the escapes expanded.
     */
    protected function expandCSSEscape($string)
    {
        // flexibly parse it
        $ret = '';
        for ($i = 0, $c = strlen($string); $i < $c; $i++) {
            if ($string[$i] === '\\') {
                $i++;
                if ($i >= $c) {
                    // Trailing backslash with nothing to escape.
                    $ret .= '\\';
                    break;
                }
                if (ctype_xdigit($string[$i])) {
                    // Collect at most six hex digits in total.
                    $code = $string[$i];
                    for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                        if (!ctype_xdigit($string[$i])) {
                            break;
                        }
                        $code .= $string[$i];
                    }
                    // We have to be extremely careful when adding
                    // new characters, to make sure we're not breaking
                    // the encoding: a decoded character that
                    // cleanUTF8() reduces to nothing is discarded.
                    $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                    if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                        $ret .= $char;
                    }
                    // $i now sits on the character after the hex digits.
                    // A whitespace character there terminates the escape
                    // and is consumed by the loop increment; anything else
                    // must be processed normally, so step back to cancel
                    // that increment. This has to happen for discarded
                    // escapes too, otherwise the following character
                    // would be silently dropped.
                    if ($i < $c && trim($string[$i]) !== '') {
                        $i--;
                    }
                    continue;
                }
                if ($string[$i] === "\n") {
                    // Escaped line feed: drop both characters.
                    continue;
                }
            }
            $ret .= $string[$i];
        }
        return $ret;
    }
}
137
138 // vim: et sw=4 sts=4