inc/3rdparty/htmlpurifier/HTMLPurifier/AttrDef.php

   1 <?php
   2
   3 /**
   4  * Base class for all validating attribute definitions.
   5  *
   6  * This family of classes forms the core for not only HTML attribute validation,
   7  * but also any sort of string that needs to be validated or cleaned (which
   8  * means CSS properties and composite definitions are defined here too).
   9  * Besides defining (through code) what precisely makes the string valid,
  10  * subclasses are also responsible for cleaning the code if possible.
  11  */
  12
  13 abstract class HTMLPurifier_AttrDef
  14 {
  15
  16     /**
  17      * Tells us whether or not an HTML attribute is minimized.
  18      * Has no meaning in other contexts.
  19      * @type bool
  20      */
  21     public $minimized = false;
  22
  23     /**
  24      * Tells us whether or not an HTML attribute is required.
  25      * Has no meaning in other contexts
  26      * @type bool
  27      */
  28     public $required = false;
  29
  30     /**
  31      * Validates and cleans passed string according to a definition.
  32      *
  33      * @param string $string String to be validated and cleaned.
  34      * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
  35      * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
  36      */
  37     abstract public function validate($string, $config, $context);
  38
  39     /**
  40      * Convenience method that parses a string as if it were CDATA.
  41      *
  42      * This method process a string in the manner specified at
  43      * <http://www.w3.org/TR/html4/types.html#h-6.2> by removing
  44      * leading and trailing whitespace, ignoring line feeds, and replacing
  45      * carriage returns and tabs with spaces.  While most useful for HTML
  46      * attributes specified as CDATA, it can also be applied to most CSS
  47      * values.
  48      *
  49      * @note This method is not entirely standards compliant, as trim() removes
  50      *       more types of whitespace than specified in the spec. In practice,
  51      *       this is rarely a problem, as those extra characters usually have
  52      *       already been removed by HTMLPurifier_Encoder.
  53      *
  54      * @warning This processing is inconsistent with XML's whitespace handling
  55      *          as specified by section 3.3.3 and referenced XHTML 1.0 section
  56      *          4.7.  However, note that we are NOT necessarily
  57      *          parsing XML, thus, this behavior may still be correct. We
  58      *          assume that newlines have been normalized.
  59      */
  60     public function parseCDATA($string)
  61     {
  62         $string = trim($string);
  63         $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
  64         return $string;
  65     }
  66
  67     /**
  68      * Factory method for creating this class from a string.
  69      * @param string $string String construction info
  70      * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
  71      */
  72     public function make($string)
  73     {
  74         // default implementation, return a flyweight of this object.
  75         // If $string has an effect on the returned object (i.e. you
  76         // need to overload this method), it is best
  77         // to clone or instantiate new copies. (Instantiation is safer.)
  78         return $this;
  79     }
  80
  81     /**
  82      * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
  83      * properly. THIS IS A HACK!
  84      * @param string $string a CSS colour definition
  85      * @return string
  86      */
  87     protected function mungeRgb($string)
  88     {
  89         return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string);
  90     }
  91
  92     /**
  93      * Parses a possibly escaped CSS string and returns the "pure"
  94      * version of it.
  95      */
  96     protected function expandCSSEscape($string)
  97     {
  98         // flexibly parse it
  99         $ret = '';
 100         for ($i = 0, $c = strlen($string); $i < $c; $i++) {
 101             if ($string[$i] === '\\') {
 102                 $i++;
 103                 if ($i >= $c) {
 104                     $ret .= '\\';
 105                     break;
 106                 }
 107                 if (ctype_xdigit($string[$i])) {
 108                     $code = $string[$i];
 109                     for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
 110                         if (!ctype_xdigit($string[$i])) {
 111                             break;
 112                         }
 113                         $code .= $string[$i];
 114                     }
 115                     // We have to be extremely careful when adding
 116                     // new characters, to make sure we're not breaking
 117                     // the encoding.
 118                     $char = HTMLPurifier_Encoder::unichr(hexdec($code));
 119                     if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
 120                         continue;
 121                     }
 122                     $ret .= $char;
 123                     if ($i < $c && trim($string[$i]) !== '') {
 124                         $i--;
 125                     }
 126                     continue;
 127                 }
 128                 if ($string[$i] === "\n") {
 129                     continue;
 130                 }
 131             }
 132             $ret .= $string[$i];
 133         }
 134         return $ret;
 135     }
 136 }
 137
 138 // vim: et sw=4 sts=4