<?php
/*
 * This file is part of PHPUnit.
 *
 * (c) Sebastian Bergmann <sebastian@phpunit.de>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
/**
 * XML helpers.
 *
 * Static utilities for loading XML/HTML into DOMDocument objects, turning
 * DOM fragments into text or PHP values, and matching DOM nodes against
 * "tag matcher" arrays derived from simple CSS selectors.
 *
 * @since Class available since Release 3.2.0
 */
class PHPUnit_Util_XML
{
    /**
     * Escapes a string for the use in XML documents
     * Any Unicode character is allowed, excluding the surrogate blocks, FFFE,
     * and FFFF (not even as character reference).
     * See http://www.w3.org/TR/xml/#charsets
     *
     * The string is converted to UTF-8, its special characters are escaped
     * (including both quote styles) and the control characters that XML 1.0
     * forbids are removed from the result.
     *
     * @param string $string
     *
     * @return string
     *
     * @since  Method available since Release 3.4.6
     */
    public static function prepareString($string)
    {
        return preg_replace(
            '/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]/',
            '',
            htmlspecialchars(
                PHPUnit_Util_String::convertToUtf8($string),
                ENT_QUOTES,
                'UTF-8'
            )
        );
    }

    /**
     * Loads an XML (or HTML) file into a DOMDocument object.
     *
     * The file is read with error reporting silenced; an unreadable file is
     * reported through an exception instead of a PHP warning. The contents
     * are then handed to load().
     *
     * @param string $filename
     * @param bool   $isHtml
     * @param bool   $xinclude
     * @param bool   $strict
     *
     * @return DOMDocument
     *
     * @throws PHPUnit_Framework_Exception when the file cannot be read
     *
     * @since  Method available since Release 3.3.0
     */
    public static function loadFile($filename, $isHtml = false, $xinclude = false, $strict = false)
    {
        $reporting = error_reporting(0);
        $contents  = file_get_contents($filename);
        error_reporting($reporting);

        if ($contents === false) {
            throw new PHPUnit_Framework_Exception(
                sprintf(
                    'Could not read "%s".',
                    $filename
                )
            );
        }

        return self::load($contents, $isHtml, $filename, $xinclude, $strict);
    }

    /**
     * Load an $actual document into a DOMDocument. This is called
     * from the selector assertions.
     *
     * If $actual is already a DOMDocument, it is returned with
     * no changes. Otherwise, $actual is loaded into a new DOMDocument
     * as either HTML or XML, depending on the value of $isHtml. If $isHtml is
     * false and $xinclude is true, xinclude is performed on the loaded
     * DOMDocument.
     *
     * Note: prior to PHPUnit 3.3.0, this method loaded a file and
     * not a string as it currently does. To load a file into a
     * DOMDocument, use loadFile() instead.
     *
     * @param string|DOMDocument $actual
     * @param bool               $isHtml
     * @param string             $filename
     * @param bool               $xinclude
     * @param bool               $strict   when true, any libxml message is
     *                                     treated as a load failure
     *
     * @return DOMDocument
     *
     * @throws PHPUnit_Framework_Exception when $actual is neither a string
     *                                     nor a DOMDocument, is empty, or
     *                                     cannot be parsed
     *
     * @since  Method available since Release 3.3.0
     */
    public static function load($actual, $isHtml = false, $filename = '', $xinclude = false, $strict = false)
    {
        if ($actual instanceof DOMDocument) {
            return $actual;
        }

        if (!is_string($actual)) {
            throw new PHPUnit_Framework_Exception('Could not load XML from ' . gettype($actual));
        }

        if ($actual === '') {
            throw new PHPUnit_Framework_Exception('Could not load XML from empty string');
        }

        // Required for XInclude on Windows.
        if ($xinclude) {
            $cwd = getcwd();
            @chdir(dirname($filename));
        }

        $document                     = new DOMDocument;
        $document->preserveWhiteSpace = false;

        // Collect libxml diagnostics ourselves instead of letting them
        // surface as PHP warnings; both settings are restored below.
        $internal  = libxml_use_internal_errors(true);
        $message   = '';
        $reporting = error_reporting(0);

        if ('' !== $filename) {
            // Necessary for xinclude
            $document->documentURI = $filename;
        }

        if ($isHtml) {
            $loaded = $document->loadHTML($actual);
        } else {
            $loaded = $document->loadXML($actual);
        }

        if (!$isHtml && $xinclude) {
            $document->xinclude();
        }

        foreach (libxml_get_errors() as $error) {
            $message .= "\n" . $error->message;
        }

        libxml_use_internal_errors($internal);
        error_reporting($reporting);

        if ($xinclude) {
            @chdir($cwd);
        }

        if ($loaded === false || ($strict && $message !== '')) {
            if ($filename !== '') {
                throw new PHPUnit_Framework_Exception(
                    sprintf(
                        'Could not load "%s".%s',
                        $filename,
                        $message !== '' ? "\n" . $message : ''
                    )
                );
            }

            if ($message === '') {
                $message = 'Could not load XML for unknown reason';
            }

            throw new PHPUnit_Framework_Exception($message);
        }

        return $document;
    }

    /**
     * Returns the content of a node as a string.
     *
     * A node with exactly one child yields its plain text content; otherwise
     * the children are serialized as XML and concatenated.
     *
     * @param DOMNode $node
     *
     * @return string
     *
     * @since  Method available since Release 3.4.0
     */
    public static function nodeToText(DOMNode $node)
    {
        if ($node->childNodes->length == 1) {
            return $node->textContent;
        }

        // A document has no owner document; it serializes its own children.
        $document = $node instanceof DOMDocument ? $node : $node->ownerDocument;
        $result   = '';

        foreach ($node->childNodes as $childNode) {
            $result .= $document->saveXML($childNode);
        }

        return $result;
    }

    /**
     * Removes every direct child of $node that is character data
     * (DOMCharacterData covers text, CDATA and comment nodes).
     *
     * @param DOMNode $node
     *
     * @since  Method available since Release 3.3.0
     */
    public static function removeCharacterDataNodes(DOMNode $node)
    {
        if ($node->hasChildNodes()) {
            // Walk backwards so removals do not shift the remaining indexes.
            for ($i = $node->childNodes->length - 1; $i >= 0; $i--) {
                if (($child = $node->childNodes->item($i)) instanceof DOMCharacterData) {
                    $node->removeChild($child);
                }
            }
        }
    }

    /**
     * "Convert" a DOMElement object into a PHP variable.
     *
     * Supported tag names: array (with <element key="..."> entries), object
     * (class attribute, optional constructor arguments container), boolean,
     * integer, double and string. Any other tag yields null.
     *
     * @param DOMElement $element
     *
     * @return mixed
     *
     * @since  Method available since Release 3.4.0
     */
    public static function xmlToVariable(DOMElement $element)
    {
        $variable = null;

        switch ($element->tagName) {
            case 'array':
                $variable = array();

                foreach ($element->childNodes as $entry) {
                    if (!$entry instanceof DOMElement || $entry->tagName !== 'element') {
                        continue;
                    }

                    // The value is the first element child; whitespace and
                    // comments around it are ignored. An entry without any
                    // element child maps to null.
                    $item  = self::firstElementChild($entry);
                    $value = $item !== null ? self::xmlToVariable($item) : null;

                    if ($entry->hasAttribute('key')) {
                        $variable[(string) $entry->getAttribute('key')] = $value;
                    } else {
                        $variable[] = $value;
                    }
                }
                break;

            case 'object':
                $className = $element->getAttribute('class');

                if ($element->hasChildNodes()) {
                    // The first element child is the container of the
                    // constructor arguments, independent of whether
                    // whitespace nodes were preserved by the parser.
                    $container       = self::firstElementChild($element);
                    $constructorArgs = array();

                    if ($container !== null) {
                        foreach ($container->childNodes as $argument) {
                            if ($argument instanceof DOMElement) {
                                $constructorArgs[] = self::xmlToVariable($argument);
                            }
                        }
                    }

                    $class    = new ReflectionClass($className);
                    $variable = $class->newInstanceArgs($constructorArgs);
                } else {
                    $variable = new $className;
                }
                break;

            case 'boolean':
                $variable = $element->textContent === 'true';
                break;

            case 'integer':
            case 'double':
            case 'string':
                $variable = $element->textContent;

                settype($variable, $element->tagName);
                break;
        }

        return $variable;
    }

    /**
     * Returns the first child of $node that is a DOMElement, or null when
     * there is none.
     *
     * @param DOMNode $node
     *
     * @return DOMElement|null
     */
    private static function firstElementChild(DOMNode $node)
    {
        foreach ($node->childNodes as $child) {
            if ($child instanceof DOMElement) {
                return $child;
            }
        }

        return null;
    }

    /**
     * Validate list of keys in the associative array.
     *
     * $validKeys may mix indexed entries ('name', default null) and
     * associative entries ('name' => default). Keys missing from $hash, or
     * present with a null value, receive their default.
     *
     * @param array $hash
     * @param array $validKeys
     *
     * @return array $hash completed with default values
     *
     * @throws PHPUnit_Framework_Exception when $hash contains a key that is
     *                                     not listed in $validKeys
     *
     * @since  Method available since Release 3.3.0
     */
    public static function assertValidKeys(array $hash, array $validKeys)
    {
        $valids  = array();
        $unknown = array();

        // Normalize validation keys so that we can use both indexed and
        // associative arrays.
        foreach ($validKeys as $key => $val) {
            if (is_int($key)) {
                $valids[$val] = null;
            } else {
                $valids[$key] = $val;
            }
        }

        // Check for invalid keys. A key lookup avoids the loose comparison
        // of in_array(), under which the integer key 0 equals any
        // non-numeric string on PHP < 8.
        foreach ($hash as $key => $value) {
            if (!array_key_exists($key, $valids)) {
                $unknown[] = $key;
            }
        }

        if (!empty($unknown)) {
            throw new PHPUnit_Framework_Exception(
                'Unknown key(s): ' . implode(', ', $unknown)
            );
        }

        // Add default values for any valid keys that are empty.
        foreach ($valids as $key => $value) {
            if (!isset($hash[$key])) {
                $hash[$key] = $value;
            }
        }

        return $hash;
    }

    /**
     * Parse a CSS selector into an associative array suitable for
     * use with findNodes().
     *
     * Supported syntax: tag names, #id, .class, [attr=value],
     * [attr~=word], [attr*=substring] and the descendant (space),
     * child (>) and adjacent-sibling (+) combinators, which must be
     * separated from their operands by whitespace.
     *
     * @param string $selector
     * @param mixed  $content  a string adds a 'content' constraint to each
     *                         compound selector; any other value adds none
     *
     * @return array
     *
     * @since  Method available since Release 3.3.0
     */
    public static function convertSelectToTag($selector, $content = true)
    {
        $selector = trim(preg_replace("/\s+/", ' ', $selector));

        // substitute spaces within attribute value
        while (preg_match('/\[[^\]]+"[^"]+\s[^"]+"\]/', $selector)) {
            $selector = preg_replace(
                '/(\[[^\]]+"[^"]+)\s([^"]+"\])/',
                '$1__SPACE__$2',
                $selector
            );
        }

        if (strstr($selector, ' ')) {
            $elements = explode(' ', $selector);
        } else {
            $elements = array($selector);
        }

        $previousTag = array();
        // Defined up front so a selector made only of combinators still
        // yields an array.
        $tag = array();

        // The selector is processed right to left so that each compound
        // selector can embed the matcher built for what follows it.
        foreach (array_reverse($elements) as $element) {
            $element = str_replace('__SPACE__', ' ', $element);

            // child selector
            if ($element == '>') {
                $previousTag = array(
                    'child' => isset($previousTag['descendant']) ? $previousTag['descendant'] : null
                );
                continue;
            }

            // adjacent-sibling selector
            if ($element == '+') {
                $previousTag = array(
                    'adjacent-sibling' => isset($previousTag['descendant']) ? $previousTag['descendant'] : null
                );
                continue;
            }

            $tag = array();

            // match element tag
            preg_match("/^([^\.#\[]*)/", $element, $eltMatches);

            if (!empty($eltMatches[1])) {
                $tag['tag'] = $eltMatches[1];
            }

            // match attributes (\[[^\]]*\]*), ids (#[^\.#\[]*),
            // and classes (\.[^\.#\[]*))
            preg_match_all(
                "/(\[[^\]]*\]*|#[^\.#\[]*|\.[^\.#\[]*)/",
                $element,
                $matches
            );

            if (!empty($matches[1])) {
                $classes = array();
                $attrs   = array();

                foreach ($matches[1] as $match) {
                    // id matched
                    if (substr($match, 0, 1) == '#') {
                        $tag['id'] = substr($match, 1);
                    } // class matched
                    elseif (substr($match, 0, 1) == '.') {
                        $classes[] = substr($match, 1);
                    } // attribute matched
                    elseif (substr($match, 0, 1) == '[' &&
                            substr($match, -1, 1) == ']') {
                        $attribute = substr($match, 1, strlen($match) - 2);
                        $attribute = str_replace('"', '', $attribute);

                        // Split on the first operator only, so values that
                        // themselves contain '=' stay intact. The value is
                        // quoted before being embedded in a generated regex.

                        // match single word
                        if (strstr($attribute, '~=')) {
                            list($key, $value) = explode('~=', $attribute, 2);
                            $value             = 'regexp:/.*\b' . preg_quote($value, '/') . '\b.*/';
                        } // match substring
                        elseif (strstr($attribute, '*=')) {
                            list($key, $value) = explode('*=', $attribute, 2);
                            $value             = 'regexp:/.*' . preg_quote($value, '/') . '.*/';
                        } // exact match
                        else {
                            // A valueless attribute ("[disabled]") yields a
                            // null value instead of an undefined offset.
                            list($key, $value) = array_pad(explode('=', $attribute, 2), 2, null);
                        }

                        $attrs[$key] = $value;
                    }
                }

                if (!empty($classes)) {
                    $tag['class'] = implode(' ', $classes);
                }

                if (!empty($attrs)) {
                    $tag['attributes'] = $attrs;
                }
            }

            // tag content
            if (is_string($content)) {
                $tag['content'] = $content;
            }

            // determine previous child/descendants
            if (!empty($previousTag['descendant'])) {
                $tag['descendant'] = $previousTag['descendant'];
            } elseif (!empty($previousTag['child'])) {
                $tag['child'] = $previousTag['child'];
            } elseif (!empty($previousTag['adjacent-sibling'])) {
                $tag['adjacent-sibling'] = $previousTag['adjacent-sibling'];
                unset($tag['content']);
            }

            $previousTag = array('descendant' => $tag);
        }

        return $tag;
    }

    /**
     * Parse an $actual document and return an array of DOMNodes
     * matching the CSS $selector. If no node matches, false is returned.
     *
     * To only return nodes containing a certain content, give
     * the $content to match as a string. Otherwise, setting
     * $content to true will return all nodes matching $selector.
     *
     * The $actual document may be a DOMDocument or a string
     * containing XML or HTML, identified by $isHtml.
     *
     * @param string $selector
     * @param mixed  $content
     * @param mixed  $actual
     * @param bool   $isHtml
     *
     * @return array|false
     *
     * @since  Method available since Release 3.3.0
     */
    public static function cssSelect($selector, $content, $actual, $isHtml = true)
    {
        $matcher = self::convertSelectToTag($selector, $content);
        $dom     = self::load($actual, $isHtml);

        return self::findNodes($dom, $matcher, $isHtml);
    }

    /**
     * Parse out the options from the tag using DOM object tree.
     *
     * Candidate elements are collected by tag name (or, without a tag, from
     * a fixed list of HTML tag names) and then narrowed by each option that
     * is set: attributes (including id and class), content, parent, child,
     * adjacent-sibling, ancestor, descendant and children.
     *
     * @param DOMDocument $dom
     * @param array       $options
     * @param bool        $isHtml
     *
     * @return array|false the matching nodes, or false as soon as a filter
     *                     leaves no candidate
     *
     * @throws PHPUnit_Framework_Exception when $options (or its 'children'
     *                                     entry) contains an unknown key
     *
     * @since  Method available since Release 3.3.0
     */
    public static function findNodes(DOMDocument $dom, array $options, $isHtml = true)
    {
        $valid = array(
            'id', 'class', 'tag', 'content', 'attributes', 'parent',
            'child', 'ancestor', 'descendant', 'children', 'adjacent-sibling'
        );

        $filtered = array();
        $options  = self::assertValidKeys($options, $valid);

        // find the element by id
        if ($options['id']) {
            $options['attributes']['id'] = $options['id'];
        }

        if ($options['class']) {
            $options['attributes']['class'] = $options['class'];
        }

        $nodes = array();

        // find the element by a tag type
        if ($options['tag']) {
            if ($isHtml) {
                $elements = self::getElementsByCaseInsensitiveTagName(
                    $dom,
                    $options['tag']
                );
            } else {
                $elements = $dom->getElementsByTagName($options['tag']);
            }

            foreach ($elements as $element) {
                $nodes[] = $element;
            }

            if (empty($nodes)) {
                return false;
            }
        } // no tag selected, get them all
        else {
            $tags = array(
                'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
                'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
                'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl',
                'dt', 'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2',
                'h3', 'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe',
                'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
                'map', 'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
                'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
                'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
                'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title',
                'tr', 'tt', 'ul', 'var',
                // HTML5
                'article', 'aside', 'audio', 'bdi', 'canvas', 'command',
                'datalist', 'details', 'dialog', 'embed', 'figure', 'figcaption',
                'footer', 'header', 'hgroup', 'keygen', 'mark', 'meter', 'nav',
                'output', 'progress', 'ruby', 'rt', 'rp', 'track', 'section',
                'source', 'summary', 'time', 'video', 'wbr'
            );

            foreach ($tags as $tag) {
                if ($isHtml) {
                    $elements = self::getElementsByCaseInsensitiveTagName(
                        $dom,
                        $tag
                    );
                } else {
                    $elements = $dom->getElementsByTagName($tag);
                }

                foreach ($elements as $element) {
                    $nodes[] = $element;
                }
            }

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by attributes
        if ($options['attributes']) {
            foreach ($nodes as $node) {
                $invalid = false;

                foreach ($options['attributes'] as $name => $value) {
                    // match by regexp if like "regexp:/foo/i"
                    if (preg_match('/^regexp\s*:\s*(.*)/i', $value, $matches)) {
                        if (!preg_match($matches[1], $node->getAttribute($name))) {
                            $invalid = true;
                        }
                    } // class can match only a part
                    elseif ($name == 'class') {
                        // split to individual classes
                        $findClasses = explode(
                            ' ',
                            preg_replace("/\s+/", ' ', $value)
                        );

                        $allClasses = explode(
                            ' ',
                            preg_replace("/\s+/", ' ', $node->getAttribute($name))
                        );

                        // make sure each class given is in the actual node
                        foreach ($findClasses as $findClass) {
                            if (!in_array($findClass, $allClasses, true)) {
                                $invalid = true;
                            }
                        }
                    } // match by exact string
                    else {
                        if ($node->getAttribute($name) != $value) {
                            $invalid = true;
                        }
                    }
                }

                // if every attribute given matched
                if (!$invalid) {
                    $filtered[] = $node;
                }
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by content
        if ($options['content'] !== null) {
            foreach ($nodes as $node) {
                $invalid = false;

                // match by regexp if like "regexp:/foo/i"
                if (preg_match('/^regexp\s*:\s*(.*)/i', $options['content'], $matches)) {
                    if (!preg_match($matches[1], self::getNodeText($node))) {
                        $invalid = true;
                    }
                } // match empty string
                elseif ($options['content'] === '') {
                    if (self::getNodeText($node) !== '') {
                        $invalid = true;
                    }
                } // match when the node text contains the given string
                elseif (strstr(self::getNodeText($node), $options['content']) === false) {
                    $invalid = true;
                }

                if (!$invalid) {
                    $filtered[] = $node;
                }
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by parent node
        if ($options['parent']) {
            $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
            $parentNode  = isset($parentNodes[0]) ? $parentNodes[0] : null;

            foreach ($nodes as $node) {
                if ($parentNode !== $node->parentNode) {
                    continue;
                }

                $filtered[] = $node;
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by child node
        if ($options['child']) {
            $childNodes = self::findNodes($dom, $options['child'], $isHtml);
            $childNodes = !empty($childNodes) ? $childNodes : array();

            // A node is kept once per matching child.
            foreach ($nodes as $node) {
                foreach ($node->childNodes as $child) {
                    foreach ($childNodes as $childNode) {
                        if ($childNode === $child) {
                            $filtered[] = $node;
                        }
                    }
                }
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by adjacent-sibling
        if ($options['adjacent-sibling']) {
            $adjacentSiblingNodes = self::findNodes($dom, $options['adjacent-sibling'], $isHtml);
            $adjacentSiblingNodes = !empty($adjacentSiblingNodes) ? $adjacentSiblingNodes : array();

            foreach ($nodes as $node) {
                $sibling = $node;

                // Only the first following element sibling is considered.
                while ($sibling = $sibling->nextSibling) {
                    if ($sibling->nodeType !== XML_ELEMENT_NODE) {
                        continue;
                    }

                    foreach ($adjacentSiblingNodes as $adjacentSiblingNode) {
                        if ($sibling === $adjacentSiblingNode) {
                            $filtered[] = $node;
                            break;
                        }
                    }

                    break;
                }
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by ancestor
        if ($options['ancestor']) {
            $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
            $ancestorNode  = isset($ancestorNodes[0]) ? $ancestorNodes[0] : null;

            foreach ($nodes as $node) {
                $parent = $node->parentNode;

                while ($parent && $parent->nodeType != XML_HTML_DOCUMENT_NODE) {
                    if ($parent === $ancestorNode) {
                        $filtered[] = $node;
                    }

                    $parent = $parent->parentNode;
                }
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by descendant
        if ($options['descendant']) {
            $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
            $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();

            // A node is kept once per matching descendant.
            foreach ($nodes as $node) {
                foreach (self::getDescendants($node) as $descendant) {
                    foreach ($descendantNodes as $descendantNode) {
                        if ($descendantNode === $descendant) {
                            $filtered[] = $node;
                        }
                    }
                }
            }

            $nodes    = $filtered;
            $filtered = array();

            if (empty($nodes)) {
                return false;
            }
        }

        // filter by children
        if ($options['children']) {
            $validChild   = array('count', 'greater_than', 'less_than', 'only');
            $childOptions = self::assertValidKeys(
                $options['children'],
                $validChild
            );

            foreach ($nodes as $node) {
                // Start from an empty list for every node; otherwise the
                // children of earlier nodes would be counted again.
                $children = array();

                foreach ($node->childNodes as $childNode) {
                    if ($childNode->nodeType !== XML_CDATA_SECTION_NODE &&
                        $childNode->nodeType !== XML_TEXT_NODE) {
                        $children[] = $childNode;
                    }
                }

                // we must have children to pass this filter
                if (empty($children)) {
                    continue;
                }

                // A node failing a constraint is skipped; the remaining
                // nodes are still examined.

                // exact count of children
                if ($childOptions['count'] !== null) {
                    if (count($children) !== $childOptions['count']) {
                        continue;
                    }
                } // range count of children
                elseif ($childOptions['less_than'] !== null &&
                        $childOptions['greater_than'] !== null) {
                    if (count($children) >= $childOptions['less_than'] ||
                        count($children) <= $childOptions['greater_than']) {
                        continue;
                    }
                } // less than a given count
                elseif ($childOptions['less_than'] !== null) {
                    if (count($children) >= $childOptions['less_than']) {
                        continue;
                    }
                } // more than a given count
                elseif ($childOptions['greater_than'] !== null) {
                    if (count($children) <= $childOptions['greater_than']) {
                        continue;
                    }
                }

                // match each child against a specific tag
                if ($childOptions['only']) {
                    $onlyNodes = self::findNodes(
                        $dom,
                        $childOptions['only'],
                        $isHtml
                    );
                    $onlyNodes = !empty($onlyNodes) ? $onlyNodes : array();

                    // try to match each child to one of the 'only' nodes
                    foreach ($children as $child) {
                        $matched = false;

                        foreach ($onlyNodes as $onlyNode) {
                            if ($onlyNode === $child) {
                                $matched = true;
                                break;
                            }
                        }

                        if (!$matched) {
                            continue 2;
                        }
                    }
                }

                $filtered[] = $node;
            }

            $nodes = $filtered;

            if (empty($nodes)) {
                return false;
            }
        }

        // return every node that matches all criteria
        return !empty($nodes) ? $nodes : array();
    }

    /**
     * Recursively get flat array of all descendants of this node.
     *
     * Text and CDATA nodes are skipped. Each child is listed after its own
     * descendants.
     *
     * @param DOMNode $node
     *
     * @return array
     *
     * @since  Method available since Release 3.3.0
     */
    protected static function getDescendants(DOMNode $node)
    {
        $allChildren = array();
        $childNodes  = $node->childNodes ? $node->childNodes : array();

        foreach ($childNodes as $child) {
            if ($child->nodeType === XML_CDATA_SECTION_NODE ||
                $child->nodeType === XML_TEXT_NODE) {
                continue;
            }

            foreach (self::getDescendants($child) as $descendant) {
                $allChildren[] = $descendant;
            }

            $allChildren[] = $child;
        }

        return $allChildren;
    }

    /**
     * Gets elements by case insensitive tagname.
     *
     * The lower-case spelling of $tag is looked up first; the upper-case
     * spelling is used only when that finds nothing. Mixed-case element
     * names are not tried.
     *
     * @param DOMDocument $dom
     * @param string      $tag
     *
     * @return DOMNodeList
     *
     * @since  Method available since Release 3.4.0
     */
    protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag)
    {
        $elements = $dom->getElementsByTagName(strtolower($tag));

        if ($elements->length == 0) {
            $elements = $dom->getElementsByTagName(strtoupper($tag));
        }

        return $elements;
    }

    /**
     * Get the text value of this node's child text node.
     *
     * Text and CDATA children are trimmed and joined with single spaces,
     * recursing into other children; the result therefore ends with a
     * space whenever any text node was found.
     *
     * @param DOMNode $node
     *
     * @return string
     *
     * @since  Method available since Release 3.3.0
     */
    protected static function getNodeText(DOMNode $node)
    {
        if (!$node->childNodes instanceof DOMNodeList) {
            return '';
        }

        $result = '';

        foreach ($node->childNodes as $childNode) {
            if ($childNode->nodeType === XML_TEXT_NODE ||
                $childNode->nodeType === XML_CDATA_SECTION_NODE) {
                $result .= trim($childNode->data) . ' ';
            } else {
                $result .= self::getNodeText($childNode);
            }
        }

        // Whitespace-only text nodes contribute a lone space; collapse the
        // resulting double spaces.
        return str_replace('  ', ' ', $result);
    }
}