*/
class Crawler implements \Countable, \IteratorAggregate
{
- /**
- * @var string The current URI
- */
protected $uri;
/**
private $isHtml = true;
/**
- * @param mixed $node A Node to use as the base for the crawling
- * @param string $currentUri The current URI
- * @param string $baseHref The base href value
+ * @param mixed $node A Node to use as the base for the crawling
+ * @param string $uri The current URI
+ * @param string $baseHref The base href value
*/
- public function __construct($node = null, $currentUri = null, $baseHref = null)
+ public function __construct($node = null, $uri = null, $baseHref = null)
{
// Store the current URI. The base href falls back to that URI whenever
// $baseHref is falsy (null, '', ...), because ?: tests truthiness, not null-ness.
- $this->uri = $currentUri;
- $this->baseHref = $baseHref ?: $currentUri;
+ $this->uri = $uri;
+ $this->baseHref = $baseHref ?: $uri;
// Seed the crawler with the initial node(s); add() only throws for
// non-null values of an unsupported type, so the null default is safe here.
$this->add($node);
}
*
* @param \DOMNodeList|\DOMNode|array|string|null $node A node
*
- * @throws \InvalidArgumentException When node is not the expected type.
+ * @throws \InvalidArgumentException when node is not the expected type
*/
public function add($node)
{
$this->addNodeList($node);
} elseif ($node instanceof \DOMNode) {
$this->addNode($node);
- } elseif (is_array($node)) {
+ } elseif (\is_array($node)) {
$this->addNodes($node);
- } elseif (is_string($node)) {
+ } elseif (\is_string($node)) {
$this->addContent($node);
} elseif (null !== $node) {
- throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', is_object($node) ? get_class($node) : gettype($node)));
+ throw new \InvalidArgumentException(sprintf('Expecting a DOMNodeList or DOMNode instance, an array, a string, or null, but got "%s".', \is_object($node) ? \get_class($node) : \gettype($node)));
}
}
/**
* Adds HTML/XML content.
*
- * If the charset is not set via the content type, it is assumed
- * to be ISO-8859-1, which is the default charset defined by the
+ * If the charset is not set via the content type, it is assumed to be UTF-8 when
+ * the content is valid UTF-8, or else ISO-8859-1, the default charset defined by the
* HTTP 1.1 specification.
*
* @param string $content A string to parse as HTML/XML
- * @param null|string $type The content type of the string
+ * @param string|null $type The content type of the string
*/
public function addContent($content, $type = null)
{
}
if (null === $charset) {
- $charset = 'ISO-8859-1';
+ $charset = preg_match('//u', $content) ? 'UTF-8' : 'ISO-8859-1';
}
if ('x' === $xmlMatches[1]) {
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));
$baseHref = current($base);
- if (count($base) && !empty($baseHref)) {
+ if (\count($base) && !empty($baseHref)) {
if ($this->baseHref) {
$linkNode = $dom->createElement('a');
$linkNode->setAttribute('href', $baseHref);
}
// Don't add duplicate nodes in the Crawler
- if (in_array($node, $this->nodes, true)) {
+ if (\in_array($node, $this->nodes, true)) {
return;
}
*/
public function slice($offset = 0, $length = null)
{
// $offset and $length are passed straight to array_slice(), so its rules apply
// (a null $length means "up to the end of the node list"). The selected nodes
// are then handed to createSubCrawler() instead of being returned as a raw array.
- return $this->createSubCrawler(array_slice($this->nodes, $offset, $length));
+ return $this->createSubCrawler(\array_slice($this->nodes, $offset, $length));
}
/**
*/
public function last()
{
// The last node sits at index count - 1. For an empty node list this asks
// eq() for index -1, so the outcome in that case is whatever eq() does with
// a position that does not exist.
- return $this->eq(count($this->nodes) - 1);
+ return $this->eq(\count($this->nodes) - 1);
}
/**
*
* Example:
*
- * $crawler->filter('h1 a')->extract(array('_text', 'href'));
+ * $crawler->filter('h1 a')->extract(array('_text', 'href'));
*
* @param array $attributes An array of attributes
*
public function extract($attributes)
{
$attributes = (array) $attributes;
- $count = count($attributes);
+ $count = \count($attributes);
$data = array();
foreach ($this->nodes as $node) {
}
}
- $data[] = $count > 1 ? $elements : $elements[0];
+ $data[] = 1 === $count ? $elements[0] : $elements;
}
return $data;
*/
public function filter($selector)
{
- if (!class_exists('Symfony\\Component\\CssSelector\\CssSelectorConverter')) {
- throw new \RuntimeException('Unable to filter with a CSS selector as the Symfony CssSelector 2.8+ is not installed (you can use filterXPath instead).');
+ if (!class_exists(CssSelectorConverter::class)) {
+ throw new \RuntimeException('To filter with a CSS selector, install the CssSelector component ("composer require symfony/css-selector"). Or use filterXpath instead.');
}
$converter = new CssSelectorConverter($this->isHtml);
public function selectButton($value)
{
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
- $xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
+ $xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%1$s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, static::xpathLiteral(' '.$value.' ')).
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id=%s or @name=%s] ', $translate, static::xpathLiteral(' '.$value.' '), static::xpathLiteral($value), static::xpathLiteral($value)).
sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id=%s or @name=%s]', static::xpathLiteral(' '.$value.' '), static::xpathLiteral($value), static::xpathLiteral($value));
$node = $this->getNode(0);
if (!$node instanceof \DOMElement) {
- throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node)));
+ throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node)));
}
return new Link($node, $this->baseHref, $method);
$links = array();
foreach ($this->nodes as $node) {
if (!$node instanceof \DOMElement) {
- throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($node)));
+ throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', \get_class($node)));
}
$links[] = new Link($node, $this->baseHref, 'get');
*/
public function image()
{
- if (!count($this)) {
+ if (!\count($this)) {
throw new \InvalidArgumentException('The current node list is empty.');
}
$node = $this->getNode(0);
if (!$node instanceof \DOMElement) {
- throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node)));
+ throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node)));
}
return new Image($node, $this->baseHref);
$images = array();
foreach ($this as $node) {
if (!$node instanceof \DOMElement) {
- throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', get_class($node)));
+ throw new \InvalidArgumentException(sprintf('The current node list should contain only DOMElement instances, "%s" found.', \get_class($node)));
}
$images[] = new Image($node, $this->baseHref);
$node = $this->getNode(0);
if (!$node instanceof \DOMElement) {
- throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', get_class($node)));
+ throw new \InvalidArgumentException(sprintf('The selected node should be instance of DOMElement, got "%s".', \get_class($node)));
}
$form = new Form($node, $this->uri, $method, $this->baseHref);
* Escaped characters are: quotes (") and apostrophe (').
*
* Examples:
- * <code>
+ *
* echo Crawler::xpathLiteral('foo " bar');
* //prints 'foo " bar'
*
*
* echo Crawler::xpathLiteral('a\'b"c');
* //prints concat('a', "'", 'b"c')
- * </code>
+ *
*
* @param string $s String to be escaped
*
// We cannot simply drop
$nonMatchingExpression = 'a[name() = "b"]';
- $xpathLen = strlen($xpath);
+ $xpathLen = \strlen($xpath);
$openedBrackets = 0;
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
*/
public function count()
{
// Countable implementation: count($crawler) reports how many nodes
// are currently held in $this->nodes.
- return count($this->nodes);
+ return \count($this->nodes);
}
/**
- * @return \ArrayIterator
+ * @return \ArrayIterator|\DOMElement[]
*/
public function getIterator()
{
$nodes = array();
do {
- if ($node !== $this->getNode(0) && $node->nodeType === 1) {
+ if ($node !== $this->getNode(0) && 1 === $node->nodeType) {
$nodes[] = $node;
}
} while ($node = $node->$siblingDir);