4 * This file is part of the Symfony package.
6 * (c) Fabien Potencier <fabien@symfony.com>
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
12 namespace Symfony\Component\DomCrawler\Tests;
14 use PHPUnit\Framework\TestCase;
15 use Symfony\Component\DomCrawler\Crawler;
17 class CrawlerTest extends TestCase
/**
 * A crawler built without arguments is empty; one built from a single DOM node counts one node.
 */
public function testConstructor()
{
    $emptyCrawler = new Crawler();
    $this->assertCount(0, $emptyCrawler, '__construct() returns an empty crawler');

    $document = new \DOMDocument();
    $element = $document->createElement('test');

    $nodeCrawler = new Crawler($element);
    $this->assertCount(1, $nodeCrawler, '__construct() takes a node as a first argument');
}
/**
 * The URI passed as the second constructor argument is returned by getUri().
 */
public function testGetUri()
{
    $expectedUri = 'http://symfony.com';
    $subject = new Crawler(null, $expectedUri);

    $this->assertEquals($expectedUri, $subject->getUri());
}
/**
 * The base href passed as the third constructor argument is returned by getBaseHref().
 */
public function testGetBaseHref()
{
    $expectedBaseHref = 'http://symfony.com';
    $subject = new Crawler(null, null, $expectedBaseHref);

    $this->assertEquals($expectedBaseHref, $subject->getBaseHref());
}
// Feeds Crawler::add() a \DOMDocument, a \DOMNodeList, an array of nodes, a single \DOMNode and
// an HTML string, checking after each call that the added content can be queried.
// NOTE(review): the statements that build the node array and pass it to add() are not visible
// in this excerpt — confirm against the full file before changing this method.
45 public function testAdd()
47 $crawler = new Crawler();
48 $crawler->add($this->createDomDocument());
49 $this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->add() adds nodes from a \DOMDocument');
51 $crawler = new Crawler();
52 $crawler->add($this->createNodeList());
53 $this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->add() adds nodes from a \DOMNodeList');
56 foreach ($this->createNodeList() as $node) {
59 $crawler = new Crawler();
61 $this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->add() adds nodes from an array of nodes');
63 $crawler = new Crawler();
64 $crawler->add($this->createNodeList()->item(0));
65 $this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->add() adds nodes from a \DOMNode');
67 $crawler = new Crawler();
68 $crawler->add('<html><body>Foo</body></html>');
69 $this->assertEquals('Foo', $crawler->filterXPath('//body')->text(), '->add() adds nodes from a string');
73 * @expectedException \InvalidArgumentException
75 public function testAddInvalidType()
// The test passes only if an \InvalidArgumentException is thrown (see the annotation above).
// NOTE(review): the call that is expected to throw is not visible in this excerpt — presumably
// add() is invoked with an unsupported argument type; confirm against the full file.
77 $crawler = new Crawler();
/**
 * Adding HTML content to the crawler returned by createTestCrawler() is expected to fail
 * with the "multiple documents" error.
 *
 * @expectedException \InvalidArgumentException
 * @expectedExceptionMessage Attaching DOM nodes from multiple documents in the same crawler is forbidden.
 */
public function testAddMultipleDocumentNode()
{
    $existingCrawler = $this->createTestCrawler();

    $existingCrawler->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');
}
/**
 * Nodes parsed from an HTML string by addHtmlContent() can be queried afterwards.
 */
public function testAddHtmlContent()
{
    $subject = new Crawler();
    $subject->addHtmlContent('<html><div class="foo"></html>', 'UTF-8');

    $divs = $subject->filterXPath('//div');
    $this->assertEquals('foo', $divs->attr('class'), '->addHtmlContent() adds nodes from an HTML string');
}
/**
 * A <base href> element in the added HTML is kept and is used to resolve link URIs.
 */
public function testAddHtmlContentWithBaseTag()
{
    $subject = new Crawler();
    $subject->addHtmlContent('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html>', 'UTF-8');

    $baseHref = $subject->filterXPath('//base')->attr('href');
    $this->assertEquals('http://symfony.com', $baseHref, '->addHtmlContent() adds nodes from an HTML string');

    $contactUri = $subject->filterXPath('//a')->link()->getUri();
    $this->assertEquals('http://symfony.com/contact', $contactUri, '->addHtmlContent() adds nodes from an HTML string');
}
/**
 * Non-ASCII UTF-8 text is read back unchanged from HTML added with an explicit UTF-8 charset.
 *
 * @requires extension mbstring
 */
public function testAddHtmlContentCharset()
{
    $subject = new Crawler();
    $subject->addHtmlContent('<html><div class="foo">Tiếng Việt</html>', 'UTF-8');

    $divText = $subject->filterXPath('//div')->text();
    $this->assertEquals('Tiếng Việt', $divText);
}
/**
 * A <base> element without an href attribute does not affect link resolution:
 * the crawler URI is still used.
 */
public function testAddHtmlContentInvalidBaseTag()
{
    $subject = new Crawler(null, 'http://symfony.com');
    $subject->addHtmlContent('<html><head><base target="_top"></head><a href="/contact"></a></html>', 'UTF-8');

    $anchorLinks = $subject->filterXPath('//a')->links();
    $this->assertEquals('http://symfony.com/contact', current($anchorLinks)->getUri(), '->addHtmlContent() correctly handles a non-existent base tag href attribute');
}
/**
 * The Windows-1250 fixture is added with its charset name and its paragraph text is
 * expected to come back as the matching UTF-8 string.
 */
public function testAddHtmlContentUnsupportedCharset()
{
    $fixtureHtml = file_get_contents(__DIR__.'/Fixtures/windows-1250.html');

    $subject = new Crawler();
    $subject->addHtmlContent($fixtureHtml, 'Windows-1250');

    $this->assertEquals('Žťčýů', $subject->filterXPath('//p')->text());
}
/**
 * GBK-encoded HTML added with the "gbk" charset yields the expected Chinese paragraph text.
 *
 * @requires extension mbstring
 */
public function testAddHtmlContentCharsetGbk()
{
    // GBK-encoded bytes of <html><p>中文</p></html>, stored as base64 to keep this file valid UTF-8.
    $gbkHtml = base64_decode('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+');

    $subject = new Crawler();
    $subject->addHtmlContent($gbkHtml, 'gbk');

    $this->assertEquals('中文', $subject->filterXPath('//p')->text());
}
// Enables libxml internal error collection, adds HTML containing a <nav> element with unclosed
// anchors, and expects exactly one collected error whose message is "Tag nav invalid\n".
// The collected errors are then cleared and the previous libxml error mode is restored.
// NOTE(review): most of the heredoc and the closing arguments of addHtmlContent() are not
// visible in this excerpt — confirm against the full file before editing.
149 public function testAddHtmlContentWithErrors()
151 $internalErrors = libxml_use_internal_errors(true);
153 $crawler = new Crawler();
154 $crawler->addHtmlContent(<<<'EOF'
160 <nav><a href="#"><a href="#"></nav>
166 $errors = libxml_get_errors();
167 $this->assertCount(1, $errors);
168 $this->assertEquals("Tag nav invalid\n", $errors[0]->message);
170 libxml_clear_errors();
171 libxml_use_internal_errors($internalErrors);
/**
 * Nodes parsed from an XML string by addXmlContent() can be queried afterwards.
 */
public function testAddXmlContent()
{
    $subject = new Crawler();
    $subject->addXmlContent('<html><div class="foo"></div></html>', 'UTF-8');

    $divs = $subject->filterXPath('//div');
    $this->assertEquals('foo', $divs->attr('class'), '->addXmlContent() adds nodes from an XML string');
}
/**
 * Non-ASCII UTF-8 text is read back unchanged from XML added with an explicit UTF-8 charset.
 */
public function testAddXmlContentCharset()
{
    $subject = new Crawler();
    $subject->addXmlContent('<html><div class="foo">Tiếng Việt</div></html>', 'UTF-8');

    $divText = $subject->filterXPath('//div')->text();
    $this->assertEquals('Tiếng Việt', $divText);
}
// Enables libxml internal error collection, adds malformed XML, asserts on the collected
// errors, then clears them and restores the previous libxml error mode.
// NOTE(review): assertGreaterThan(1, libxml_get_errors()) compares the integer 1 with the array
// returned by libxml_get_errors() rather than with the number of errors — presumably
// count(libxml_get_errors()) was intended; confirm and tighten the assertion.
// NOTE(review): most of the heredoc and the closing arguments of addXmlContent() are not
// visible in this excerpt.
190 public function testAddXmlContentWithErrors()
192 $internalErrors = libxml_use_internal_errors(true);
194 $crawler = new Crawler();
195 $crawler->addXmlContent(<<<'EOF'
201 <nav><a href="#"><a href="#"></nav>
207 $this->assertGreaterThan(1, libxml_get_errors());
209 libxml_clear_errors();
210 libxml_use_internal_errors($internalErrors);
/**
 * Exercises addContent() with HTML types (including an extended type and no type at all),
 * XML types, a plain-text type that must leave the crawler empty, and an HTML document
 * that declares its charset in a meta tag.
 */
public function testAddContent()
{
    $htmlCrawler = new Crawler();
    $htmlCrawler->addContent('<html><div class="foo"></html>', 'text/html; charset=UTF-8');
    $this->assertEquals('foo', $htmlCrawler->filterXPath('//div')->attr('class'), '->addContent() adds nodes from an HTML string');

    $extendedTypeCrawler = new Crawler();
    $extendedTypeCrawler->addContent('<html><div class="foo"></html>', 'text/html; charset=UTF-8; dir=RTL');
    $this->assertEquals('foo', $extendedTypeCrawler->filterXPath('//div')->attr('class'), '->addContent() adds nodes from an HTML string with extended content type');

    $defaultTypeCrawler = new Crawler();
    $defaultTypeCrawler->addContent('<html><div class="foo"></html>');
    $this->assertEquals('foo', $defaultTypeCrawler->filterXPath('//div')->attr('class'), '->addContent() uses text/html as the default type');

    $xmlWithCharsetCrawler = new Crawler();
    $xmlWithCharsetCrawler->addContent('<html><div class="foo"></div></html>', 'text/xml; charset=UTF-8');
    $this->assertEquals('foo', $xmlWithCharsetCrawler->filterXPath('//div')->attr('class'), '->addContent() adds nodes from an XML string');

    $xmlCrawler = new Crawler();
    $xmlCrawler->addContent('<html><div class="foo"></div></html>', 'text/xml');
    $this->assertEquals('foo', $xmlCrawler->filterXPath('//div')->attr('class'), '->addContent() adds nodes from an XML string');

    $plainTextCrawler = new Crawler();
    $plainTextCrawler->addContent('foo bar', 'text/plain');
    $this->assertCount(0, $plainTextCrawler, '->addContent() does nothing if the type is not (x|ht)ml');

    $metaCharsetCrawler = new Crawler();
    $metaCharsetCrawler->addContent('<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html>');
    $this->assertEquals('中文', $metaCharsetCrawler->filterXPath('//span')->text(), '->addContent() guess wrong charset');
}
/**
 * A Shift_JIS encoded document announcing its charset through an HTML5 meta tag is decoded correctly.
 *
 * @requires extension iconv
 */
public function testAddContentNonUtf8()
{
    $sjisHtml = iconv('UTF-8', 'SJIS', '<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html>');

    $subject = new Crawler();
    $subject->addContent($sjisHtml);

    $this->assertEquals('日本語', $subject->filterXPath('//body')->text(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag');
}
/**
 * Nodes of a \DOMDocument passed to addDocument() can be queried afterwards.
 */
public function testAddDocument()
{
    $document = $this->createDomDocument();

    $subject = new Crawler();
    $subject->addDocument($document);

    $this->assertEquals('foo', $subject->filterXPath('//div')->attr('class'), '->addDocument() adds nodes from a \DOMDocument');
}
/**
 * Nodes of a \DOMNodeList passed to addNodeList() can be queried afterwards.
 */
public function testAddNodeList()
{
    $nodeList = $this->createNodeList();

    $subject = new Crawler();
    $subject->addNodeList($nodeList);

    $this->assertEquals('foo', $subject->filterXPath('//div')->attr('class'), '->addNodeList() adds nodes from a \DOMNodeList');
}
// Passes $list to addNodes() on a fresh crawler and checks the added content can be queried.
// NOTE(review): the statements that initialise and fill $list inside the foreach are not
// visible in this excerpt — presumably it collects the nodes of createNodeList(); confirm.
270 public function testAddNodes()
273 foreach ($this->createNodeList() as $node) {
277 $crawler = new Crawler();
278 $crawler->addNodes($list);
280 $this->assertEquals('foo', $crawler->filterXPath('//div')->attr('class'), '->addNodes() adds nodes from an array of nodes');
/**
 * A single \DOMNode passed to addNode() can be queried afterwards.
 */
public function testAddNode()
{
    $firstNode = $this->createNodeList()->item(0);

    $subject = new Crawler();
    $subject->addNode($firstNode);

    $this->assertEquals('foo', $subject->filterXPath('//div')->attr('class'), '->addNode() adds nodes from a \DOMNode');
}
// Builds a crawler around a single element and expects it to be empty at the final assertion.
// NOTE(review): the clear() call named in the assertion message is not visible in this
// excerpt — confirm it sits between the constructor call and the assertion.
291 public function testClear()
293 $doc = new \DOMDocument();
294 $node = $doc->createElement('test');
296 $crawler = new Crawler($node);
298 $this->assertCount(0, $crawler, '->clear() removes all the nodes from the crawler');
/**
 * eq() returns a new crawler holding only the node at the given position, or an empty
 * crawler when the position does not exist.
 */
public function testEq()
{
    $crawler = $this->createTestCrawler()->filterXPath('//li');
    $first = $crawler->eq(0);

    $this->assertNotSame($crawler, $first, '->eq() returns a new instance of a crawler');
    // Check the value returned by eq(), not the crawler it was called on; asserting on
    // $crawler would pass regardless of what eq() returns.
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $first, '->eq() returns a new instance of a crawler');

    $this->assertEquals('Two', $crawler->eq(1)->text(), '->eq() returns the nth node of the list');
    $this->assertCount(0, $crawler->eq(100), '->eq() returns an empty crawler if the nth node does not exist');
}
/**
 * each() invokes the callback with every node and its index and returns the collected results.
 */
public function testEach()
{
    $describeNode = function ($node, $i) {
        return $i.'-'.$node->text();
    };

    $items = $this->createTestCrawler()->filterXPath('//ul[1]/li');
    $data = $items->each($describeNode);

    $this->assertEquals(array('0-One', '1-Two', '2-Three'), $data, '->each() executes an anonymous function on each node of the list');
}
/**
 * A crawler is Traversable and iterating it yields DOMElement instances.
 */
public function testIteration()
{
    $items = $this->createTestCrawler()->filterXPath('//li');

    $this->assertInstanceOf('Traversable', $items);

    $iterated = iterator_to_array($items);
    $this->assertContainsOnlyInstancesOf('DOMElement', $iterated, 'Iterating a Crawler gives DOMElement instances');
}
/**
 * slice() returns a new crawler; without arguments it keeps every node, with offset and
 * length it keeps only the requested range.
 */
public function testSlice()
{
    $items = $this->createTestCrawler()->filterXPath('//ul[1]/li');

    $this->assertNotSame($items->slice(), $items, '->slice() returns a new instance of a crawler');
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $items->slice(), '->slice() returns a new instance of a crawler');

    $everything = $items->slice();
    $this->assertCount(3, $everything, '->slice() does not slice the nodes in the list if any param is entered');

    $secondOnly = $items->slice(1, 1);
    $this->assertCount(1, $secondOnly, '->slice() slices the nodes in the list');
}
// Applies reduce() with a callback to the //ul[1]/li selection and expects a new Crawler
// instance containing two nodes.
// NOTE(review): the body and closing of the callback are not visible in this excerpt —
// confirm which node the callback rejects before editing.
338 public function testReduce()
340 $crawler = $this->createTestCrawler()->filterXPath('//ul[1]/li');
341 $nodes = $crawler->reduce(function ($node, $i) {
344 $this->assertNotSame($nodes, $crawler, '->reduce() returns a new instance of a crawler');
345 $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $nodes, '->reduce() returns a new instance of a crawler');
347 $this->assertCount(2, $nodes, '->reduce() filters the nodes in the list');
/**
 * attr() reads the attribute of the first selected node and throws on an empty selection.
 */
public function testAttr()
{
    $firstItemClass = $this->createTestCrawler()->filterXPath('//li')->attr('class');
    $this->assertEquals('first', $firstItemClass, '->attr() returns the attribute of the first element of the node list');

    try {
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->attr('class');
        $this->fail('->attr() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->attr() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * attr() distinguishes a non-empty attribute, an empty attribute (empty string)
 * and a missing attribute (null).
 */
public function testMissingAttrValueIsNull()
{
    $subject = new Crawler();
    $subject->addContent('<html><div non-empty-attr="sample value" empty-attr=""></div></html>', 'text/html; charset=UTF-8');

    $divSelection = $subject->filterXPath('//div');

    $this->assertEquals('sample value', $divSelection->attr('non-empty-attr'), '->attr() reads non-empty attributes correctly');
    $this->assertEquals('', $divSelection->attr('empty-attr'), '->attr() reads empty attributes correctly');
    $this->assertNull($divSelection->attr('missing-attr'), '->attr() reads missing attributes correctly');
}
/**
 * nodeName() returns the name of the first selected node and throws on an empty selection.
 */
public function testNodeName()
{
    $firstItemName = $this->createTestCrawler()->filterXPath('//li')->nodeName();
    $this->assertEquals('li', $firstItemName, '->nodeName() returns the node name of the first element of the node list');

    try {
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->nodeName();
        $this->fail('->nodeName() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->nodeName() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * text() returns the value of the first selected node and throws on an empty selection.
 */
public function testText()
{
    $firstItemText = $this->createTestCrawler()->filterXPath('//li')->text();
    $this->assertEquals('One', $firstItemText, '->text() returns the node value of the first element of the node list');

    try {
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->text();
        $this->fail('->text() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->text() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * html() returns the inner markup of the first selected node and throws on an empty selection.
 */
public function testHtml()
{
    $fifthAnchorHtml = $this->createTestCrawler()->filterXPath('//a[5]')->html();
    $this->assertEquals('<img alt="Bar">', $fifthAnchorHtml);

    // Whitespace between tags is collapsed so the comparison does not depend on fixture formatting.
    $formHtml = $this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html();
    $this->assertEquals('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"></button>', trim(preg_replace('~>\s+<~', '><', $formHtml)));

    try {
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->html();
        $this->fail('->html() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->html() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * extract() collects the requested attributes (or the "_text" pseudo attribute) per node.
 */
public function testExtract()
{
    $items = $this->createTestCrawler()->filterXPath('//ul[1]/li');

    $texts = $items->extract('_text');
    $this->assertEquals(array('One', 'Two', 'Three'), $texts, '->extract() returns an array of extracted data from the node list');

    $textsAndClasses = $items->extract(array('_text', 'class'));
    $this->assertEquals(array(array('One', 'first'), array('Two', ''), array('Three', '')), $textsAndClasses, '->extract() returns an array of extracted data from the node list');

    $nothingRequested = $items->extract(array());
    $this->assertEquals(array(array(), array(), array()), $nothingRequested, '->extract() returns empty arrays if the attribute list is empty');

    $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
    $this->assertEquals(array(), $emptySelection->extract('_text'), '->extract() returns an empty array if the node list is empty');
}
/**
 * Runs a series of absolute, relative, union and nested XPath expressions against the
 * body selection and checks how many nodes each one matches.
 */
public function testFilterXpathComplexQueries()
{
    $body = $this->createTestCrawler()->filterXPath('//body');

    $this->assertCount(0, $body->filterXPath('/input'));
    $this->assertCount(0, $body->filterXPath('/body'));
    $this->assertCount(1, $body->filterXPath('./body'));
    $this->assertCount(1, $body->filterXPath('.//body'));
    $this->assertCount(5, $body->filterXPath('.//input'));
    $this->assertCount(4, $body->filterXPath('//form')->filterXPath('//button | //input'));
    $this->assertCount(1, $body->filterXPath('body'));
    $this->assertCount(6, $body->filterXPath('//button | //input'));
    $this->assertCount(1, $body->filterXPath('//body'));
    $this->assertCount(1, $body->filterXPath('descendant-or-self::body'));

    $childDivs = $body->filterXPath('//div[@id="parent"]')->filterXPath('./div');
    $this->assertCount(1, $childDivs, 'A child selection finds only the current div');

    $descendantAxisDivs = $body->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div');
    $this->assertCount(3, $descendantAxisDivs, 'A descendant selector matches the current div and its child');

    $descendantDivs = $body->filterXPath('//div[@id="parent"]')->filterXPath('//div');
    $this->assertCount(3, $descendantDivs, 'A descendant selector matches the current div and its child');

    $this->assertCount(5, $body->filterXPath('(//a | //div)//img'));
    $this->assertCount(7, $body->filterXPath('((//a | //div)//img | //ul)'));
    $this->assertCount(7, $body->filterXPath('( ( //a | //div )//img | //ul )'));

    $klausiLinks = $body->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]");
    $this->assertCount(1, $klausiLinks);
}
/**
 * filterXPath() returns a new crawler restricted to the matching nodes and keeps the
 * parent chain intact when calls are chained.
 */
public function testFilterXPath()
{
    $crawler = $this->createTestCrawler();
    $filtered = $crawler->filterXPath('//li');

    $this->assertNotSame($crawler, $filtered, '->filterXPath() returns a new instance of a crawler');
    // Check the value returned by filterXPath(), not the crawler it was called on; asserting
    // on $crawler would pass regardless of what filterXPath() returns.
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $filtered, '->filterXPath() returns a new instance of a crawler');

    $crawler = $this->createTestCrawler()->filterXPath('//ul');
    $this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');

    $crawler = $this->createTestCrawler();
    $this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
}
/**
 * Filtering "li" below both html and body must not return the same list items twice.
 */
public function testFilterRemovesDuplicates()
{
    $roots = $this->createTestCrawler()->filter('html, body');
    $listItems = $roots->filter('li');

    $this->assertCount(6, $listItems, 'The crawler removes duplicates when filtering.');
}
/**
 * The "default" prefix can be used in XPath to address elements of the document's default namespace.
 */
public function testFilterXPathWithDefaultNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $entryId = $xmlCrawler->filterXPath('//default:entry/default:id');

    $this->assertCount(1, $entryId, '->filterXPath() automatically registers a namespace');
    $this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $entryId->text());
}
/**
 * setDefaultNamespacePrefix() changes the prefix under which the default namespace is addressed.
 */
public function testFilterXPathWithCustomDefaultNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $xmlCrawler->setDefaultNamespacePrefix('x');

    $entryId = $xmlCrawler->filterXPath('//x:entry/x:id');

    $this->assertCount(1, $entryId, '->filterXPath() lets to override the default namespace prefix');
    $this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $entryId->text());
}
/**
 * A prefix declared in the XML document can be used in XPath without manual registration.
 */
public function testFilterXPathWithNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $accessControls = $xmlCrawler->filterXPath('//yt:accessControl');

    $this->assertCount(2, $accessControls, '->filterXPath() automatically registers a namespace');
}
/**
 * Several document-declared prefixes can be combined in a single XPath expression.
 */
public function testFilterXPathWithMultipleNamespaces()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $aspectRatio = $xmlCrawler->filterXPath('//media:group/yt:aspectRatio');

    $this->assertCount(1, $aspectRatio, '->filterXPath() automatically registers multiple namespaces');
    $this->assertSame('widescreen', $aspectRatio->text());
}
/**
 * A prefix registered through registerNamespace() is usable in subsequent XPath queries.
 */
public function testFilterXPathWithManuallyRegisteredNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $xmlCrawler->registerNamespace('m', 'http://search.yahoo.com/mrss/');

    $aspectRatio = $xmlCrawler->filterXPath('//m:group/yt:aspectRatio');

    $this->assertCount(1, $aspectRatio, '->filterXPath() uses manually registered namespace');
    $this->assertSame('widescreen', $aspectRatio->text());
}
/**
 * A URL inside an attribute predicate must not be mistaken for namespace prefixes.
 */
public function testFilterXPathWithAnUrl()
{
    $xmlCrawler = $this->createTestXmlCrawler();

    $category = $xmlCrawler->filterXPath('//media:category[@scheme="http://gdata.youtube.com/schemas/2007/categories.cat"]');

    $this->assertCount(1, $category);
    $this->assertSame('Music', $category->text());
}
/**
 * Expressions that address the internal fake root node itself must match nothing.
 */
public function testFilterXPathWithFakeRoot()
{
    $subject = $this->createTestCrawler();
    $message = '->filterXPath() returns an empty result if the XPath references the fake root node';

    $this->assertCount(0, $subject->filterXPath('.'), $message);
    $this->assertCount(0, $subject->filterXPath('self::*'), $message);
    $this->assertCount(0, $subject->filterXPath('self::_root'), $message);
}
/**
 * The ancestor axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithAncestorAxis()
{
    $forms = $this->createTestCrawler()->filterXPath('//form');
    $ancestors = $forms->filterXPath('ancestor::*');

    $this->assertCount(0, $ancestors, 'The fake root node has no ancestor nodes');
}
/**
 * The ancestor-or-self axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithAncestorOrSelfAxis()
{
    $forms = $this->createTestCrawler()->filterXPath('//form');
    $ancestorsOrSelf = $forms->filterXPath('ancestor-or-self::*');

    $this->assertCount(0, $ancestorsOrSelf, 'The fake root node has no ancestor nodes');
}
/**
 * The attribute axis evaluated directly from a filtered selection matches nothing.
 */
public function testFilterXPathWithAttributeAxis()
{
    $forms = $this->createTestCrawler()->filterXPath('//form');
    $attributes = $forms->filterXPath('attribute::*');

    $this->assertCount(0, $attributes, 'The fake root node has no attribute nodes');
}
/**
 * An attribute axis that follows element steps selects the attributes of the matched elements.
 */
public function testFilterXPathWithAttributeAxisAfterElementAxis()
{
    $buttonAttributes = $this->createTestCrawler()->filterXPath('//form/button/attribute::*');

    $this->assertCount(3, $buttonAttributes, '->filterXPath() handles attribute axes properly when they are preceded by an element filtering axis');
}
/**
 * The child axis evaluated from the "parent" div selection matches a single div.
 */
public function testFilterXPathWithChildAxis()
{
    $parentDiv = $this->createTestCrawler()->filterXPath('//div[@id="parent"]');
    $childDivs = $parentDiv->filterXPath('child::div');

    $this->assertCount(1, $childDivs, 'A child selection finds only the current div');
}
/**
 * The following axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithFollowingAxis()
{
    $anchors = $this->createTestCrawler()->filterXPath('//a');
    $followingDivs = $anchors->filterXPath('following::div');

    $this->assertCount(0, $followingDivs, 'The fake root node has no following nodes');
}
/**
 * The following-sibling axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithFollowingSiblingAxis()
{
    $anchors = $this->createTestCrawler()->filterXPath('//a');
    $siblingDivs = $anchors->filterXPath('following-sibling::div');

    $this->assertCount(0, $siblingDivs, 'The fake root node has no following nodes');
}
/**
 * The namespace axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithNamespaceAxis()
{
    $buttons = $this->createTestCrawler()->filterXPath('//button');
    $namespaceNodes = $buttons->filterXPath('namespace::*');

    $this->assertCount(0, $namespaceNodes, 'The fake root node has no namespace nodes');
}
/**
 * A namespace axis placed after element steps yields a selection from which a further
 * namespace-axis query matches nothing.
 */
public function testFilterXPathWithNamespaceAxisAfterElementAxis()
{
    $namespaceSelection = $this->createTestCrawler()->filterXPath('//div[@id="parent"]/namespace::*');
    $nested = $namespaceSelection->filterXPath('namespace::*');

    $this->assertCount(0, $nested, 'Namespace axes cannot be requested');
}
/**
 * The parent axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithParentAxis()
{
    $buttons = $this->createTestCrawler()->filterXPath('//button');
    $parents = $buttons->filterXPath('parent::*');

    $this->assertCount(0, $parents, 'The fake root node has no parent nodes');
}
/**
 * The preceding axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithPrecedingAxis()
{
    $forms = $this->createTestCrawler()->filterXPath('//form');
    $preceding = $forms->filterXPath('preceding::*');

    $this->assertCount(0, $preceding, 'The fake root node has no preceding nodes');
}
/**
 * The preceding-sibling axis evaluated from a filtered selection matches nothing.
 */
public function testFilterXPathWithPrecedingSiblingAxis()
{
    $forms = $this->createTestCrawler()->filterXPath('//form');
    $precedingSiblings = $forms->filterXPath('preceding-sibling::*');

    $this->assertCount(0, $precedingSiblings, 'The fake root node has no preceding nodes');
}
/**
 * The self axis is evaluated against the fake root, so a concrete element name never
 * matches while "self::*" followed by a child step still reaches the selected anchors.
 */
public function testFilterXPathWithSelfAxes()
{
    $anchors = $this->createTestCrawler()->filterXPath('//a');
    $noRealNameMessage = 'The fake root node has no "real" element name';

    $this->assertCount(0, $anchors->filterXPath('self::a'), $noRealNameMessage);
    $this->assertCount(0, $anchors->filterXPath('self::a/img'), $noRealNameMessage);
    $this->assertCount(10, $anchors->filterXPath('self::*/a'));
}
/**
 * filter() returns a new crawler restricted to the nodes matching the CSS selector.
 */
public function testFilter()
{
    $crawler = $this->createTestCrawler();
    $filtered = $crawler->filter('li');

    $this->assertNotSame($crawler, $filtered, '->filter() returns a new instance of a crawler');
    // Check the value returned by filter(), not the crawler it was called on; asserting on
    // $crawler would pass regardless of what filter() returns.
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $filtered, '->filter() returns a new instance of a crawler');

    $crawler = $this->createTestCrawler()->filter('ul');

    $this->assertCount(6, $crawler->filter('li'), '->filter() filters the node list with the CSS selector');
}
/**
 * The "default" prefix can be used in CSS selectors to address the document's default namespace.
 */
public function testFilterWithDefaultNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $entryId = $xmlCrawler->filter('default|entry default|id');

    $this->assertCount(1, $entryId, '->filter() automatically registers namespaces');
    $this->assertSame('tag:youtube.com,2008:video:kgZRZmEc9j4', $entryId->text());
}
/**
 * A prefix declared in the XML document can be used in CSS selectors without manual registration.
 */
public function testFilterWithNamespace()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $accessControls = $xmlCrawler->filter('yt|accessControl');

    $this->assertCount(2, $accessControls, '->filter() automatically registers namespaces');
}
/**
 * Several document-declared prefixes can be combined in a single CSS selector.
 */
public function testFilterWithMultipleNamespaces()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $aspectRatio = $xmlCrawler->filter('media|group yt|aspectRatio');

    $this->assertCount(1, $aspectRatio, '->filter() automatically registers namespaces');
    $this->assertSame('widescreen', $aspectRatio->text());
}
// Builds a crawler from a sitemap XML string whose root element declares only a default
// namespace and expects filter('url') to match two elements.
// NOTE(review): several lines of the XML string literal and its closing are not visible in
// this excerpt — do not edit the literal without the full file.
642 public function testFilterWithDefaultNamespaceOnly()
644 $crawler = new Crawler('<?xml version="1.0" encoding="UTF-8"?>
645 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
647 <loc>http://localhost/foo</loc>
648 <changefreq>weekly</changefreq>
649 <priority>0.5</priority>
650 <lastmod>2012-11-16</lastmod>
653 <loc>http://localhost/bar</loc>
654 <changefreq>weekly</changefreq>
655 <priority>0.5</priority>
656 <lastmod>2012-11-16</lastmod>
661 $this->assertEquals(2, $crawler->filter('url')->count());
/**
 * selectLink() returns a new crawler with the links whose text, or whose clickable
 * image's alt attribute, contains the given value, including values with quotes.
 */
public function testSelectLink()
{
    $crawler = $this->createTestCrawler();
    $selected = $crawler->selectLink('Foo');

    $this->assertNotSame($crawler, $selected, '->selectLink() returns a new instance of a crawler');
    // Check the value returned by selectLink(), not the crawler it was called on; asserting
    // on $crawler would pass regardless of what selectLink() returns.
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $selected, '->selectLink() returns a new instance of a crawler');

    $this->assertCount(1, $crawler->selectLink('Fabien\'s Foo'), '->selectLink() selects links by the node values');
    $this->assertCount(1, $crawler->selectLink('Fabien\'s Bar'), '->selectLink() selects links by the alt attribute of a clickable image');

    $this->assertCount(2, $crawler->selectLink('Fabien"s Foo'), '->selectLink() selects links by the node values');
    $this->assertCount(2, $crawler->selectLink('Fabien"s Bar'), '->selectLink() selects links by the alt attribute of a clickable image');

    $this->assertCount(1, $crawler->selectLink('\' Fabien"s Foo'), '->selectLink() selects links by the node values');
    $this->assertCount(1, $crawler->selectLink('\' Fabien"s Bar'), '->selectLink() selects links by the alt attribute of a clickable image');

    $this->assertCount(4, $crawler->selectLink('Foo'), '->selectLink() selects links by the node values');
    $this->assertCount(4, $crawler->selectLink('Bar'), '->selectLink() selects links by the node values');
}
/**
 * selectImage() returns a new crawler with the images whose alt attribute contains the
 * given value, including values with quotes.
 */
public function testSelectImage()
{
    $crawler = $this->createTestCrawler();
    $selected = $crawler->selectImage('Bar');

    $this->assertNotSame($crawler, $selected, '->selectImage() returns a new instance of a crawler');
    // Check the value returned by selectImage(), not the crawler it was called on; asserting
    // on $crawler would pass regardless of what selectImage() returns.
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $selected, '->selectImage() returns a new instance of a crawler');

    $this->assertCount(1, $crawler->selectImage('Fabien\'s Bar'), '->selectImage() selects images by alt attribute');
    $this->assertCount(2, $crawler->selectImage('Fabien"s Bar'), '->selectImage() selects images by alt attribute');
    $this->assertCount(1, $crawler->selectImage('\' Fabien"s Bar'), '->selectImage() selects images by alt attribute');
}
/**
 * selectButton() returns a new crawler and finds buttons by value, name or id,
 * including buttons attached to a form through the form attribute.
 */
public function testSelectButton()
{
    $crawler = $this->createTestCrawler();
    $selected = $crawler->selectButton('FooValue');

    $this->assertNotSame($crawler, $selected, '->selectButton() returns a new instance of a crawler');
    // Check the value returned by selectButton(), not the crawler it was called on; asserting
    // on $crawler would pass regardless of what selectButton() returns.
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $selected, '->selectButton() returns a new instance of a crawler');

    $this->assertEquals(1, $crawler->selectButton('FooValue')->count(), '->selectButton() selects buttons');
    $this->assertEquals(1, $crawler->selectButton('FooName')->count(), '->selectButton() selects buttons');
    $this->assertEquals(1, $crawler->selectButton('FooId')->count(), '->selectButton() selects buttons');

    $this->assertEquals(1, $crawler->selectButton('BarValue')->count(), '->selectButton() selects buttons');
    $this->assertEquals(1, $crawler->selectButton('BarName')->count(), '->selectButton() selects buttons');
    $this->assertEquals(1, $crawler->selectButton('BarId')->count(), '->selectButton() selects buttons');

    $this->assertEquals(1, $crawler->selectButton('FooBarValue')->count(), '->selectButton() selects buttons with form attribute too');
    $this->assertEquals(1, $crawler->selectButton('FooBarName')->count(), '->selectButton() selects buttons with form attribute too');
}
// Expects selectButton() to find the button whose name attribute contains single quotes.
// NOTE(review): the assignment of $html and most of the markup are not visible in this
// excerpt — confirm against the full file before editing.
712 public function testSelectButtonWithSingleQuotesInNameAttribute()
719 <a href="/index.php?r=site/login">Login</a>
721 <form id="login-form" action="/index.php?r=site/login" method="post">
722 <button type="submit" name="Click 'Here'">Submit</button>
728 $crawler = new Crawler($html);
730 $this->assertCount(1, $crawler->selectButton('Click \'Here\''));
// Expects selectButton() to find the button whose name attribute contains double quotes.
// NOTE(review): the assignment of $html and most of the markup are not visible in this
// excerpt — confirm against the full file before editing.
733 public function testSelectButtonWithDoubleQuotesInNameAttribute()
740 <a href="/index.php?r=site/login">Login</a>
742 <form id="login-form" action="/index.php?r=site/login" method="post">
743 <button type="submit" name='Click "Here"'>Submit</button>
749 $crawler = new Crawler($html);
751 $this->assertCount(1, $crawler->selectButton('Click "Here"'));
/**
 * link() wraps the first selected node in a Link, accepts an HTTP method, resolves the
 * URI against the crawler URI and throws on an empty selection.
 */
public function testLink()
{
    $fooLinks = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Link', $fooLinks->link(), '->link() returns a Link instance');

    $postLink = $fooLinks->link('post');
    $this->assertEquals('POST', $postLink->getMethod(), '->link() takes a method as its argument');

    $getLinks = $this->createTestCrawler('http://example.com/bar')->selectLink('GetLink');
    $this->assertEquals('http://example.com/bar?get=param', $getLinks->link()->getUri(), '->link() returns a Link instance');

    try {
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->link();
        $this->fail('->link() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->link() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * link() must refuse a selection whose first node is a text node rather than a DOMElement.
 *
 * @expectedException \InvalidArgumentException
 * @expectedExceptionMessage The selected node should be instance of DOMElement
 */
public function testInvalidLink()
{
    $subject = $this->createTestCrawler('http://example.com/bar/');
    $textNodes = $subject->filterXPath('//li/text()');

    $textNodes->link();
}
/**
 * Link extraction on a selection of text nodes is expected to be rejected.
 *
 * NOTE(review): despite its plural name this test calls link(), exactly like
 * testInvalidLink(); confirm whether links() was meant to be exercised here.
 *
 * @expectedException \InvalidArgumentException
 * @expectedExceptionMessage The selected node should be instance of DOMElement
 */
public function testInvalidLinks()
{
    $textNodes = $this->createTestCrawler('http://example.com/bar/')->filterXPath('//li/text()');

    $textNodes->link();
}
/**
 * image() wraps the first selected node in an Image and throws on an empty selection.
 */
public function testImage()
{
    $barImages = $this->createTestCrawler('http://example.com/bar/')->selectImage('Bar');
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Image', $barImages->image(), '->image() returns an Image instance');

    try {
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->image();
        $this->fail('->image() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $exception) {
        $this->assertTrue(true, '->image() throws an \InvalidArgumentException if the node list is empty');
    }
}
// Checks that selectLink() and selectButton() only search inside the current selection:
// the login form selection contains the button but not the link, the "action" selection
// contains the link but not the button, and re-selecting on a selection still finds the node.
// NOTE(review): the assignment of $html and most of the markup (including the element with
// id "action") are not visible in this excerpt — confirm against the full file.
805 public function testSelectLinkAndLinkFiltered()
812 <a href="/index.php?r=site/login">Login</a>
814 <form id="login-form" action="/index.php?r=site/login" method="post">
815 <button type="submit">Submit</button>
821 $crawler = new Crawler($html);
822 $filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'login-form']");
824 $this->assertCount(0, $filtered->selectLink('Login'));
825 $this->assertCount(1, $filtered->selectButton('Submit'));
827 $filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'action']");
829 $this->assertCount(1, $filtered->selectLink('Login'));
830 $this->assertCount(0, $filtered->selectButton('Submit'));
832 $this->assertCount(1, $crawler->selectLink('Login')->selectLink('Login'));
833 $this->assertCount(1, $crawler->selectButton('Submit')->selectButton('Submit'));
/**
 * Successive filterXPath() calls can be chained on three nested <div>s;
 * the "name" attribute read from the final selection is expected to be "a".
 */
public function testChaining()
{
    $markup = '<div name="a"><div name="b"><div name="c"></div></div></div>';

    $selection = (new Crawler($markup))
        ->filterXPath('//div')
        ->filterXPath('div')
        ->filterXPath('div');

    $this->assertEquals('a', $selection->attr('name'));
}
/**
 * links() returns an array of Link objects, one per selected node, and an
 * empty array when nothing is selected.
 */
public function testLinks()
{
    $selection = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
    $this->assertInternalType('array', $selection->links(), '->links() returns an array');

    $this->assertCount(4, $selection->links(), '->links() returns an array');
    $found = $selection->links();
    $this->assertInstanceOf(\Symfony\Component\DomCrawler\Link::class, $found[0], '->links() returns an array of Link instances');

    // The fixture is queried for <ol> nodes to obtain an empty selection.
    $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
    $this->assertEquals(array(), $emptySelection->links(), '->links() returns an empty array if the node selection is empty');
}
/**
 * images() returns an array of Image objects, one per selected node, and an
 * empty array when nothing is selected.
 */
public function testImages()
{
    $crawler = $this->createTestCrawler('http://example.com/bar/')->selectImage('Bar');
    $this->assertInternalType('array', $crawler->images(), '->images() returns an array');

    $this->assertCount(4, $crawler->images(), '->images() returns an array');
    $images = $crawler->images();
    $this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Image', $images[0], '->images() returns an array of Image instances');

    // The empty-selection case must exercise images(), not links().
    $this->assertEquals(array(), $this->createTestCrawler()->filterXPath('//ol')->images(), '->images() returns an empty array if the node selection is empty');
}
/**
 * form() builds a Form from a selected button, including a button that
 * references its form through the "form" attribute, accepts initial values,
 * and throws an \InvalidArgumentException on an empty selection.
 */
public function testForm()
{
    $formClass = 'Symfony\\Component\\DomCrawler\\Form';
    $fixture = $this->createTestCrawler('http://example.com/bar/');
    $fooButton = $fixture->selectButton('FooValue');
    $fooBarButton = $fixture->selectButton('FooBarValue');

    $this->assertInstanceOf($formClass, $fooButton->form(), '->form() returns a Form instance');
    $this->assertInstanceOf($formClass, $fooBarButton->form(), '->form() returns a Form instance');

    // Both buttons must resolve to the same <form> element.
    $fooFormId = $fooButton->form()->getFormNode()->getAttribute('id');
    $fooBarFormId = $fooBarButton->form()->getFormNode()->getAttribute('id');
    $this->assertEquals($fooFormId, $fooBarFormId, '->form() works on elements with form attribute');

    $this->assertEquals(
        array('FooName' => 'FooBar', 'TextName' => 'TextValue', 'FooTextName' => 'FooTextValue'),
        $fooButton->form(array('FooName' => 'FooBar'))->getValues(),
        '->form() takes an array of values to submit as its first argument'
    );
    $this->assertEquals(
        array('FooName' => 'FooValue', 'TextName' => 'TextValue', 'FooTextName' => 'FooTextValue'),
        $fooButton->form()->getValues(),
        '->getValues() returns correct form values'
    );
    $this->assertEquals(
        array('FooBarName' => 'FooBarValue', 'TextName' => 'TextValue', 'FooTextName' => 'FooTextValue'),
        $fooBarButton->form()->getValues(),
        '->getValues() returns correct form values'
    );

    try {
        // The fixture is queried for <ol> nodes to obtain an empty selection.
        $emptySelection = $this->createTestCrawler()->filterXPath('//ol');
        $emptySelection->form();
        $this->fail('->form() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $expected) {
        $this->assertTrue(true, '->form() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * Requesting a form from a selection of text nodes must be rejected.
 *
 * @expectedException \InvalidArgumentException
 * @expectedExceptionMessage The selected node should be instance of DOMElement
 */
public function testInvalidForm()
{
    $textNodes = $this->createTestCrawler('http://example.com/bar/')->filterXPath('//li/text()');
    $textNodes->form();
}
/**
 * last() returns a new Crawler holding the last node of the selection.
 */
public function testLast()
{
    $crawler = $this->createTestCrawler()->filterXPath('//ul[1]/li');
    $last = $crawler->last();

    $this->assertNotSame($crawler, $last, '->last() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $last, '->last() returns a new instance of a crawler');

    $this->assertEquals('Three', $last->text());
}
/**
 * first() returns a new Crawler holding the first node of the selection.
 */
public function testFirst()
{
    $crawler = $this->createTestCrawler()->filterXPath('//li');
    $first = $crawler->first();

    $this->assertNotSame($crawler, $first, '->first() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $first, '->first() returns a new instance of a crawler');

    $this->assertEquals('One', $first->text());
}
/**
 * siblings() returns a new Crawler with the other nodes at the same level
 * and throws an \InvalidArgumentException on an empty selection.
 */
public function testSiblings()
{
    $crawler = $this->createTestCrawler()->filterXPath('//li')->eq(1);
    $nodes = $crawler->siblings();

    $this->assertNotSame($crawler, $nodes, '->siblings() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $nodes, '->siblings() returns a new instance of a crawler');

    $this->assertEquals(2, $nodes->count());
    $this->assertEquals('One', $nodes->eq(0)->text());
    $this->assertEquals('Three', $nodes->eq(1)->text());

    $nodes = $this->createTestCrawler()->filterXPath('//li')->eq(0)->siblings();
    $this->assertEquals(2, $nodes->count());
    $this->assertEquals('Two', $nodes->eq(0)->text());
    $this->assertEquals('Three', $nodes->eq(1)->text());

    try {
        $this->createTestCrawler()->filterXPath('//ol')->siblings();
        $this->fail('->siblings() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $e) {
        $this->assertTrue(true, '->siblings() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * nextAll() returns a new Crawler with the following siblings and throws an
 * \InvalidArgumentException on an empty selection.
 */
public function testNextAll()
{
    $crawler = $this->createTestCrawler()->filterXPath('//li')->eq(1);
    $nodes = $crawler->nextAll();

    $this->assertNotSame($crawler, $nodes, '->nextAll() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $nodes, '->nextAll() returns a new instance of a crawler');

    $this->assertEquals(1, $nodes->count());
    $this->assertEquals('Three', $nodes->eq(0)->text());

    try {
        $this->createTestCrawler()->filterXPath('//ol')->nextAll();
        $this->fail('->nextAll() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $e) {
        $this->assertTrue(true, '->nextAll() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * previousAll() returns a new Crawler with the preceding siblings and throws
 * an \InvalidArgumentException on an empty selection.
 */
public function testPreviousAll()
{
    $crawler = $this->createTestCrawler()->filterXPath('//li')->eq(2);
    $nodes = $crawler->previousAll();

    $this->assertNotSame($crawler, $nodes, '->previousAll() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $nodes, '->previousAll() returns a new instance of a crawler');

    $this->assertEquals(2, $nodes->count());
    $this->assertEquals('Two', $nodes->eq(0)->text());

    try {
        $this->createTestCrawler()->filterXPath('//ol')->previousAll();
        $this->fail('->previousAll() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $e) {
        $this->assertTrue(true, '->previousAll() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * children() returns a new Crawler with the child nodes, throws an
 * \InvalidArgumentException on an empty selection, and must not raise a
 * notice when the selected node has no children.
 */
public function testChildren()
{
    $crawler = $this->createTestCrawler()->filterXPath('//ul');
    $nodes = $crawler->children();

    $this->assertNotSame($crawler, $nodes, '->children() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $nodes, '->children() returns a new instance of a crawler');

    $this->assertEquals(3, $nodes->count());
    $this->assertEquals('One', $nodes->eq(0)->text());
    $this->assertEquals('Two', $nodes->eq(1)->text());
    $this->assertEquals('Three', $nodes->eq(2)->text());

    try {
        $this->createTestCrawler()->filterXPath('//ol')->children();
        $this->fail('->children() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $e) {
        $this->assertTrue(true, '->children() throws an \InvalidArgumentException if the node list is empty');
    }

    try {
        $crawler = new Crawler('<p></p>');
        $crawler->filter('p')->children();
        $this->assertTrue(true, '->children() does not trigger a notice if the node has no children');
    } catch (\PHPUnit\Framework\Error\Notice $e) {
        // Namespaced PHPUnit notice class.
        $this->fail('->children() does not trigger a notice if the node has no children');
    } catch (\PHPUnit_Framework_Error_Notice $e) {
        // Underscore-style PHPUnit notice class.
        $this->fail('->children() does not trigger a notice if the node has no children');
    }
}
/**
 * parents() returns a new Crawler with the ancestor nodes, an empty one for
 * the root element, and throws an \InvalidArgumentException on an empty
 * selection.
 */
public function testParents()
{
    $crawler = $this->createTestCrawler()->filterXPath('//li[1]');
    $nodes = $crawler->parents();

    $this->assertNotSame($crawler, $nodes, '->parents() returns a new instance of a crawler');
    // Check the type of the returned object, not of the crawler it was called on.
    $this->assertInstanceOf(Crawler::class, $nodes, '->parents() returns a new instance of a crawler');

    $this->assertEquals(3, $nodes->count());

    $nodes = $this->createTestCrawler()->filterXPath('//html')->parents();
    $this->assertEquals(0, $nodes->count());

    try {
        $this->createTestCrawler()->filterXPath('//ol')->parents();
        $this->fail('->parents() throws an \InvalidArgumentException if the node list is empty');
    } catch (\InvalidArgumentException $e) {
        $this->assertTrue(true, '->parents() throws an \InvalidArgumentException if the node list is empty');
    }
}
/**
 * The URI of a link is resolved against the document's <base> tag.
 *
 * @dataProvider getBaseTagData
 */
public function testBaseTag($baseValue, $linkValue, $expectedUri, $currentUri = null, $description = '')
{
    $markup = '<html><base href="'.$baseValue.'">'
        .'<a href="'.$linkValue.'"></a></html>';
    $document = new Crawler($markup, $currentUri);

    $resolvedUri = $document->filterXPath('//a')->link()->getUri();

    $this->assertEquals($expectedUri, $resolvedUri, $description);
}
/**
 * Data sets for testBaseTag().
 *
 * Each row is: <base> href, link href, expected URI, and optionally the
 * current URI and an assertion message.
 */
public function getBaseTagData()
{
    $cases = array();

    $cases[] = array('http://base.com', 'link', 'http://base.com/link');
    $cases[] = array('//base.com', 'link', 'https://base.com/link', 'https://domain.com', '<base> tag can use a schema-less URL');
    $cases[] = array('path/', 'link', 'https://domain.com/path/link', 'https://domain.com', '<base> tag can set a path');
    $cases[] = array('http://base.com', '#', 'http://base.com#', 'http://domain.com/path/link', '<base> tag does work with links to an anchor');
    $cases[] = array('http://base.com', '', 'http://base.com', 'http://domain.com/path/link', '<base> tag does work with empty links');

    return $cases;
}
/**
 * The URI of a form is resolved taking the document's <base> tag into account.
 *
 * @dataProvider getBaseTagWithFormData
 */
public function testBaseTagWithForm($baseValue, $actionValue, $expectedUri, $currentUri = null, $description = null)
{
    $markup = '<html><base href="'.$baseValue.'">'
        .'<form method="post" action="'.$actionValue.'"><button type="submit" name="submit"/></form></html>';
    $document = new Crawler($markup, $currentUri);

    $resolvedUri = $document->filterXPath('//button')->form()->getUri();

    $this->assertEquals($expectedUri, $resolvedUri, $description);
}
/**
 * Data sets for testBaseTagWithForm().
 *
 * Each row is: <base> href, form action, expected URI, current URI and an
 * assertion message.
 */
public function getBaseTagWithFormData()
{
    $cases = array();

    $cases[] = array('https://base.com/', 'link/', 'https://base.com/link/', 'https://base.com/link/', '<base> tag does work with a path and relative form action');
    $cases[] = array('/basepath', '/registration', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and form action');
    $cases[] = array('/basepath', '', 'http://domain.com/registration', 'http://domain.com/registration', '<base> tag does work with a path and empty form action');
    $cases[] = array('http://base.com/', '/registration', 'http://base.com/registration', 'http://domain.com/registration', '<base> tag does work with a URL and form action');
    $cases[] = array('http://base.com', '', 'http://domain.com/path/form', 'http://domain.com/path/form', '<base> tag does work with a URL and an empty form action');
    $cases[] = array('http://base.com/path', '/registration', 'http://base.com/registration', 'http://domain.com/path/form', '<base> tag does work with a URL and form action');

    return $cases;
}
/**
 * A :contains() filter on a list with a nested sub-list is expected to
 * match exactly one <li>.
 */
public function testCountOfNestedElements()
{
    $markup = '<html><body><ul><li>List item 1<ul><li>Sublist item 1</li><li>Sublist item 2</ul></li></ul></body></html>';
    $matches = (new Crawler($markup))->filter('li:contains("List item 1")');

    $this->assertCount(1, $matches);
}
/**
 * evaluate() on a filtered selection returns one string result per
 * selected node.
 */
public function testEvaluateReturnsTypedResultOfXPathExpressionOnADocumentSubset()
{
    $inputs = $this->createTestCrawler()->filterXPath('//form/input');

    $prefixes = $inputs->evaluate('substring-before(@name, "Name")');

    $this->assertSame(array('Text', 'Foo', 'Bar'), $prefixes);
}
/**
 * evaluate() on a selection made with a namespaced XPath returns one
 * string result per selected attribute node.
 */
public function testEvaluateReturnsTypedResultOfNamespacedXPathExpressionOnADocumentSubset()
{
    $actions = $this->createTestXmlCrawler()->filterXPath('//yt:accessControl/@action');

    $values = $actions->evaluate('string(.)');

    $this->assertSame(array('comment', 'videoRespond'), $values);
}
/**
 * evaluate() honours a prefix registered through registerNamespace() and
 * returns the string result wrapped in an array.
 */
public function testEvaluateReturnsTypedResultOfNamespacedXPathExpression()
{
    $xmlCrawler = $this->createTestXmlCrawler();
    $xmlCrawler->registerNamespace('youtube', 'http://gdata.youtube.com/schemas/2007');

    $this->assertSame(
        array('comment'),
        $xmlCrawler->evaluate('string(//youtube:accessControl/@action)')
    );
}
/**
 * evaluate() returns a Crawler when the expression selects a node.
 */
public function testEvaluateReturnsACrawlerIfXPathExpressionEvaluatesToANode()
{
    $fixture = $this->createTestCrawler();
    $evaluated = $fixture->evaluate('//form/input[1]');

    $this->assertInstanceOf(Crawler::class, $evaluated);
    $this->assertCount(1, $evaluated);

    $firstNode = $evaluated->first();
    $this->assertSame('input', $firstNode->nodeName());
}
/**
 * evaluate() on a crawler without any node must throw.
 *
 * @expectedException \LogicException
 */
public function testEvaluateThrowsAnExceptionIfDocumentIsEmpty()
{
    $emptyCrawler = new Crawler();
    $emptyCrawler->evaluate('//form/input[1]');
}
/**
 * Creates the HTML fixture crawler used by the tests in this class.
 *
 * The markup contains text links, links wrapping images, a form with text,
 * submit and button inputs plus a <button>, two inputs tied to that form
 * through the "form" attribute, list items and a few <div> elements.
 *
 * @param string|null $uri URI handed to the Crawler constructor as its second argument
 *
 * @return Crawler A crawler built from the \DOMDocument and the given URI
 */
1120 public function createTestCrawler($uri = null)
1122 $dom = new \DOMDocument();
1126 <a href="foo">Foo</a>
1127 <a href="/foo"> Fabien\'s Foo </a>
1128 <a href="/foo">Fabien"s Foo</a>
1129 <a href="/foo">\' Fabien"s Foo</a>
1131 <a href="/bar"><img alt="Bar"/></a>
1132 <a href="/bar"><img alt=" Fabien\'s Bar "/></a>
1133 <a href="/bar"><img alt="Fabien"s Bar"/></a>
1134 <a href="/bar"><img alt="\' Fabien"s Bar"/></a>
1136 <a href="?get=param">GetLink</a>
1138 <a href="/example">Klausi|Claudiu</a>
1140 <form action="foo" id="FooFormId">
1141 <input type="text" value="TextValue" name="TextName" />
1142 <input type="submit" value="FooValue" name="FooName" id="FooId" />
1143 <input type="button" value="BarValue" name="BarName" id="BarId" />
1144 <button value="ButtonValue" name="ButtonName" id="ButtonId" />
1147 <input type="submit" value="FooBarValue" name="FooBarName" form="FooFormId" />
1148 <input type="text" value="FooTextValue" name="FooTextName" form="FooFormId" />
1151 <li class="first">One</li>
1161 <div id="child"></div>
1162 <div id="child2" xmlns:foo="http://example.com"></div>
1164 <div id="sibling"><img /></div>
1169 return new Crawler($dom, $uri);
/**
 * Creates the XML fixture crawler used by the namespace-related tests.
 *
 * The document is an Atom <entry> that declares the default Atom namespace
 * together with the "media" and "yt" prefixes, and carries two
 * <yt:accessControl> elements whose actions are "comment" and "videoRespond".
 *
 * @param string|null $uri URI handed to the Crawler constructor as its second argument
 *
 * @return Crawler A crawler built from the XML string and the given URI
 */
1172 protected function createTestXmlCrawler($uri = null)
1174 $xml = '<?xml version="1.0" encoding="UTF-8"?>
1175 <entry xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xmlns:yt="http://gdata.youtube.com/schemas/2007">
1176 <id>tag:youtube.com,2008:video:kgZRZmEc9j4</id>
1177 <yt:accessControl action="comment" permission="allowed"/>
1178 <yt:accessControl action="videoRespond" permission="moderated"/>
1180 <media:title type="plain">Chordates - CrashCourse Biology #24</media:title>
1181 <yt:aspectRatio>widescreen</yt:aspectRatio>
1183 <media:category label="Music" scheme="http://gdata.youtube.com/schemas/2007/categories.cat">Music</media:category>
1186 return new Crawler($xml, $uri);
/**
 * Builds a small \DOMDocument holding a single <div class="foo">.
 *
 * @return \DOMDocument
 */
protected function createDomDocument()
{
    $document = new \DOMDocument();
    $document->loadXML('<html><div class="foo"></div></html>');

    return $document;
}
/**
 * Builds a \DOMNodeList by querying a small document for its <div> nodes.
 *
 * @return \DOMNodeList Result of the "//div" XPath query
 */
protected function createNodeList()
{
    $document = new \DOMDocument();
    $document->loadXML('<html><div class="foo"></div></html>');

    $xpath = new \DOMXPath($document);
    $divs = $xpath->query('//div');

    return $divs;
}