2 namespace Masterminds\HTML5\Parser;
7 * This scans over an input stream.
/**
 * Character mask of ASCII hexadecimal digits (both letter cases).
 *
 * Used as the mask for getHex(). Each character is listed exactly once;
 * a mask is a set, so repeating a character adds nothing.
 */
const CHARS_HEX = 'abcdefABCDEF0123456789';

/**
 * Character mask of ASCII letters (both cases) and decimal digits.
 *
 * Used as the mask for getAsciiAlphaNum().
 */
const CHARS_ALNUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';

/**
 * Character mask of ASCII letters (both cases).
 *
 * Used as the mask for getAsciiAlpha().
 */
const CHARS_ALPHA = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
// Flipping this to true will give slightly more debugging info.
21 public $debug = false;
24 * Create a new Scanner.
26 * @param \Masterminds\HTML5\Parser\InputStream $input
27 * An InputStream to be scanned.
// NOTE(review): the constructor body is not visible in this view. The other
// methods read the stream from $this->is, so presumably $input is stored
// there — confirm against the complete file.
29 public function __construct($input)
/**
 * Get the current position.
 *
 * Delegates to the input stream's key().
 *
 * @return int The current integer byte position.
 */
public function position()
{
    $offset = $this->is->key();

    return $offset;
}
/**
 * Look at the next character in the data.
 *
 * Delegates to the input stream's peek().
 *
 * @return string The next character.
 */
public function peek()
{
    $upcoming = $this->is->peek();

    return $upcoming;
}
55 * Get the next character.
57 * Note: This advances the pointer.
59 * @return string The next character.
// NOTE(review): the doc comment above says this advances the pointer, but no
// advancing call is visible between the signature and the valid() check below.
// Statements (and the brace-only lines) appear to be missing from this view —
// confirm against the complete file before changing this method.
61 public function next()
// Only read from the stream while it still reports a valid position.
64 if ($this->is->valid()) {
// Writes the current character to STDOUT, prefixed with "> ".
// NOTE(review): presumably gated on $this->debug; no guard is visible here.
66 fprintf(STDOUT, "> %s\n", $this->is->current());
67 return $this->is->current();
74 * Get the current character.
76 * Note, this does not advance the pointer.
78 * @return string The current character.
// NOTE(review): only the valid() branch is visible here. What is returned
// once the stream is no longer valid is not shown — confirm against the
// complete file.
80 public function current()
// Only read from the stream while it still reports a valid position.
82 if ($this->is->valid()) {
83 return $this->is->current();
90 * Silently consume N chars.
// NOTE(review): the loop body is not visible in this view. Presumably it
// advances the stream once per iteration — confirm against the complete file.
92 public function consume($count = 1)
// Iterates $count times; the loop does not run when $count is 0 or negative.
94 for ($i = 0; $i < $count; ++ $i) {
/**
 * Unconsume some of the data.
 *
 * Moves the data pointer backwards by delegating to the input stream's
 * unconsume().
 *
 * @param int $howMany
 *   The number of characters to move the pointer back.
 */
public function unconsume($howMany = 1)
{
    $stream = $this->is;
    $stream->unconsume($howMany);
}
/**
 * Get the next group of characters that are hex characters.
 *
 * Note: reading the characters also moves the pointer in the data
 * (presumably past the returned group — confirm against the input stream).
 *
 * @return string The next group that is hex characters.
 */
public function getHex()
{
    // static:: keeps the mask overridable by subclasses.
    $hexMask = static::CHARS_HEX;

    return $this->is->charsWhile($hexMask);
}
/**
 * Get the next group of characters that are ASCII alpha characters.
 *
 * Note: reading the characters also moves the pointer in the data
 * (presumably past the returned group — confirm against the input stream).
 *
 * @return string The next group of ASCII alpha characters.
 */
public function getAsciiAlpha()
{
    // static:: keeps the mask overridable by subclasses.
    $alphaMask = static::CHARS_ALPHA;

    return $this->is->charsWhile($alphaMask);
}
/**
 * Get the next group of characters that are ASCII alpha characters or digits.
 *
 * Note: reading the characters also moves the pointer in the data
 * (presumably past the returned group — confirm against the input stream).
 *
 * @return string The next group of ASCII alpha characters and numbers.
 */
public function getAsciiAlphaNum()
{
    // static:: keeps the mask overridable by subclasses.
    $alnumMask = static::CHARS_ALNUM;

    return $this->is->charsWhile($alnumMask);
}
/**
 * Get the next group of numbers.
 *
 * Only the ASCII decimal digits 0-9 are matched.
 *
 * Note: reading the characters also moves the pointer in the data
 * (presumably past the returned group — confirm against the input stream).
 *
 * @return string The next group of numbers.
 */
public function getNumeric()
{
    $digits = '0123456789';

    return $this->is->charsWhile($digits);
}
/**
 * Consume whitespace.
 *
 * Whitespace in HTML5 is: formfeed, tab, newline, space. The mask used here
 * contains exactly those four characters; carriage return is not in it.
 *
 * Returns whatever the input stream's charsWhile() returns for that mask.
 */
public function whitespace()
{
    $blanks = "\n\t\f ";

    return $this->is->charsWhile($blanks);
}
/**
 * Returns the line that is currently being consumed.
 *
 * Delegates to the input stream's currentLine().
 *
 * @return int The current line number.
 */
public function currentLine()
{
    $lineNumber = $this->is->currentLine();

    return $lineNumber;
}
/**
 * Read chars until something in the mask is encountered.
 *
 * The mask is handed to the input stream's charsUntil() unchanged, and that
 * call's result is returned as-is.
 *
 * @param string $mask
 *   The characters that stop the read (presumably a plain string of
 *   characters, like the masks this class passes to charsWhile() — confirm).
 */
public function charsUntil($mask)
{
    $chunk = $this->is->charsUntil($mask);

    return $chunk;
}
/**
 * Read chars as long as the mask matches.
 *
 * The mask is handed to the input stream's charsWhile() unchanged, and that
 * call's result is returned as-is.
 *
 * @param string $mask
 *   The characters that may be read, given as a plain string of characters
 *   (the form the other helpers in this class pass).
 */
public function charsWhile($mask)
{
    $chunk = $this->is->charsWhile($mask);

    return $chunk;
}
/**
 * Returns the column, within the current line, that the tokenizer is at.
 *
 * Newlines are column 0. The first char after a newline is column 1.
 *
 * @return int The column number.
 */
public function columnOffset()
{
    $column = $this->is->columnOffset();

    return $column;
}
/**
 * Get all characters until EOF.
 *
 * This consumes characters until the EOF. The value returned is whatever the
 * input stream's remainingChars() returns.
 *
 * NOTE(review): the summary says the remaining characters are fetched, while
 * the return tag below describes an integer count. The two disagree — confirm
 * against the input stream and correct whichever is wrong.
 *
 * @return int The number of characters remaining.
 */
public function remainingChars()
{
    $rest = $this->is->remainingChars();

    return $rest;
}