3 namespace Drupal\Component\Gettext;
5 use Drupal\Component\Render\FormattableMarkup;
8 * Implements Gettext PO stream reader.
10 * The PO file format parsing is implemented according to the documentation at
11 * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
13 class PoStreamReader implements PoStreamInterface, PoReaderInterface {
16 * Source line number of the stream being parsed.
20 protected $lineNumber = 0;
23 * Parser context for the stream reader state machine.
25 * Possible contexts are:
28 * - 'MSGID_PLURAL' (msgid_plural)
29 * - 'MSGCTXT' (msgctxt)
30 * - 'MSGSTR' (msgstr)
31 * - 'MSGSTR_ARR' (msgstr[])
35 protected $context = 'COMMENT';
38 * Current entry being read. Incomplete.
42 protected $currentItem = [];
45 * Current plural index for plural translations.
49 protected $currentPluralIndex = 0;
52 * URI of the PO stream that is being read.
59 * Language code for the PO stream being read.
63 protected $langcode = NULL;
66 * File handle of the current PO stream.
73 * The PO stream header.
75 * @var \Drupal\Component\Gettext\PoHeader
80 * Object wrapper for the last read source/translation pair.
82 * @var \Drupal\Component\Gettext\PoItem
87 * Indicator of whether the stream reading is finished.
94 * Array of translated error strings recorded on reading this stream so far.
/**
 * Gets the language code of the PO stream being read.
 *
 * @return string|null
 *   The language code, or NULL when none has been set.
 */
public function getLangcode() {
  return $this->langcode;
}
/**
 * Sets the language code of the PO stream being read.
 *
 * The value is later stamped on every item produced by setItemFromArray().
 *
 * @param string $langcode
 *   The language code to store.
 */
public function setLangcode($langcode) {
  $this->langcode = $langcode;
}
/**
 * Gets the header of the PO stream.
 *
 * @return \Drupal\Component\Gettext\PoHeader
 *   The header object stored by readHeader(). NOTE(review): presumably unset
 *   until the stream has been opened - confirm against the property default.
 */
public function getHeader() {
  return $this->header;
}
/**
 * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
 *
 * Not applicable to stream reading and therefore not implemented.
 *
 * @param \Drupal\Component\Gettext\PoHeader $header
 *   Ignored; a reader takes its header from the stream itself.
 */
public function setHeader(PoHeader $header) {
  // Intentionally empty: the header of a reader comes from the stream.
}
/**
 * Gets the URI of the PO stream that is being read.
 *
 * @return string|null
 *   The URI previously stored with setURI(), if any.
 */
public function getURI() {
  return $this->uri;
}
/**
 * Sets the URI of the PO stream that is going to be read.
 *
 * @param string $uri
 *   The URI that open() will pass to fopen().
 */
public function setURI($uri) {
  $this->uri = $uri;
}
/**
 * Implements Drupal\Component\Gettext\PoStreamInterface::open().
 *
 * Opens the stream and reads the header. The stream is ready for reading
 * items once this method returns.
 *
 * @throws \Exception
 *   If the URI is not yet set, or if the stream cannot be opened.
 */
public function open() {
  if (empty($this->uri)) {
    throw new \Exception('Cannot open stream without URI set.');
  }

  // fopen() returns FALSE on failure. Bail out here instead of handing a
  // non-resource to fgets() while reading the header.
  $handle = fopen($this->uri, 'rb');
  if ($handle === FALSE) {
    throw new \Exception('Cannot open stream ' . $this->uri . ' for reading.');
  }

  $this->fd = $handle;
  $this->readHeader();
}
/**
 * Implements Drupal\Component\Gettext\PoStreamInterface::close().
 *
 * @throws \Exception
 *   If the stream is not open.
 */
public function close() {
  // Guard first: there is nothing to close without a file handle.
  if (!$this->fd) {
    throw new \Exception('Cannot close stream that is not open.');
  }

  fclose($this->fd);
}
/**
 * Reads the next source/translation pair from the stream.
 *
 * @return \Drupal\Component\Gettext\PoItem|null
 *   The next complete item, or NULL when the stream ended before another
 *   item could be completed.
 */
public function readItem() {
  // Forget whatever was handed out by the previous call.
  $this->lastItem = NULL;

  // Consume lines one at a time; readLine() flags the end of the stream and
  // stores a finished entry in $this->lastItem.
  while (!($this->finished || $this->lastItem !== NULL)) {
    $this->readLine();
  }

  return $this->lastItem;
}
/**
 * Sets the seek position for the current PO stream.
 *
 * @param int $seek
 *   The new seek position to set, handed to fseek() as the offset.
 */
public function setSeek($seek) {
  fseek($this->fd, $seek);
}
/**
 * Gets the pointer position of the current PO stream.
 *
 * @return int|false
 *   The position reported by ftell() for the stream's file handle.
 */
public function getSeek() {
  return ftell($this->fd);
}
/**
 * Read the header from the PO stream.
 *
 * The header is a special case PoItem, using the empty string as source and
 * key-value pairs as translation. The regular item reader is reused to
 * fetch it; its translation text is then parsed into a PoHeader.
 */
private function readHeader() {
  $first_item = $this->readItem();

  // An empty .po file (0 bytes) yields no item, so there is no header to
  // parse in that case.
  if ($first_item) {
    $parsed_header = new PoHeader();
    $parsed_header->setFromString(trim($first_item->getTranslation()));
    $this->header = $parsed_header;
  }
}
/**
 * Reads a line from the PO stream and stores data internally.
 *
 * Expands $this->currentItem based on new data for the current item. If
 * this line ends the current item, it is saved with setItemFromArray() with
 * data from $this->currentItem.
 *
 * An internal state machine is maintained in this reader using
 * $this->context as the reading state. PO items are in between COMMENT
 * states (when items have at least one line or comment in between them) or
 * indicated by MSGSTR or MSGSTR_ARR followed immediately by an MSGID or
 * MSGCTXT (when items closely follow each other).
 *
 * @return bool|null
 *   FALSE if an error was logged, NULL otherwise. The errors are considered
 *   non-blocking, so reading can continue, while the errors are collected
 *   for later presentation.
 */
private function readLine() {
  // Read a line and set the stream finished indicator if it was not
  // possible anymore.
  $line = fgets($this->fd);
  $this->finished = ($line === FALSE);

  if (!$this->finished) {
    if ($this->lineNumber == 0) {
      // The first line might come with a UTF-8 BOM, which should be removed.
      $line = str_replace("\xEF\xBB\xBF", '', $line);
      // Current plurality for 'msgstr[]'.
      $this->currentPluralIndex = 0;
    }

    // Track the line number for error reporting.
    $this->lineNumber++;
  }

  // Initialize common values for error logging. This is done for the end of
  // the stream as well, because the "ended unexpectedly" error below needs
  // the placeholders even when no line could be read.
  $log_vars = [
    '%uri' => $this->getURI(),
    '%line' => $this->lineNumber,
  ];

  if (!$this->finished) {
    // Trim away the linefeed. \\n might appear at the end of the string if
    // another line continuing the same string follows. We can remove that.
    $line = trim(strtr($line, ["\\\n" => ""]));

    if (!strncmp('#', $line, 1)) {
      // Lines starting with '#' are comments.

      if ($this->context == 'COMMENT') {
        // Already in comment context, add to current comment.
        $this->currentItem['#'][] = substr($line, 1);
      }
      elseif (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->currentItem);

        // Start a new entry for the comment.
        $this->currentItem = [];
        $this->currentItem['#'][] = substr($line, 1);

        $this->context = 'COMMENT';
        return;
      }
      else {
        // A comment following any other context is a syntax error.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }
      return;
    }
    elseif (!strncmp('msgid_plural', $line, 12)) {
      // A plural form for the current source string.

      if ($this->context != 'MSGID') {
        // A plural form can only be added to an msgid directly.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgid_plural' and trim away whitespace.
      $line = trim(substr($line, 12));

      // Only the plural source string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The plural form must be wrapped in quotes.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains a syntax error on line %line.', $log_vars);
        return FALSE;
      }

      // Append the plural source to the current entry.
      if (is_string($this->currentItem['msgid'])) {
        // The first value was stored as string. Now we know the context is
        // plural, it is converted to array.
        $this->currentItem['msgid'] = [$this->currentItem['msgid']];
      }
      $this->currentItem['msgid'][] = $quoted;

      $this->context = 'MSGID_PLURAL';
      return;
    }
    elseif (!strncmp('msgid', $line, 5)) {
      // Starting a new message.

      if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->currentItem);

        // Start a new context for the msgid.
        $this->currentItem = [];
      }
      elseif ($this->context == 'MSGID') {
        // We are currently already in the context, meaning we passed an id
        // with no data.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgid' and trim away whitespace.
      $line = trim(substr($line, 5));

      // Only the message id string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The message id must be wrapped in quotes.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
        return FALSE;
      }

      $this->currentItem['msgid'] = $quoted;
      $this->context = 'MSGID';
      return;
    }
    elseif (!strncmp('msgctxt', $line, 7)) {
      // Starting a new context.

      if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->currentItem);
        $this->currentItem = [];
      }
      elseif (!empty($this->currentItem['msgctxt'])) {
        // A context cannot apply to another context.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgctxt' and trim away whitespaces.
      $line = trim(substr($line, 7));

      // Only the msgctxt string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The context string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
        return FALSE;
      }

      $this->currentItem['msgctxt'] = $quoted;

      $this->context = 'MSGCTXT';
      return;
    }
    elseif (!strncmp('msgstr[', $line, 7)) {
      // A message string for a specific plurality.

      if (($this->context != 'MSGID') &&
          ($this->context != 'MSGCTXT') &&
          ($this->context != 'MSGID_PLURAL') &&
          ($this->context != 'MSGSTR_ARR')) {
        // Plural message strings must come after msgid, msgctxt,
        // msgid_plural, or other msgstr[] entries.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Ensure the plurality is terminated.
      if (strpos($line, ']') === FALSE) {
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }

      // Extract the plurality.
      $frombracket = strstr($line, '[');
      $this->currentPluralIndex = substr($frombracket, 1, strpos($frombracket, ']') - 1);

      // Skip to the next whitespace and trim away any further whitespace,
      // bringing $line to the message text only.
      $line = trim(strstr($line, " "));

      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }
      if (!isset($this->currentItem['msgstr']) || !is_array($this->currentItem['msgstr'])) {
        $this->currentItem['msgstr'] = [];
      }

      $this->currentItem['msgstr'][$this->currentPluralIndex] = $quoted;

      $this->context = 'MSGSTR_ARR';
      return;
    }
    elseif (!strncmp("msgstr", $line, 6)) {
      // A string pair for an msgid (with optional context).

      if (($this->context != 'MSGID') && ($this->context != 'MSGCTXT')) {
        // Strings are only valid within an id or context scope.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgstr' and trim away whitespaces.
      $line = trim(substr($line, 6));

      // Only the msgstr string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
        return FALSE;
      }

      $this->currentItem['msgstr'] = $quoted;

      $this->context = 'MSGSTR';
      return;
    }
    elseif ($line != '') {
      // Anything that is not a token may be a continuation of a previous
      // token.

      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // This string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
        return FALSE;
      }

      // Append the string to the current item.
      if (($this->context == 'MSGID') || ($this->context == 'MSGID_PLURAL')) {
        if (is_array($this->currentItem['msgid'])) {
          // Add string to last array element for plural sources.
          $last_index = count($this->currentItem['msgid']) - 1;
          $this->currentItem['msgid'][$last_index] .= $quoted;
        }
        else {
          // Singular source, just append the string.
          $this->currentItem['msgid'] .= $quoted;
        }
      }
      elseif ($this->context == 'MSGCTXT') {
        // Multiline context name.
        $this->currentItem['msgctxt'] .= $quoted;
      }
      elseif ($this->context == 'MSGSTR') {
        // Multiline translation string.
        $this->currentItem['msgstr'] .= $quoted;
      }
      elseif ($this->context == 'MSGSTR_ARR') {
        // Multiline plural translation string.
        $this->currentItem['msgstr'][$this->currentPluralIndex] .= $quoted;
      }
      else {
        // No valid context to append to.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
        return FALSE;
      }
      return;
    }
  }

  // Empty line read or EOF of PO stream, close out the last entry.
  if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
    $this->setItemFromArray($this->currentItem);
    $this->currentItem = [];
  }
  elseif ($this->context != 'COMMENT') {
    $this->errors[] = new FormattableMarkup('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
    return FALSE;
  }
}
/**
 * Store the parsed values as a PoItem object.
 *
 * The resulting item becomes the last read item and the parser context is
 * reset to 'COMMENT'.
 *
 * @param array $value
 *   Parsed entry data: 'msgid' and 'msgstr' are read unconditionally,
 *   'msgctxt' and '#' (collected comment lines) only when present.
 */
public function setItemFromArray($value) {
  // Collapse the collected comment lines, if any, into one short string.
  $comments = isset($value['#']) ? $this->shortenComments($value['#']) : '';

  // An array of translations marks a plural entry.
  $plural = is_array($value['msgstr']);
  if ($plural) {
    // Sort plural variants by their form index.
    ksort($value['msgstr']);
  }

  $msgctxt = '';
  if (isset($value['msgctxt'])) {
    $msgctxt = $value['msgctxt'];
  }

  $item = new PoItem();
  $item->setContext($msgctxt);
  $item->setSource($value['msgid']);
  $item->setTranslation($value['msgstr']);
  $item->setPlural($plural);
  $item->setComment($comments);
  $item->setLangcode($this->langcode);

  $this->lastItem = $item;

  $this->context = 'COMMENT';
}
/**
 * Parses a string in quotes.
 *
 * @param string $string
 *   A string specified with enclosing quotes.
 *
 * @return string|false
 *   The string parsed from inside the quotes: backslash escapes are resolved
 *   for double-quoted input, single-quoted input is returned as-is. FALSE if
 *   the input is not wrapped in a matching pair of supported quotes.
 */
public function parseQuoted($string) {
  // A quoted value needs an opening and a closing quote. Without this guard
  // a lone quote character would match itself as both and be accepted as an
  // empty string.
  if (strlen($string) < 2) {
    return FALSE;
  }

  $quote = substr($string, 0, 1);
  if ($quote !== substr($string, -1, 1)) {
    // Start and end quotes must be the same.
    return FALSE;
  }

  $inner = substr($string, 1, -1);
  if ($quote === '"') {
    // Double quotes: strip slashes.
    return stripcslashes($inner);
  }
  if ($quote === "'") {
    // Simple quote: return as-is.
    return $inner;
  }

  // Unrecognized quote.
  return FALSE;
}
/**
 * Generates a short, one-string version of the passed comment array.
 *
 * @param array $comment
 *   An array of strings containing a comment.
 *
 * @return string
 *   Short one-string version of the comment.
 */
private function shortenComments($comment) {
  $joined = '';

  // Keep appending entries, each without its first character, as long as
  // the text gathered so far is still shorter than 130 bytes. The length is
  // checked before appending, so the final entry may push it past that.
  while (count($comment) > 0 && strlen($joined) < 130) {
    $joined .= substr(array_shift($comment), 1) . ', ';
  }

  // Drop the trailing ', ' separator.
  return trim(substr($joined, 0, -2));
}