3 namespace Drupal\Tests\Component\Utility;
5 use Drupal\Component\Utility\Unicode;
6 use PHPUnit\Framework\TestCase;
9 * Tests the Unicode handling features implemented in the Unicode component.
13 * @coversDefaultClass \Drupal\Component\Utility\Unicode
15 class UnicodeTest extends TestCase {
22 protected function setUp() {
23 // Initialize unicode component.
28 * Tests getting and setting the multibyte environment status.
30 * @dataProvider providerTestStatus
34 public function testStatus($value, $expected, $invalid = FALSE) {
36 if (method_exists($this, 'expectException')) {
37 $this->expectException('InvalidArgumentException');
40 $this->setExpectedException('InvalidArgumentException');
43 Unicode::setStatus($value);
44 $this->assertEquals($expected, Unicode::getStatus());
48 * Data provider for testStatus().
53 * An array containing:
54 * - The status value to set.
55 * - The status value to expect after setting the new value.
56 * - (optional) Boolean indicating invalid status. Defaults to FALSE.
58 public function providerTestStatus() {
60 [Unicode::STATUS_SINGLEBYTE, Unicode::STATUS_SINGLEBYTE],
61 [rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE],
62 [rand(10, 100), Unicode::STATUS_SINGLEBYTE, TRUE],
63 [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE],
64 [rand(10, 100), Unicode::STATUS_MULTIBYTE, TRUE],
65 [Unicode::STATUS_ERROR, Unicode::STATUS_ERROR],
66 [Unicode::STATUS_MULTIBYTE, Unicode::STATUS_MULTIBYTE],
71 * Tests MIME header encoding and decoding of multibyte strings.
73 * @dataProvider providerTestMimeHeader
74 * @covers ::mimeHeaderEncode
75 * @covers ::mimeHeaderDecode
77 public function testMimeHeader($value, $encoded) {
78 $this->assertEquals($encoded, Unicode::mimeHeaderEncode($value));
79 $this->assertEquals($value, Unicode::mimeHeaderDecode($encoded));
83 * Data provider for testMimeHeader().
85 * @see testMimeHeader()
88 * An array containing a string and its encoded value.
90 public function providerTestMimeHeader() {
92 ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='],
93 // Simple ASCII characters.
99 * Tests multibyte strtolower.
101 * @dataProvider providerStrtolower
102 * @covers ::strtolower
105 public function testStrtolower($text, $expected, $multibyte = FALSE) {
106 $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
107 Unicode::setStatus($status);
108 $this->assertEquals($expected, Unicode::strtolower($text));
112 * Data provider for testStrtolower().
114 * @see testStrtolower()
117 * An array containing a string, its lowercase version and whether it should
118 * be processed as multibyte.
120 public function providerStrtolower() {
122 ['tHe QUIcK bRoWn', 'the quick brown'],
123 ['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'],
125 foreach ($cases as $case) {
126 // Test the same string both in multibyte and singlebyte conditions.
127 array_push($case, TRUE);
130 // Add a multibyte string.
131 $cases[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ', TRUE];
136 * Tests multibyte strtoupper.
138 * @dataProvider providerStrtoupper
139 * @covers ::strtoupper
142 public function testStrtoupper($text, $expected, $multibyte = FALSE) {
143 $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
144 Unicode::setStatus($status);
145 $this->assertEquals($expected, Unicode::strtoupper($text));
149 * Data provider for testStrtoupper().
151 * @see testStrtoupper()
154 * An array containing a string, its uppercase version and whether it should
155 * be processed as multibyte.
157 public function providerStrtoupper() {
159 ['tHe QUIcK bRoWn', 'THE QUICK BROWN'],
160 ['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'],
162 foreach ($cases as $case) {
163 // Test the same string both in multibyte and singlebyte conditions.
164 array_push($case, TRUE);
167 // Add a multibyte string.
168 $cases[] = ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE];
173 * Tests multibyte ucfirst.
175 * @dataProvider providerUcfirst
178 public function testUcfirst($text, $expected) {
179 $this->assertEquals($expected, Unicode::ucfirst($text));
183 * Data provider for testUcfirst().
188 * An array containing a string and its uppercase first version.
190 public function providerUcfirst() {
192 ['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'],
193 ['françAIS', 'FrançAIS'],
195 ['åwesome', 'Åwesome'],
196 // A multibyte string.
202 * Tests multibyte lcfirst.
204 * @dataProvider providerLcfirst
207 public function testLcfirst($text, $expected, $multibyte = FALSE) {
208 $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
209 Unicode::setStatus($status);
210 $this->assertEquals($expected, Unicode::lcfirst($text));
214 * Data provider for testLcfirst().
219 * An array containing a string, its lowercase first version and whether it
220 * should be processed as multibyte.
222 public function providerLcfirst() {
224 ['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'],
225 ['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'],
227 ['Åwesome', 'åwesome'],
228 // Add a multibyte string.
229 ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', TRUE],
234 * Tests multibyte ucwords.
236 * @dataProvider providerUcwords
239 public function testUcwords($text, $expected, $multibyte = FALSE) {
240 $status = $multibyte ? Unicode::STATUS_MULTIBYTE : Unicode::STATUS_SINGLEBYTE;
241 Unicode::setStatus($status);
242 $this->assertEquals($expected, Unicode::ucwords($text));
246 * Data provider for testUcwords().
251 * An array containing a string, its capitalized version and whether it should
252 * be processed as multibyte.
254 public function providerUcwords() {
256 ['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
257 ['françAIS', 'FrançAIS'],
259 ['åwesome', 'Åwesome'],
260 // Make sure we don't mangle extra spaces.
261 ['frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'],
262 // Add a multibyte string.
263 ['σion', 'Σion', TRUE],
268 * Tests multibyte strlen.
270 * @dataProvider providerStrlen
273 public function testStrlen($text, $expected) {
274 // Run through multibyte code path.
275 Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
276 $this->assertEquals($expected, Unicode::strlen($text));
277 // Run through singlebyte code path.
278 Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
279 $this->assertEquals($expected, Unicode::strlen($text));
283 * Data provider for testStrlen().
288 * An array containing a string and its length.
290 public function providerStrlen() {
292 ['tHe QUIcK bRoWn', 15],
293 ['ÜBER-åwesome', 12],
294 ['以呂波耳・ほへとち。リヌルヲ。', 15],
299 * Tests multibyte substr.
301 * @dataProvider providerSubstr
304 public function testSubstr($text, $start, $length, $expected) {
305 // Run through multibyte code path.
306 Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
307 $this->assertEquals($expected, Unicode::substr($text, $start, $length));
308 // Run through singlebyte code path.
309 Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
310 $this->assertEquals($expected, Unicode::substr($text, $start, $length));
314 * Data provider for testSubstr().
319 * An array containing:
320 * - The string to test.
321 * - The start number to be processed by substr.
322 * - The length number to be processed by substr.
323 * - The expected string result.
325 public function providerSubstr() {
327 ['frànçAIS is über-åwesome', 0, NULL, 'frànçAIS is über-åwesome'],
328 ['frànçAIS is über-åwesome', 0, 0, ''],
329 ['frànçAIS is über-åwesome', 0, 1, 'f'],
330 ['frànçAIS is über-åwesome', 0, 8, 'frànçAIS'],
331 ['frànçAIS is über-åwesome', 0, 23, 'frànçAIS is über-åwesom'],
332 ['frànçAIS is über-åwesome', 0, 24, 'frànçAIS is über-åwesome'],
333 ['frànçAIS is über-åwesome', 0, 25, 'frànçAIS is über-åwesome'],
334 ['frànçAIS is über-åwesome', 0, 100, 'frànçAIS is über-åwesome'],
335 ['frànçAIS is über-åwesome', 4, 4, 'çAIS'],
336 ['frànçAIS is über-åwesome', 1, 0, ''],
337 ['frànçAIS is über-åwesome', 100, 0, ''],
338 ['frànçAIS is über-åwesome', -4, 2, 'so'],
339 ['frànçAIS is über-åwesome', -4, 3, 'som'],
340 ['frànçAIS is über-åwesome', -4, 4, 'some'],
341 ['frànçAIS is über-åwesome', -4, 5, 'some'],
342 ['frànçAIS is über-åwesome', -7, 10, 'åwesome'],
343 ['frànçAIS is über-åwesome', 5, -10, 'AIS is üb'],
344 ['frànçAIS is über-åwesome', 0, -10, 'frànçAIS is üb'],
345 ['frànçAIS is über-åwesome', 0, -1, 'frànçAIS is über-åwesom'],
346 ['frànçAIS is über-åwesome', -7, -2, 'åweso'],
347 ['frànçAIS is über-åwesome', -7, -6, 'å'],
348 ['frànçAIS is über-åwesome', -7, -7, ''],
349 ['frànçAIS is über-åwesome', -7, -8, ''],
351 ['以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳'],
356 * Tests multibyte truncate.
358 * @dataProvider providerTruncate
361 public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
362 $this->assertEquals($expected, Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis));
366 * Data provider for testTruncate().
368 * @see testTruncate()
371 * An array containing:
372 * - The string to test.
373 * - The max length to truncate this string to.
374 * - The expected string result.
375 * - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
376 * - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
378 public function providerTruncate() {
380 ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'],
381 ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åwesom'],
382 ['frànçAIS is über-åwesome', 17, 'frànçAIS is über-'],
383 ['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'],
384 ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', FALSE, TRUE],
385 ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åweso…', FALSE, TRUE],
386 ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', FALSE, TRUE],
387 ['123', 1, '…', TRUE, TRUE],
388 ['123', 2, '1…', TRUE, TRUE],
389 ['123', 3, '123', TRUE, TRUE],
390 ['1234', 3, '12…', TRUE, TRUE],
391 ['1234567890', 10, '1234567890', TRUE, TRUE],
392 ['12345678901', 10, '123456789…', TRUE, TRUE],
393 ['12345678901', 11, '12345678901', TRUE, TRUE],
394 ['123456789012', 11, '1234567890…', TRUE, TRUE],
395 ['12345 7890', 10, '12345 7890', TRUE, TRUE],
396 ['12345 7890', 9, '12345…', TRUE, TRUE],
397 ['123 567 90', 10, '123 567 90', TRUE, TRUE],
398 ['123 567 901', 10, '123 567…', TRUE, TRUE],
399 ['Stop. Hammertime.', 17, 'Stop. Hammertime.', TRUE, TRUE],
400 ['Stop. Hammertime.', 16, 'Stop…', TRUE, TRUE],
401 ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', TRUE, TRUE],
402 ['frànçAIS is über-åwesome', 23, 'frànçAIS is über…', TRUE, TRUE],
403 ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', TRUE, TRUE],
404 ['¿Dónde está el niño?', 20, '¿Dónde está el niño?', TRUE, TRUE],
405 ['¿Dónde está el niño?', 19, '¿Dónde está el…', TRUE, TRUE],
406 ['¿Dónde está el niño?', 13, '¿Dónde está…', TRUE, TRUE],
407 ['¿Dónde está el niño?', 10, '¿Dónde…', TRUE, TRUE],
408 ['Help! Help! Help!', 17, 'Help! Help! Help!', TRUE, TRUE],
409 ['Help! Help! Help!', 16, 'Help! Help!…', TRUE, TRUE],
410 ['Help! Help! Help!', 15, 'Help! Help!…', TRUE, TRUE],
411 ['Help! Help! Help!', 14, 'Help! Help!…', TRUE, TRUE],
412 ['Help! Help! Help!', 13, 'Help! Help!…', TRUE, TRUE],
413 ['Help! Help! Help!', 12, 'Help! Help!…', TRUE, TRUE],
414 ['Help! Help! Help!', 11, 'Help! Help…', TRUE, TRUE],
415 ['Help! Help! Help!', 10, 'Help!…', TRUE, TRUE],
416 ['Help! Help! Help!', 9, 'Help!…', TRUE, TRUE],
417 ['Help! Help! Help!', 8, 'Help!…', TRUE, TRUE],
418 ['Help! Help! Help!', 7, 'Help!…', TRUE, TRUE],
419 ['Help! Help! Help!', 6, 'Help!…', TRUE, TRUE],
420 ['Help! Help! Help!', 5, 'Help…', TRUE, TRUE],
421 ['Help! Help! Help!', 4, 'Hel…', TRUE, TRUE],
422 ['Help! Help! Help!', 3, 'He…', TRUE, TRUE],
423 ['Help! Help! Help!', 2, 'H…', TRUE, TRUE],
426 // Test truncate on text with multiple lines.
428 This is a text that spans multiple lines.
431 $multi_line_wordsafe = <<<EOF
432 This is a text that spans multiple lines.
435 $multi_line_non_wordsafe = <<<EOF
436 This is a text that spans multiple lines.
439 $tests[] = [$multi_line, 51, $multi_line_wordsafe, TRUE];
440 $tests[] = [$multi_line, 51, $multi_line_non_wordsafe, FALSE];
446 * Tests multibyte truncate bytes.
448 * @dataProvider providerTestTruncateBytes
449 * @covers ::truncateBytes
451 * @param string $text
452 * The string to truncate.
453 * @param int $max_length
454 * The upper limit on the returned string length, in bytes.
455 * @param string $expected
456 * The expected return from Unicode::truncateBytes().
458 public function testTruncateBytes($text, $max_length, $expected) {
459 $this->assertEquals($expected, Unicode::truncateBytes($text, $max_length), 'The string was not correctly truncated.');
463 * Provides data for self::testTruncateBytes().
466 * An array of arrays, each containing the parameters to
467 * self::testTruncateBytes().
469 public function providerTestTruncateBytes() {
471 // String shorter than max length.
472 ['Short string', 42, 'Short string'],
473 // Simple string longer than max length.
474 ['Longer string than previous.', 10, 'Longer str'],
476 ['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'],
481 * Tests UTF-8 validation.
483 * @dataProvider providerTestValidateUtf8
484 * @covers ::validateUtf8
486 * @param string $text
487 * The text to validate.
488 * @param bool $expected
489 * The expected return value from Unicode::validateUtf8().
490 * @param string $message
491 * The message to display on failure.
493 public function testValidateUtf8($text, $expected, $message) {
494 $this->assertEquals($expected, Unicode::validateUtf8($text), $message);
498 * Provides data for self::testValidateUtf8().
500 * Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
503 * An array of arrays, each containing the parameters for
504 * self::testValidateUtf8().
506 public function providerTestValidateUtf8() {
509 ['', TRUE, 'An empty string did not validate.'],
510 // Simple text string.
511 ['Simple text.', TRUE, 'A simple ASCII text string did not validate.'],
512 // Invalid UTF-8, overlong 5 byte encoding.
513 [chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), FALSE, 'Invalid UTF-8 was validated.'],
514 // High code-point without trailing characters.
515 [chr(0xD0) . chr(0x01), FALSE, 'Invalid UTF-8 was validated.'],
520 * Tests UTF-8 conversion.
522 * @dataProvider providerTestConvertToUtf8
523 * @covers ::convertToUtf8
525 * @param string $data
526 * The data to be converted.
527 * @param string $encoding
528 * The encoding the data is in.
529 * @param string|bool $expected
530 * The expected result.
532 public function testConvertToUtf8($data, $encoding, $expected) {
533 $this->assertEquals($expected, Unicode::convertToUtf8($data, $encoding));
537 * Provides data for self::testConvertToUtf8().
540 * An array of arrays, each containing the parameters to
541 * self::testConvertToUtf8().
543 public function providerTestConvertToUtf8() {
545 [chr(0x97), 'Windows-1252', '—'],
546 [chr(0x99), 'Windows-1252', '™'],
547 [chr(0x80), 'Windows-1252', '€'],
552 * Tests multibyte strpos.
554 * @dataProvider providerStrpos
557 public function testStrpos($haystack, $needle, $offset, $expected) {
558 // Run through multibyte code path.
559 Unicode::setStatus(Unicode::STATUS_MULTIBYTE);
560 $this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
561 // Run through singlebyte code path.
562 Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
563 $this->assertEquals($expected, Unicode::strpos($haystack, $needle, $offset));
567 * Data provider for testStrpos().
572 * An array containing:
573 * - The haystack string to be searched in.
574 * - The needle string to search for.
575 * - The offset integer to start at.
576 * - The expected integer/FALSE result.
578 public function providerStrpos() {
580 ['frànçAIS is über-åwesome', 'frànçAIS is über-åwesome', 0, 0],
581 ['frànçAIS is über-åwesome', 'rànçAIS is über-åwesome', 0, 1],
582 ['frànçAIS is über-åwesome', 'not in string', 0, FALSE],
583 ['frànçAIS is über-åwesome', 'r', 0, 1],
584 ['frànçAIS is über-åwesome', 'nçAIS', 0, 3],
585 ['frànçAIS is über-åwesome', 'nçAIS', 2, 3],
586 ['frànçAIS is über-åwesome', 'nçAIS', 3, 3],
587 ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 0, 2],
588 ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 1, 2],
589 ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 2, 2],