3 namespace Drupal\Tests\Component\Utility;
5 use Drupal\Component\Utility\Unicode;
6 use PHPUnit\Framework\TestCase;
9 * Tests Unicode handling features implemented in the Unicode component.
13 * @coversDefaultClass \Drupal\Component\Utility\Unicode
15 class UnicodeTest extends TestCase {
/**
 * Calls the deprecated Unicode::setStatus() with the single-byte status.
 *
 * The only visible statement is the call itself; the expected outcome is the
 * deprecation notice declared in the annotation below.
 *
 * @expectedDeprecation \Drupal\Component\Utility\Unicode::setStatus() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In Drupal 9 there will be no way to set the status and in Drupal 8 this ability has been removed because mb_*() functions are supplied using Symfony's polyfill. See https://www.drupal.org/node/2850048.
 */
public function testSetStatus() {
  $status = Unicode::STATUS_SINGLEBYTE;
  Unicode::setStatus($status);
}
/**
 * Tests multibyte encoding and decoding of MIME header values.
 *
 * Each dataset is checked in both directions: the plain value must encode to
 * the given encoded form, and the encoded form must decode back to the plain
 * value.
 *
 * @dataProvider providerTestMimeHeader
 * @covers ::mimeHeaderEncode
 * @covers ::mimeHeaderDecode
 *
 * @param string $value
 *   The plain header value.
 * @param string $encoded
 *   The encoded form expected for the plain value.
 */
public function testMimeHeader($value, $encoded) {
  // Plain value to encoded form.
  $encode_result = Unicode::mimeHeaderEncode($value);
  $this->assertEquals($encoded, $encode_result);

  // Encoded form back to plain value.
  $decode_result = Unicode::mimeHeaderDecode($encoded);
  $this->assertEquals($value, $decode_result);
}
38 * Data provider for testMimeHeader().
40 * @see testMimeHeader()
43 * An array containing a string and its encoded value.
45 public function providerTestMimeHeader() {
// Each dataset is [plain value, encoded value], in the parameter order of
// testMimeHeader($value, $encoded).
// The first dataset uses a non-ASCII name; its encoded form is a UTF-8,
// Base64 ("B") encoded-word.
47 ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='],
48 // Simple ASCII characters.
// NOTE(review): the dataset introduced by the comment above is not visible in
// this excerpt — confirm it is present in the full file.
/**
 * Tests multibyte strtolower.
 *
 * @dataProvider providerStrtolower
 * @covers ::strtolower
 * @expectedDeprecation \Drupal\Component\Utility\Unicode::strtolower() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtolower() instead. See https://www.drupal.org/node/2850048.
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The lowercase string expected for the input.
 */
public function testStrtolower($text, $expected) {
  $actual = Unicode::strtolower($text);
  $this->assertEquals($expected, $actual);
}
/**
 * Data provider for testStrtolower().
 *
 * @see testStrtolower()
 *
 * @return array
 *   A list of datasets, each holding an input string followed by the
 *   lowercase version expected for it.
 */
public function providerStrtolower() {
  $datasets = [];

  // Mixed-case ASCII text.
  $datasets[] = ['tHe QUIcK bRoWn', 'the quick brown'];
  // Latin text with accented letters.
  $datasets[] = ['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'];
  // Greek capital letters.
  $datasets[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ'];

  return $datasets;
}
/**
 * Tests multibyte strtoupper.
 *
 * @dataProvider providerStrtoupper
 * @covers ::strtoupper
 * @expectedDeprecation \Drupal\Component\Utility\Unicode::strtoupper() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtoupper() instead. See https://www.drupal.org/node/2850048.
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The uppercase string expected for the input.
 */
public function testStrtoupper($text, $expected) {
  $actual = Unicode::strtoupper($text);
  $this->assertEquals($expected, $actual);
}
/**
 * Data provider for testStrtoupper().
 *
 * @see testStrtoupper()
 *
 * @return array
 *   A list of datasets, each holding an input string followed by the
 *   uppercase version expected for it.
 */
public function providerStrtoupper() {
  $datasets = [];

  // Mixed-case ASCII text.
  $datasets[] = ['tHe QUIcK bRoWn', 'THE QUICK BROWN'];
  // Latin text with accented letters.
  $datasets[] = ['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'];
  // Greek small letters.
  $datasets[] = ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'];

  return $datasets;
}
/**
 * Tests multibyte ucfirst.
 *
 * @dataProvider providerUcfirst
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The string expected once its first character has been uppercased.
 */
public function testUcfirst($text, $expected) {
  $actual = Unicode::ucfirst($text);
  $this->assertEquals($expected, $actual);
}
122 * Data provider for testUcfirst().
127 * An array containing a string and its uppercase first version.
129 public function providerUcfirst() {
// Each dataset is [input, expected]. In every visible row the expected
// string differs from the input only in the case of its first character.
131 ['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'],
132 ['françAIS', 'FrançAIS'],
// Input whose first character is non-ASCII.
134 ['åwesome', 'Åwesome'],
135 // A multibyte string.
// NOTE(review): no dataset follows the comment above in this excerpt —
// confirm against the full file.
/**
 * Tests multibyte lcfirst.
 *
 * @dataProvider providerLcfirst
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The string expected once its first character has been lowercased.
 */
public function testLcfirst($text, $expected) {
  $actual = Unicode::lcfirst($text);
  $this->assertEquals($expected, $actual);
}
151 * Data provider for testLcfirst().
156 * An array containing a string and its lowercase version.
158 public function providerLcfirst() {
// Each dataset is [input, expected]. Only the first character is expected
// to change case; the rest of the string stays as given.
// First row: the input already starts with a lowercase letter, so the
// expected value equals the input.
160 ['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'],
161 ['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'],
// Input whose first character is non-ASCII.
163 ['Åwesome', 'åwesome'],
164 // Add a multibyte string.
165 ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'],
/**
 * Tests multibyte ucwords.
 *
 * @dataProvider providerUcwords
 *
 * @param string $text
 *   The string to convert.
 * @param string $expected
 *   The capitalized string expected for the input.
 */
public function testUcwords($text, $expected) {
  $actual = Unicode::ucwords($text);
  $this->assertEquals($expected, $actual);
}
180 * Data provider for testUcwords().
185 * An array containing a string and its capitalized version.
187 public function providerUcwords() {
// Each dataset is [input, expected]. Only the first letter of each word is
// expected to be uppercased; the remaining letters keep their case.
189 ['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
190 ['françAIS', 'FrançAIS'],
192 ['åwesome', 'Åwesome'],
193 // Make sure we don't mangle extra spaces.
// In the row below the letter after the hyphen is also expected uppercase.
// NOTE(review): this excerpt shows only single spaces in the literals —
// confirm the full file contains the repeated spaces the comment refers to.
194 ['frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'],
195 // Add a multibyte string.
// NOTE(review): no dataset follows the comment above in this excerpt —
// confirm against the full file.
/**
 * Tests multibyte strlen.
 *
 * @dataProvider providerStrlen
 * @expectedDeprecation \Drupal\Component\Utility\Unicode::strlen() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strlen() instead. See https://www.drupal.org/node/2850048.
 *
 * @param string $text
 *   The string to measure.
 * @param int $expected
 *   The expected length of the string.
 */
public function testStrlen($text, $expected) {
  // Compare strictly: the provider supplies integer lengths, and a loose
  // comparison would also accept a numeric string or float of equal value.
  $this->assertSame($expected, Unicode::strlen($text));
}
/**
 * Data provider for testStrlen().
 *
 * @return array
 *   A list of datasets, each holding a string followed by its expected
 *   length.
 */
public function providerStrlen() {
  $datasets = [];

  // ASCII only.
  $datasets[] = ['tHe QUIcK bRoWn', 15];
  // Latin text containing accented letters.
  $datasets[] = ['ÜBER-åwesome', 12];
  // Japanese text.
  $datasets[] = ['以呂波耳・ほへとち。リヌルヲ。', 15];

  return $datasets;
}
/**
 * Tests multibyte substr.
 *
 * @dataProvider providerSubstr
 * @expectedDeprecation \Drupal\Component\Utility\Unicode::substr() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_substr() instead. See https://www.drupal.org/node/2850048.
 *
 * @param string $text
 *   The string to test.
 * @param int $start
 *   The start number to be processed by substr.
 * @param int|null $length
 *   The length number to be processed by substr.
 * @param string $expected
 *   The expected string result.
 */
public function testSubstr($text, $start, $length, $expected) {
  $actual = Unicode::substr($text, $start, $length);
  $this->assertEquals($expected, $actual);
}
241 * Data provider for testSubstr().
246 * An array containing:
247 * - The string to test.
248 * - The start number to be processed by substr.
249 * - The length number to be processed by substr.
250 * - The expected string result.
252 public function providerSubstr() {
// Each dataset is [text, start, length, expected], in the parameter order of
// testSubstr().
// Start 0 with a NULL, zero, in-range and over-long length.
254 ['frànçAIS is über-åwesome', 0, NULL, 'frànçAIS is über-åwesome'],
255 ['frànçAIS is über-åwesome', 0, 0, ''],
256 ['frànçAIS is über-åwesome', 0, 1, 'f'],
257 ['frànçAIS is über-åwesome', 0, 8, 'frànçAIS'],
258 ['frànçAIS is über-åwesome', 0, 23, 'frànçAIS is über-åwesom'],
259 ['frànçAIS is über-åwesome', 0, 24, 'frànçAIS is über-åwesome'],
260 ['frànçAIS is über-åwesome', 0, 25, 'frànçAIS is über-åwesome'],
261 ['frànçAIS is über-åwesome', 0, 100, 'frànçAIS is über-åwesome'],
// Positive, non-zero start.
262 ['frànçAIS is über-åwesome', 4, 4, 'çAIS'],
263 ['frànçAIS is über-åwesome', 1, 0, ''],
264 ['frànçAIS is über-åwesome', 100, 0, ''],
// Negative start: the expected values begin that many characters before the
// end of the text.
265 ['frànçAIS is über-åwesome', -4, 2, 'so'],
266 ['frànçAIS is über-åwesome', -4, 3, 'som'],
267 ['frànçAIS is über-åwesome', -4, 4, 'some'],
268 ['frànçAIS is über-åwesome', -4, 5, 'some'],
269 ['frànçAIS is über-åwesome', -7, 10, 'åwesome'],
// Negative length: the expected values stop that many characters before the
// end of the text.
270 ['frànçAIS is über-åwesome', 5, -10, 'AIS is üb'],
271 ['frànçAIS is über-åwesome', 0, -10, 'frànçAIS is üb'],
272 ['frànçAIS is über-åwesome', 0, -1, 'frànçAIS is über-åwesom'],
// Negative start combined with negative length, down to an empty result.
273 ['frànçAIS is über-åwesome', -7, -2, 'åweso'],
274 ['frànçAIS is über-åwesome', -7, -6, 'å'],
275 ['frànçAIS is über-åwesome', -7, -7, ''],
276 ['frànçAIS is über-åwesome', -7, -8, ''],
// Japanese text: start and length are expected to count characters.
278 ['以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳'],
/**
 * Tests multibyte truncate.
 *
 * @dataProvider providerTruncate
 *
 * @param string $text
 *   The string to test.
 * @param int $max_length
 *   The max length to truncate this string to.
 * @param string $expected
 *   The expected string result.
 * @param bool $wordsafe
 *   (optional) The wordsafe flag passed on to Unicode::truncate(). Defaults
 *   to FALSE.
 * @param bool $add_ellipsis
 *   (optional) The add-ellipsis flag passed on to Unicode::truncate().
 *   Defaults to FALSE.
 */
public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
  $actual = Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis);
  $this->assertEquals($expected, $actual);
}
293 * Data provider for testTruncate().
295 * @see testTruncate()
298 * An array containing:
299 * - The string to test.
300 * - The max length to truncate this string to.
301 * - The expected string result.
302 * - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
303 * - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
305 public function providerTruncate() {
// Each dataset is [text, max length, expected, wordsafe, add ellipsis]; the
// last two entries may be omitted. Datasets are collected in $tests, which
// the multi-line cases at the end of this method append to.
// NOTE(review): the heredoc bodies and delimiters of the multi-line cases
// are only partly visible in this excerpt — confirm against the full file.
// Plain truncation: neither flag is given.
307 ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'],
308 ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åwesom'],
309 ['frànçAIS is über-åwesome', 17, 'frànçAIS is über-'],
310 ['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'],
// Ellipsis without wordsafe: in the expected values the ellipsis counts
// towards the max length.
311 ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', FALSE, TRUE],
312 ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åweso…', FALSE, TRUE],
313 ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', FALSE, TRUE],
// Wordsafe and ellipsis together.
314 ['123', 1, '…', TRUE, TRUE],
315 ['123', 2, '1…', TRUE, TRUE],
316 ['123', 3, '123', TRUE, TRUE],
317 ['1234', 3, '12…', TRUE, TRUE],
318 ['1234567890', 10, '1234567890', TRUE, TRUE],
319 ['12345678901', 10, '123456789…', TRUE, TRUE],
320 ['12345678901', 11, '12345678901', TRUE, TRUE],
321 ['123456789012', 11, '1234567890…', TRUE, TRUE],
322 ['12345 7890', 10, '12345 7890', TRUE, TRUE],
323 ['12345 7890', 9, '12345…', TRUE, TRUE],
324 ['123 567 90', 10, '123 567 90', TRUE, TRUE],
325 ['123 567 901', 10, '123 567…', TRUE, TRUE],
326 ['Stop. Hammertime.', 17, 'Stop. Hammertime.', TRUE, TRUE],
327 ['Stop. Hammertime.', 16, 'Stop…', TRUE, TRUE],
328 ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', TRUE, TRUE],
329 ['frànçAIS is über-åwesome', 23, 'frànçAIS is über…', TRUE, TRUE],
330 ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', TRUE, TRUE],
331 ['¿Dónde está el niño?', 20, '¿Dónde está el niño?', TRUE, TRUE],
332 ['¿Dónde está el niño?', 19, '¿Dónde está el…', TRUE, TRUE],
333 ['¿Dónde está el niño?', 13, '¿Dónde está…', TRUE, TRUE],
334 ['¿Dónde está el niño?', 10, '¿Dónde…', TRUE, TRUE],
// The same sentence at every max length from 17 down to 2.
335 ['Help! Help! Help!', 17, 'Help! Help! Help!', TRUE, TRUE],
336 ['Help! Help! Help!', 16, 'Help! Help!…', TRUE, TRUE],
337 ['Help! Help! Help!', 15, 'Help! Help!…', TRUE, TRUE],
338 ['Help! Help! Help!', 14, 'Help! Help!…', TRUE, TRUE],
339 ['Help! Help! Help!', 13, 'Help! Help!…', TRUE, TRUE],
340 ['Help! Help! Help!', 12, 'Help! Help!…', TRUE, TRUE],
341 ['Help! Help! Help!', 11, 'Help! Help…', TRUE, TRUE],
342 ['Help! Help! Help!', 10, 'Help!…', TRUE, TRUE],
343 ['Help! Help! Help!', 9, 'Help!…', TRUE, TRUE],
344 ['Help! Help! Help!', 8, 'Help!…', TRUE, TRUE],
345 ['Help! Help! Help!', 7, 'Help!…', TRUE, TRUE],
346 ['Help! Help! Help!', 6, 'Help!…', TRUE, TRUE],
347 ['Help! Help! Help!', 5, 'Help…', TRUE, TRUE],
348 ['Help! Help! Help!', 4, 'Hel…', TRUE, TRUE],
349 ['Help! Help! Help!', 3, 'He…', TRUE, TRUE],
350 ['Help! Help! Help!', 2, 'H…', TRUE, TRUE],
353 // Test truncate on text with multiple lines.
355 This is a text that spans multiple lines.
358 $multi_line_wordsafe = <<<EOF
359 This is a text that spans multiple lines.
362 $multi_line_non_wordsafe = <<<EOF
363 This is a text that spans multiple lines.
366 $tests[] = [$multi_line, 51, $multi_line_wordsafe, TRUE];
367 $tests[] = [$multi_line, 51, $multi_line_non_wordsafe, FALSE];
/**
 * Tests multibyte truncate bytes.
 *
 * @dataProvider providerTestTruncateBytes
 * @covers ::truncateBytes
 *
 * @param string $text
 *   The string to truncate.
 * @param int $max_length
 *   The upper limit on the returned string length.
 * @param string $expected
 *   The expected return from Unicode::truncateBytes().
 */
public function testTruncateBytes($text, $max_length, $expected) {
  $truncated = Unicode::truncateBytes($text, $max_length);
  $failure_message = 'The string was not correctly truncated.';
  $this->assertEquals($expected, $truncated, $failure_message);
}
390 * Provides data for self::testTruncateBytes().
393 * An array of arrays, each containing the parameters to
394 * self::testTruncateBytes().
396 public function providerTestTruncateBytes() {
// Each dataset is [text, max length, expected], in the parameter order of
// testTruncateBytes().
398 // String shorter than max length.
399 ['Short string', 42, 'Short string'],
400 // Simple string longer than max length.
401 ['Longer string than previous.', 10, 'Longer str'],
// Each character below takes three bytes in UTF-8, so only three whole
// characters (nine bytes) are expected to fit within the 10-byte limit.
403 ['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'],
/**
 * Tests UTF-8 validation.
 *
 * @dataProvider providerTestValidateUtf8
 * @covers ::validateUtf8
 *
 * @param string $text
 *   The text to validate.
 * @param bool $expected
 *   The expected return value from Unicode::validateUtf8().
 * @param string $message
 *   The message to display on failure.
 */
public function testValidateUtf8($text, $expected, $message) {
  // Compare strictly: the expected value is a boolean, and a loose comparison
  // would also accept truthy or falsy non-boolean results such as 1, 0 or
  // NULL.
  $this->assertSame($expected, Unicode::validateUtf8($text), $message);
}
425 * Provides data for self::testValidateUtf8().
427 * Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
430 * An array of arrays, each containing the parameters for
431 * self::testValidateUtf8().
433 public function providerTestValidateUtf8() {
// Each dataset is [text, expected validity, failure message], in the
// parameter order of testValidateUtf8().
436 ['', TRUE, 'An empty string did not validate.'],
437 // Simple text string.
438 ['Simple text.', TRUE, 'A simple ASCII text string did not validate.'],
439 // Invalid UTF-8, overlong 5 byte encoding.
440 [chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), FALSE, 'Invalid UTF-8 was validated.'],
441 // High code-point without trailing characters.
// 0xD0 opens a two-byte sequence, but 0x01 is not a continuation byte
// (continuation bytes lie in 0x80-0xBF).
442 [chr(0xD0) . chr(0x01), FALSE, 'Invalid UTF-8 was validated.'],
/**
 * Tests UTF-8 conversion.
 *
 * @dataProvider providerTestConvertToUtf8
 * @covers ::convertToUtf8
 *
 * @param string $data
 *   The data to be converted.
 * @param string $encoding
 *   The encoding the data is in.
 * @param string|bool $expected
 *   The expected result.
 */
public function testConvertToUtf8($data, $encoding, $expected) {
  $converted = Unicode::convertToUtf8($data, $encoding);
  $this->assertEquals($expected, $converted);
}
/**
 * Provides data to self::testConvertToUtf8().
 *
 * @return array
 *   An array of arrays, each containing the parameters to
 *   self::testConvertToUtf8(): the raw data, the encoding it is in, and the
 *   expected UTF-8 string.
 */
public function providerTestConvertToUtf8() {
  // Every dataset supplies a single byte in the same source encoding.
  $encoding = 'Windows-1252';

  $datasets = [];
  foreach ([0x97 => '—', 0x99 => '™', 0x80 => '€'] as $byte => $expected) {
    $datasets[] = [chr($byte), $encoding, $expected];
  }

  return $datasets;
}
/**
 * Tests multibyte strpos.
 *
 * @dataProvider providerStrpos
 * @expectedDeprecation \Drupal\Component\Utility\Unicode::strpos() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strpos() instead. See https://www.drupal.org/node/2850048.
 *
 * @param string $haystack
 *   The haystack string to be searched in.
 * @param string $needle
 *   The needle string to search for.
 * @param int $offset
 *   The offset integer to start at.
 * @param int|false $expected
 *   The expected integer/FALSE result.
 */
public function testStrpos($haystack, $needle, $offset, $expected) {
  // Compare strictly: the provider expects both position 0 and FALSE, and a
  // loose comparison treats the two as equal, so a "not found" result could
  // pass as "found at the start" and vice versa.
  $this->assertSame($expected, Unicode::strpos($haystack, $needle, $offset));
}
491 * Data provider for testStrpos().
496 * An array containing:
497 * - The haystack string to be searched in.
498 * - The needle string to search for.
499 * - The offset integer to start at.
500 * - The expected integer/FALSE result.
502 public function providerStrpos() {
// Each dataset is [haystack, needle, offset, expected], in the parameter
// order of testStrpos(). Expected positions count characters from the start
// of the haystack, whatever the offset.
// NOTE(review): this method runs to the end of the visible excerpt — confirm
// the remaining lines against the full file.
504 ['frànçAIS is über-åwesome', 'frànçAIS is über-åwesome', 0, 0],
505 ['frànçAIS is über-åwesome', 'rànçAIS is über-åwesome', 0, 1],
// A needle that does not occur is expected to yield FALSE, not a position.
506 ['frànçAIS is über-åwesome', 'not in string', 0, FALSE],
507 ['frànçAIS is über-åwesome', 'r', 0, 1],
// The needle starts after the two-byte character "à", so position 3 is a
// character index rather than a byte index.
508 ['frànçAIS is über-åwesome', 'nçAIS', 0, 3],
509 ['frànçAIS is über-åwesome', 'nçAIS', 2, 3],
510 ['frànçAIS is über-åwesome', 'nçAIS', 3, 3],
// Japanese text, searched from offsets up to the position of the match.
511 ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 0, 2],
512 ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 1, 2],
513 ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 2, 2],