3 namespace Caxy\HtmlDiff;
5 use Caxy\HtmlDiff\Table\TableDiff;
10 class HtmlDiff extends AbstractDiff
15 protected $wordIndices;
27 protected $newIsolatedDiffTags;
31 protected $oldIsolatedDiffTags;
/**
 * Builds a diff instance for a pair of HTML strings.
 *
 * @param string              $oldText
 * @param string              $newText
 * @param HtmlDiffConfig|null $config  Handed to setConfig() only when one is supplied.
 *
 * @return self
 */
public static function create($oldText, $newText, HtmlDiffConfig $config = null)
{
    $instance = new self($oldText, $newText);

    if ($config === null) {
        return $instance;
    }

    $instance->setConfig($config);

    return $instance;
}
/**
 * Forwards the table-diffing flag to the config object.
 *
 * @param mixed $bool Passed to the config setter unchanged (presumably a boolean).
 *
 * @return $this
 *
 * @deprecated since 0.1.0
 */
public function setUseTableDiffing($bool)
{
    $config = $this->config;
    $config->setUseTableDiffing($bool);

    return $this;
}
/**
 * Forwards the insert-space-in-replace flag to the config object.
 *
 * @param bool $boolean
 *
 * @return $this
 *
 * @deprecated since 0.1.0
 */
public function setInsertSpaceInReplace($boolean)
{
    $config = $this->config;
    $config->setInsertSpaceInReplace($boolean);

    return $this;
}
/**
 * Reads the insert-space-in-replace flag back from the config object.
 *
 * @return mixed Whatever the config's isInsertSpaceInReplace() returns (presumably bool).
 *
 * @deprecated since 0.1.0
 */
public function getInsertSpaceInReplace()
{
    $flag = $this->config->isInsertSpaceInReplace();

    return $flag;
}
/**
 * Produces the diff markup for the old/new text pair.
 *
 * Order of work: prepare the inputs, return a cached result when the diff
 * cache already holds one, short-circuit when both texts are identical,
 * otherwise split/index the words, compute the operations and render each
 * of them into $this->content. The result is stored in the diff cache when
 * one is configured.
 *
 * @return string
 */
public function build()
{
    $this->prepare();

    if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) {
        $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);

        return $this->content;
    }

    // Pre-processing Optimizations

    // 1. Equality Checks
    // Strict comparison on purpose: with "==" PHP compares numeric-looking
    // strings by value, so "1.0" vs "1" or "1e3" vs "1000" would be reported
    // as unchanged and returned without any diff markup.
    if ($this->oldText === $this->newText) {
        return $this->newText;
    }

    $this->splitInputsToWords();
    $this->replaceIsolatedDiffTags();
    $this->indexNewWords();

    $operations = $this->operations();

    foreach ($operations as $item) {
        $this->performOperation($item);
    }

    if ($this->hasDiffCache()) {
        $this->getDiffCache()->save($this->oldText, $this->newText, $this->content);
    }

    return $this->content;
}
/**
 * Rebuilds $this->wordIndices: a map from each new word (tags reduced via
 * stripTagAttributes()) to the list of positions where it occurs in newWords.
 */
protected function indexNewWords()
{
    $this->wordIndices = array();

    foreach ($this->newWords as $position => $token) {
        $key = $this->isTag($token) ? $this->stripTagAttributes($token) : $token;

        if (!isset($this->wordIndices[$key])) {
            $this->wordIndices[$key] = array();
        }

        $this->wordIndices[$key][] = $position;
    }
}
/**
 * Swaps isolated-diff-tag ranges for placeholders in both word lists and
 * remembers the removed words per list (old first, then new).
 */
protected function replaceIsolatedDiffTags()
{
    $removedFromOld = $this->createIsolatedDiffTagPlaceholders($this->oldWords);
    $this->oldIsolatedDiffTags = $removedFromOld;

    $removedFromNew = $this->createIsolatedDiffTagPlaceholders($this->newWords);
    $this->newIsolatedDiffTags = $removedFromNew;
}
/**
 * Replaces every top-level isolated diff tag range in $words with a single
 * placeholder word.
 *
 * @param array $words Word list, modified in place.
 *
 * @return array The removed word runs, keyed by the placeholder's position in the
 *               rewritten list.
 */
protected function createIsolatedDiffTagPlaceholders(&$words)
{
    $depth = 0;
    $ranges = array();
    $rangeStart = 0;
    $activeTag = null;

    // Pass 1: locate the ranges without touching the list.
    foreach ($words as $index => $word) {
        $openedTag = $this->isOpeningIsolatedDiffTag($word, $activeTag);

        if (!$openedTag) {
            if ($depth > 0 && $this->isClosingIsolatedDiffTag($word, $activeTag)) {
                --$depth;
                if ($depth == 0) {
                    $ranges[] = array(
                        'start' => $rangeStart,
                        'length' => $index - $rangeStart + 1,
                        'tagType' => $activeTag,
                    );
                    $activeTag = null;
                }
            }

            continue;
        }

        // Self-closing tags and images form a one-word range of their own,
        // but only when no other isolated tag is currently open.
        if ($this->isSelfClosingTag($word) || mb_stripos($word, '<img') !== false) {
            if ($depth === 0) {
                $ranges[] = array(
                    'start' => $index,
                    'length' => 1,
                    'tagType' => $openedTag,
                );
                $activeTag = null;
            }

            continue;
        }

        if ($depth === 0) {
            $rangeStart = $index;
        }
        ++$depth;
        $activeTag = $openedTag;
    }

    // Pass 2: splice each range out. Earlier splices shorten the list, so the
    // recorded start positions are shifted by the number of words already removed.
    $removed = array();
    $shift = 0;
    foreach ($ranges as $range) {
        $start = $range['start'] - $shift;
        $placeholder = $this->config->getIsolatedDiffTagPlaceholder($range['tagType']);
        $removed[$start] = array_splice($words, $start, $range['length'], $placeholder);
        $shift += $range['length'] - 1;
    }

    return $removed;
}
/**
 * Tests whether $item contains an opening tag of an isolated diff tag type.
 *
 * @param string      $item
 * @param null|string $currentIsolatedDiffTag When given, only this tag type is tested;
 *                                            otherwise every configured type is.
 *
 * @return false|string The matching tag type, or false when none matches.
 */
protected function isOpeningIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    if ($currentIsolatedDiffTag === null) {
        $candidates = $this->config->getIsolatedDiffTags();
    } else {
        $candidates = array(
            $currentIsolatedDiffTag => $this->config->getIsolatedDiffTagPlaceholder($currentIsolatedDiffTag),
        );
    }

    // Only the keys (tag types) take part in the match.
    foreach ($candidates as $tagType => $unused) {
        $regex = sprintf('#<%s(\s+[^>]*)?>#iUu', $tagType);
        if (preg_match($regex, $item)) {
            return $tagType;
        }
    }

    return false;
}
/**
 * Tests whether $text contains a tag that ends in "/>" (optionally with
 * whitespace between the slash and the bracket).
 *
 * @param string $text
 *
 * @return bool
 */
protected function isSelfClosingTag($text)
{
    $found = preg_match('/<[^>]+\/\s*>/u', $text);

    return $found === 1;
}
/**
 * Tests whether $item contains a closing tag of an isolated diff tag type.
 *
 * @param string      $item
 * @param null|string $currentIsolatedDiffTag When given, only this tag type is tested;
 *                                            otherwise every configured type is.
 *
 * @return false|string The matching tag type, or false when none matches.
 */
protected function isClosingIsolatedDiffTag($item, $currentIsolatedDiffTag = null)
{
    if ($currentIsolatedDiffTag === null) {
        $candidates = $this->config->getIsolatedDiffTags();
    } else {
        $candidates = array(
            $currentIsolatedDiffTag => $this->config->getIsolatedDiffTagPlaceholder($currentIsolatedDiffTag),
        );
    }

    // Only the keys (tag types) take part in the match.
    foreach ($candidates as $tagType => $unused) {
        $regex = sprintf('#</%s(\s+[^>]*)?>#iUu', $tagType);
        if (preg_match($regex, $item)) {
            return $tagType;
        }
    }

    return false;
}
/**
 * Dispatches one operation to its renderer based on $operation->action.
 * Unknown actions are ignored.
 *
 * @param Operation $operation
 */
protected function performOperation($operation)
{
    switch ($operation->action) {
        case 'equal':
            $this->processEqualOperation($operation);

            return;
        case 'delete':
            $this->processDeleteOperation($operation, 'diffdel');

            return;
        case 'insert':
            $this->processInsertOperation($operation, 'diffins');

            return;
        case 'replace':
            $this->processReplaceOperation($operation);

            return;
    }
}
/**
 * Renders a replacement as a deletion followed by an insertion, both
 * marked with the "diffmod" class.
 *
 * @param Operation $operation
 */
protected function processReplaceOperation($operation)
{
    $cssClass = 'diffmod';

    $this->processDeleteOperation($operation, $cssClass);
    $this->processInsertOperation($operation, $cssClass);
}
/**
 * Collects the new words covered by the operation (expanding placeholders
 * back into their stored words) and renders them as an <ins> run.
 *
 * @param Operation $operation
 * @param string    $cssClass
 */
protected function processInsertOperation($operation, $cssClass)
{
    $collected = array();

    foreach ($this->newWords as $pos => $s) {
        if ($pos < $operation->startInNew || $pos >= $operation->endInNew) {
            continue;
        }

        $isStoredPlaceholder = $this->config->isIsolatedDiffTagPlaceholder($s)
            && isset($this->newIsolatedDiffTags[$pos]);

        if (!$isStoredPlaceholder) {
            $collected[] = $s;
            continue;
        }

        foreach ($this->newIsolatedDiffTags[$pos] as $word) {
            $collected[] = $word;
        }
    }

    $this->insertTag('ins', $cssClass, $collected);
}
/**
 * Collects the old words covered by the operation (expanding placeholders
 * back into their stored words) and renders them as a <del> run.
 *
 * @param Operation $operation
 * @param string    $cssClass
 */
protected function processDeleteOperation($operation, $cssClass)
{
    $collected = array();

    foreach ($this->oldWords as $pos => $s) {
        if ($pos < $operation->startInOld || $pos >= $operation->endInOld) {
            continue;
        }

        $isStoredPlaceholder = $this->config->isIsolatedDiffTagPlaceholder($s)
            && isset($this->oldIsolatedDiffTags[$pos]);

        if (!$isStoredPlaceholder) {
            $collected[] = $s;
            continue;
        }

        foreach ($this->oldIsolatedDiffTags[$pos] as $word) {
            $collected[] = $word;
        }
    }

    $this->insertTag('del', $cssClass, $collected);
}
/**
 * Diffs the content hidden behind a placeholder that sits at the same
 * relative position in the old and new word lists.
 *
 * The placeholder type selects the strategy: list, table (only when table
 * diffing is enabled in the config), link (compared by href), image
 * (compared by src), or a plain nested element diff as the fallback.
 *
 * @param Operation $operation
 * @param int       $pos               Position of the placeholder in the new word list.
 * @param string    $placeholder
 * @param bool      $stripWrappingTags Only used by the fallback element diff.
 *
 * @return string
 */
protected function diffIsolatedPlaceholder($operation, $pos, $placeholder, $stripWrappingTags = true)
{
    $oldText = implode('', $this->findIsolatedDiffTagsInOld($operation, $pos));
    $newText = implode('', $this->newIsolatedDiffTags[$pos]);

    if ($this->isListPlaceholder($placeholder)) {
        return $this->diffList($oldText, $newText);
    }

    if ($this->config->isUseTableDiffing() && $this->isTablePlaceholder($placeholder)) {
        return $this->diffTables($oldText, $newText);
    }

    if ($this->isLinkPlaceholder($placeholder)) {
        return $this->diffElementsByAttribute($oldText, $newText, 'href', 'a');
    }

    if ($this->isImagePlaceholder($placeholder)) {
        return $this->diffElementsByAttribute($oldText, $newText, 'src', 'img');
    }

    return $this->diffElements($oldText, $newText, $stripWrappingTags);
}
/**
 * Runs a nested diff on two element strings.
 *
 * @param string $oldText
 * @param string $newText
 * @param bool   $stripWrappingTags When true, a leading tag and a trailing closing tag
 *                                  are removed from both inputs before diffing, and the
 *                                  ones found in $newText are put back around the result.
 *
 * @return string
 */
protected function diffElements($oldText, $newText, $stripWrappingTags = true)
{
    $prefix = '';
    $suffix = '';

    if ($stripWrappingTags) {
        $wrapperPattern = '/(^<[^>]+>)|(<\/[^>]+>$)/iu';
        $found = array();

        if (preg_match_all($wrapperPattern, $newText, $found)) {
            if (isset($found[0][0])) {
                $prefix = $found[0][0];
            }
            if (isset($found[0][1])) {
                $suffix = $found[0][1];
            }
        }

        $oldText = preg_replace($wrapperPattern, '', $oldText);
        $newText = preg_replace($wrapperPattern, '', $newText);
    }

    $innerDiff = self::create($oldText, $newText, $this->config);

    return $prefix.$innerDiff->build().$suffix;
}
/**
 * Delegates to ListDiffLines for list content, sharing this instance's config.
 *
 * @param string $oldText
 * @param string $newText
 *
 * @return string
 */
protected function diffList($oldText, $newText)
{
    $listDiff = ListDiffLines::create($oldText, $newText, $this->config);
    $markup = $listDiff->build();

    return $markup;
}
/**
 * Delegates to TableDiff for table content, sharing this instance's config.
 *
 * @param string $oldText
 * @param string $newText
 *
 * @return string
 */
protected function diffTables($oldText, $newText)
{
    $tableDiff = TableDiff::create($oldText, $newText, $this->config);
    $markup = $tableDiff->build();

    return $markup;
}
/**
 * Diffs two elements whose identity is carried by one attribute.
 *
 * When the attribute values differ, the whole old element is emitted as
 * deleted and the whole new one as inserted; otherwise the elements are
 * diffed normally.
 *
 * @param string $oldText
 * @param string $newText
 * @param string $attribute Attribute to compare (e.g. "href").
 * @param string $element   Element name, used only to build the CSS class.
 *
 * @return string
 */
protected function diffElementsByAttribute($oldText, $newText, $attribute, $element)
{
    $oldAttribute = $this->getAttributeFromTag($oldText, $attribute);
    $newAttribute = $this->getAttributeFromTag($newText, $attribute);

    if ($oldAttribute === $newAttribute) {
        return $this->diffElements($oldText, $newText);
    }

    $diffClass = sprintf('diffmod diff%s diff%s', $element, $attribute);
    $removed = $this->wrapText($oldText, 'del', $diffClass);
    $added = $this->wrapText($newText, 'ins', $diffClass);

    return $removed.$added;
}
/**
 * Appends the new words covered by an "equal" operation to the output.
 * Placeholders with stored content are replaced by a nested diff of the
 * old and new content they stand for.
 *
 * @param Operation $operation
 */
protected function processEqualOperation($operation)
{
    $pieces = array();

    foreach ($this->newWords as $pos => $s) {
        if ($pos < $operation->startInNew || $pos >= $operation->endInNew) {
            continue;
        }

        $isStoredPlaceholder = $this->config->isIsolatedDiffTagPlaceholder($s)
            && isset($this->newIsolatedDiffTags[$pos]);

        $pieces[] = $isStoredPlaceholder
            ? $this->diffIsolatedPlaceholder($operation, $pos, $s)
            : $s;
    }

    $this->content .= implode('', $pieces);
}
/**
 * Extracts the (HTML-entity-decoded) value of a quoted attribute from the
 * first tag in $text that carries it.
 *
 * @param string $text
 * @param string $attribute Attribute name; matched case-insensitively.
 *
 * @return null|string The attribute value, or null when no tag has the attribute.
 */
protected function getAttributeFromTag($text, $attribute)
{
    // - preg_quote(): the name is data, not pattern syntax.
    // - (?<![\w:-]): the name must not be the tail of a longer attribute name,
    //   so looking for "src" does not pick up "data-src".
    // - (.*?): stop at the first closing quote; a greedy match would swallow
    //   every following attribute (and even text after the tag) up to the
    //   last quote in the string.
    $pattern = sprintf(
        '/<[^>]*(?<![\w:-])%s\s*=\s*([\'"])(.*?)\1[^>]*>/iu',
        preg_quote($attribute, '/')
    );

    $matches = array();
    if (preg_match($pattern, $text, $matches)) {
        return htmlspecialchars_decode($matches[2]);
    }

    return null;
}
/**
 * Tests whether $text is the placeholder for one of the list tags (ol, dl, ul).
 *
 * @param string $text
 *
 * @return bool
 */
protected function isListPlaceholder($text)
{
    $listTags = array('ol', 'dl', 'ul');

    return $this->isPlaceholderType($text, $listTags);
}
/**
 * Tests whether $text is the placeholder for the "a" tag.
 *
 * @param string $text
 *
 * @return bool
 */
public function isLinkPlaceholder($text)
{
    $tagType = 'a';

    return $this->isPlaceholderType($text, $tagType);
}
/**
 * Tests whether $text is the placeholder for the "img" tag.
 *
 * @param string $text
 *
 * @return bool
 */
public function isImagePlaceholder($text)
{
    $tagType = 'img';

    return $this->isPlaceholderType($text, $tagType);
}
/**
 * Tests whether $text equals the placeholder of any of the given tag types.
 *
 * A type that the config does not know as an isolated diff tag is compared
 * literally instead of through its placeholder.
 *
 * @param string       $text
 * @param array|string $types
 * @param bool         $strict Passed to in_array() as its strict flag.
 *
 * @return bool
 */
protected function isPlaceholderType($text, $types, $strict = true)
{
    $typeList = is_array($types) ? $types : array($types);

    $criteria = array();
    foreach ($typeList as $type) {
        $criteria[] = $this->config->isIsolatedDiffTag($type)
            ? $this->config->getIsolatedDiffTagPlaceholder($type)
            : $type;
    }

    return in_array($text, $criteria, $strict);
}
/**
 * Tests whether $text is the placeholder for the "table" tag.
 *
 * @param string $text
 *
 * @return bool
 */
protected function isTablePlaceholder($text)
{
    $tagType = 'table';

    return $this->isPlaceholderType($text, $tagType);
}
/**
 * Looks up the stored old-side words for the placeholder that sits at the
 * same distance from the operation start as $posInNew does on the new side.
 *
 * @param Operation $operation
 * @param int       $posInNew
 *
 * @return array
 */
protected function findIsolatedDiffTagsInOld($operation, $posInNew)
{
    $posInOld = $operation->startInOld + ($posInNew - $operation->startInNew);

    return $this->oldIsolatedDiffTags[$posInOld];
}
/**
 * Appends $words to the output: runs of non-tag words are wrapped in
 * <$tag class="$cssClass">, tag words are written outside that wrapper.
 *
 * $words is consumed; every pass removes the words it has written.
 *
 * @param string $tag
 * @param string $cssClass
 * @param array  $words
 */
protected function insertTag($tag, $cssClass, &$words)
{
    while (true) {
        if (count($words) == 0) {
            break;
        }

        $nonTags = $this->extractConsecutiveWords($words, 'noTag');

        $specialCaseTagInjection = '';
        $specialCaseTagInjectionIsBefore = false;

        if (count($nonTags) != 0) {
            $text = $this->wrapText(implode('', $nonTags), $tag, $cssClass);
            $this->content .= $text;
        } else {
            // The run starts with a tag: check whether it is one of the
            // configured special-case opening/closing tags.
            $firstOrDefault = false;
            foreach ($this->config->getSpecialCaseOpeningTags() as $x) {
                if (preg_match($x, $words[ 0 ])) {
                    $firstOrDefault = $x;
                    break;
                }
            }
            if ($firstOrDefault) {
                $specialCaseTagInjection = '<ins class="mod">';
                if ($tag == 'del') {
                    unset($words[ 0 ]);
                }
            } elseif (array_search($words[ 0 ], $this->config->getSpecialCaseClosingTags()) !== false) {
                $specialCaseTagInjection = '</ins>';
                $specialCaseTagInjectionIsBefore = true;
                if ($tag == 'del') {
                    unset($words[ 0 ]);
                }
            }
        }
        if (count($words) == 0 && mb_strlen($specialCaseTagInjection) == 0) {
            break;
        }
        if ($specialCaseTagInjectionIsBefore) {
            $this->content .= $specialCaseTagInjection.implode('', $this->extractConsecutiveWords($words, 'tag'));
        } else {
            $workTag = $this->extractConsecutiveWords($words, 'tag');
            if (isset($workTag[ 0 ]) && $this->isOpeningTag($workTag[ 0 ]) && !$this->isClosingTag($workTag[ 0 ])) {
                if (mb_strpos($workTag[ 0 ], 'class=')) {
                    $workTag[ 0 ] = str_replace('class="', 'class="diffmod ', $workTag[ 0 ]);
                    // Keep the author's quote style: opening with a double quote here
                    // would leave the attribute closed by the original single quote.
                    $workTag[ 0 ] = str_replace("class='", "class='diffmod ", $workTag[ 0 ]);
                } else {
                    $workTag[ 0 ] = str_replace('>', ' class="diffmod">', $workTag[ 0 ]);
                }
            }

            $appendContent = implode('', $workTag).$specialCaseTagInjection;
            // Images carry no text of their own, so the tag itself gets wrapped.
            if (isset($workTag[0]) && false !== mb_stripos($workTag[0], '<img')) {
                $appendContent = $this->wrapText($appendContent, $tag, $cssClass);
            }
            $this->content .= $appendContent;
        }
    }
}
/**
 * Tests a word against a condition name: 'tag' means "is a tag", any other
 * value means "is not a tag".
 *
 * @param string $word
 * @param string $condition
 *
 * @return bool
 */
protected function checkCondition($word, $condition)
{
    $wordIsTag = $this->isTag($word);

    if ($condition == 'tag') {
        return $wordIsTag;
    }

    return !$wordIsTag;
}
/**
 * Wraps $text in an element of the given name carrying the given class.
 *
 * @param string $text
 * @param string $tagName
 * @param string $cssClass
 *
 * @return string
 */
protected function wrapText($text, $tagName, $cssClass)
{
    $open = '<'.$tagName.' class="'.$cssClass.'">';
    $close = '</'.$tagName.'>';

    return $open.$text.$close;
}
/**
 * Removes and returns the leading run of words that satisfy $condition.
 *
 * $words is re-indexed first; when every word satisfies the condition the
 * whole list is returned and $words is left empty.
 *
 * @param array  $words     Modified in place.
 * @param string $condition See checkCondition().
 *
 * @return array
 */
protected function extractConsecutiveWords(&$words, $condition)
{
    $words = array_values($words);

    // Length of the leading run = index of the first word failing the condition.
    $runLength = count($words);
    foreach ($words as $i => $word) {
        if (!$this->checkCondition($word, $condition)) {
            $runLength = $i;
            break;
        }
    }

    // array_splice() hands back exactly the words it removes.
    return array_splice($words, 0, $runLength);
}
/**
 * Tests whether $item matches the opening-tag or the closing-tag pattern.
 *
 * @param string $item
 *
 * @return bool
 */
protected function isTag($item)
{
    if ($this->isOpeningTag($item)) {
        return true;
    }

    return (bool) $this->isClosingTag($item);
}
/**
 * Tests whether $item contains something of the form "<...>".
 *
 * Note that the pattern does not exclude a leading slash, so closing tags
 * match as well; callers combine this with isClosingTag() to tell them apart.
 *
 * @param string $item
 *
 * @return bool
 */
protected function isOpeningTag($item)
{
    // Cast so the predicate yields a real boolean (as isSelfClosingTag() does)
    // instead of leaking preg_match()'s int|false result.
    return (bool) preg_match('#<[^>]+>\\s*#iUu', $item);
}
/**
 * Tests whether $item contains something of the form "</...>".
 *
 * @param string $item
 *
 * @return bool
 */
protected function isClosingTag($item)
{
    // Cast so the predicate yields a real boolean (as isSelfClosingTag() does)
    // instead of leaking preg_match()'s int|false result.
    return (bool) preg_match('#</[^>]+>\\s*#iUu', $item);
}
/**
 * Turns the matching blocks into the ordered list of operations
 * (replace / insert / delete for the gaps, equal for the blocks).
 *
 * @return Operation[]
 */
protected function operations()
{
    $positionInOld = 0;
    $positionInNew = 0;
    $operations = array();

    $blocks = $this->matchingBlocks();
    // Zero-length block at the very end so the trailing gap is handled like any other.
    $blocks[] = new Match(count($this->oldWords), count($this->newWords), 0);

    foreach ($blocks as $block) {
        $oldIsAligned = ($positionInOld === $block->startInOld);
        $newIsAligned = ($positionInNew === $block->startInNew);

        if ($oldIsAligned && $newIsAligned) {
            // No gap on either side, e.g. when both versions start with the same words.
            $action = null;
        } elseif ($oldIsAligned) {
            $action = 'insert';
        } elseif ($newIsAligned) {
            $action = 'delete';
        } else {
            $action = 'replace';
        }

        if ($action !== null) {
            $operations[] = new Operation($action, $positionInOld, $block->startInOld, $positionInNew, $block->startInNew);
        }

        if (count($block) !== 0) {
            $operations[] = new Operation('equal', $block->startInOld, $block->endInOld(), $block->startInNew, $block->endInNew());
        }

        $positionInOld = $block->endInOld();
        $positionInNew = $block->endInNew();
    }

    return $operations;
}
/**
 * Collects the matching blocks over the full range of both word lists.
 *
 * @return array
 */
protected function matchingBlocks()
{
    $blocks = array();
    $oldCount = count($this->oldWords);
    $newCount = count($this->newWords);

    $this->findMatchingBlocks(0, $oldCount, 0, $newCount, $blocks);

    return $blocks;
}
/**
 * Finds the match for the given window, then recurses into the part before
 * it and the part after it, appending the blocks in positional order.
 *
 * @param int   $startInOld
 * @param int   $endInOld
 * @param int   $startInNew
 * @param int   $endInNew
 * @param array $matchingBlocks Result list, appended to in place.
 */
protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks)
{
    $match = $this->findMatch($startInOld, $endInOld, $startInNew, $endInNew);

    if ($match === null) {
        return;
    }

    $hasRoomBefore = $startInOld < $match->startInOld && $startInNew < $match->startInNew;
    if ($hasRoomBefore) {
        $this->findMatchingBlocks($startInOld, $match->startInOld, $startInNew, $match->startInNew, $matchingBlocks);
    }

    $matchingBlocks[] = $match;

    $hasRoomAfter = $match->endInOld() < $endInOld && $match->endInNew() < $endInNew;
    if ($hasRoomAfter) {
        $this->findMatchingBlocks($match->endInOld(), $endInOld, $match->endInNew(), $endInNew, $matchingBlocks);
    }
}
/**
 * Reduces a tag word to a canonical "<name>" form without attributes, so
 * that tags differing only in their attributes share one lookup key.
 *
 * The result always keeps its angle brackets: a tag must never share a key
 * with a plain text word (e.g. "<b>" vs the word "b"), and a tag with
 * attributes must map to the same key as the same tag without any.
 *
 * @param string $word A tag word such as '<a href="x">' or '</a>'.
 *
 * @return string
 */
protected function stripTagAttributes($word)
{
    $space = mb_strpos($word, ' ', 1);

    if ($space) {
        // Characters 1 .. $space-1 are the tag name; the space itself is excluded.
        return '<' . mb_substr($word, 1, $space - 1) . '>';
    }

    return '<' . trim($word, '<>') . '>';
}
/**
 * Finds the longest run of consecutive words shared by the old window
 * [$startInOld, $endInOld) and the new window [$startInNew, $endInNew).
 *
 * When diff grouping is enabled, a best match consisting only of whitespace
 * is replaced by any later candidate, and is not returned at all.
 *
 * @param int $startInOld
 * @param int $endInOld
 * @param int $startInNew
 * @param int $endInNew
 *
 * @return Match|null
 */
protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew)
{
    $bestMatchInOld = $startInOld;
    $bestMatchInNew = $startInNew;
    $bestMatchSize = 0;
    // Run lengths ending at each new-side index, for the previous old word.
    $previousRow = array();

    for ($indexInOld = $startInOld; $indexInOld < $endInOld; ++$indexInOld) {
        $currentRow = array();

        $key = $this->oldWords[$indexInOld];
        if ($this->isTag($key)) {
            $key = $this->stripTagAttributes($key);
        }

        if (isset($this->wordIndices[$key])) {
            foreach ($this->wordIndices[$key] as $indexInNew) {
                if ($indexInNew < $startInNew) {
                    continue;
                }
                if ($indexInNew >= $endInNew) {
                    break;
                }

                // Extend the run that ended one position earlier, if there was one.
                $runLength = 1;
                if (isset($previousRow[$indexInNew - 1])) {
                    $runLength += $previousRow[$indexInNew - 1];
                }
                $currentRow[$indexInNew] = $runLength;

                $takeCandidate = $runLength > $bestMatchSize;
                if (!$takeCandidate) {
                    $takeCandidate = $this->isGroupDiffs()
                        && $bestMatchSize > 0
                        && $this->isOnlyWhitespace($this->array_slice_cached($this->oldWords, $bestMatchInOld, $bestMatchSize));
                }

                if ($takeCandidate) {
                    $bestMatchInOld = $indexInOld - $runLength + 1;
                    $bestMatchInNew = $indexInNew - $runLength + 1;
                    $bestMatchSize = $runLength;
                }
            }
        }

        $previousRow = $currentRow;
    }

    // Skip match if none found or match consists only of whitespace
    if ($bestMatchSize == 0) {
        return null;
    }

    if ($this->isGroupDiffs()
        && $this->isOnlyWhitespace($this->array_slice_cached($this->oldWords, $bestMatchInOld, $bestMatchSize))
    ) {
        return null;
    }

    return new Match($bestMatchInOld, $bestMatchInNew, $bestMatchSize);
}
/**
 * Tests whether $str is non-empty and consists solely of characters that
 * trim() strips by default.
 *
 * @param string $str
 *
 * @return bool
 */
protected function isOnlyWhitespace($str)
{
    if ($str === '') {
        return false;
    }

    // Slightly faster than using preg_match
    return mb_strlen(trim($str)) === 0;
}
/**
 * array_slice() variant that joins the slice into a string and remembers
 * its last answer.
 *
 * The diff algorithm tends to ask for the same slice many times in a row;
 * handing back the previous answer makes it considerably faster. Returning
 * a string rather than an array saves the callers repeated implode() calls.
 *
 * The remembered answer is keyed on offset and length only, not on the
 * array, so $this->resetCache must be set whenever the word lists change.
 * The cache lives in function-static variables.
 *
 * @param array    &$array
 * @param int      $offset
 * @param int|null $length
 *
 * @return string
 */
protected function array_slice_cached(&$array, $offset, $length = null)
{
    static $lastOffset = null;
    static $lastLength = null;
    static $cache = null;

    // PHP cannot compare by reference, so the cache cannot tell two arrays apart.
    // To avoid false hits it is dropped when oldWords or newWords has changed.
    if ($this->resetCache === true) {
        $cache = null;

        $this->resetCache = false;
    }

    $isHit = $cache !== null
        && $lastLength === $length
        && $lastOffset === $offset;

    if (!$isHit) {
        $lastOffset = $offset;
        $lastLength = $length;

        $cache = implode('', array_slice($array, $offset, $length));
    }

    return $cache;
}