setConfig($config); } return $diff; } /** * @param $bool * * @return $this * * @deprecated since 0.1.0 */ public function setUseTableDiffing($bool) { $this->config->setUseTableDiffing($bool); return $this; } /** * @param bool $boolean * * @return HtmlDiff * * @deprecated since 0.1.0 */ public function setInsertSpaceInReplace($boolean) { $this->config->setInsertSpaceInReplace($boolean); return $this; } /** * @return bool * * @deprecated since 0.1.0 */ public function getInsertSpaceInReplace() { return $this->config->isInsertSpaceInReplace(); } /** * @return string */ public function build() { $this->prepare(); if ($this->hasDiffCache() && $this->getDiffCache()->contains($this->oldText, $this->newText)) { $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText); return $this->content; } // Pre-processing Optimizations // 1. Equality if ($this->oldText == $this->newText) { return $this->newText; } $this->splitInputsToWords(); $this->replaceIsolatedDiffTags(); $this->indexNewWords(); $operations = $this->operations(); foreach ($operations as $item) { $this->performOperation($item); } if ($this->hasDiffCache()) { $this->getDiffCache()->save($this->oldText, $this->newText, $this->content); } return $this->content; } protected function indexNewWords() { $this->wordIndices = array(); foreach ($this->newWords as $i => $word) { if ($this->isTag($word)) { $word = $this->stripTagAttributes($word); } if (isset($this->wordIndices[ $word ])) { $this->wordIndices[ $word ][] = $i; } else { $this->wordIndices[ $word ] = array($i); } } } protected function replaceIsolatedDiffTags() { $this->oldIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->oldWords); $this->newIsolatedDiffTags = $this->createIsolatedDiffTagPlaceholders($this->newWords); } /** * @param array $words * * @return array */ protected function createIsolatedDiffTagPlaceholders(&$words) { $openIsolatedDiffTags = 0; $isolatedDiffTagIndices = array(); $isolatedDiffTagStart = 0; $currentIsolatedDiffTag = null; foreach ($words as $index => $word) { $openIsolatedDiffTag = $this->isOpeningIsolatedDiffTag($word, $currentIsolatedDiffTag); if ($openIsolatedDiffTag) { if ($this->isSelfClosingTag($word) || mb_stripos($word, ' $index, 'length' => 1, 'tagType' => $openIsolatedDiffTag, ); $currentIsolatedDiffTag = null; } } else { if ($openIsolatedDiffTags === 0) { $isolatedDiffTagStart = $index; } ++$openIsolatedDiffTags; $currentIsolatedDiffTag = $openIsolatedDiffTag; } } elseif ($openIsolatedDiffTags > 0 && $this->isClosingIsolatedDiffTag($word, $currentIsolatedDiffTag)) { --$openIsolatedDiffTags; if ($openIsolatedDiffTags == 0) { $isolatedDiffTagIndices[] = array('start' => $isolatedDiffTagStart, 'length' => $index - $isolatedDiffTagStart + 1, 'tagType' => $currentIsolatedDiffTag); $currentIsolatedDiffTag = null; } } } $isolatedDiffTagScript = array(); $offset = 0; foreach ($isolatedDiffTagIndices as $isolatedDiffTagIndex) { $start = $isolatedDiffTagIndex['start'] - $offset; $placeholderString = $this->config->getIsolatedDiffTagPlaceholder($isolatedDiffTagIndex['tagType']); $isolatedDiffTagScript[$start] = array_splice($words, $start, $isolatedDiffTagIndex['length'], $placeholderString); $offset += $isolatedDiffTagIndex['length'] - 1; } return $isolatedDiffTagScript; } /** * @param string $item * @param null|string $currentIsolatedDiffTag * * @return false|string */ protected function isOpeningIsolatedDiffTag($item, $currentIsolatedDiffTag = null) { $tagsToMatch = $currentIsolatedDiffTag !== null ? array($currentIsolatedDiffTag => $this->config->getIsolatedDiffTagPlaceholder($currentIsolatedDiffTag)) : $this->config->getIsolatedDiffTags(); $pattern = '#<%s(\s+[^>]*)?>#iUu'; foreach ($tagsToMatch as $key => $value) { if (preg_match(sprintf($pattern, $key), $item)) { return $key; } } return false; } protected function isSelfClosingTag($text) { return (bool) preg_match('/<[^>]+\/\s*>/u', $text); } /** * @param string $item * @param null|string $currentIsolatedDiffTag * * @return false|string */ protected function isClosingIsolatedDiffTag($item, $currentIsolatedDiffTag = null) { $tagsToMatch = $currentIsolatedDiffTag !== null ? array($currentIsolatedDiffTag => $this->config->getIsolatedDiffTagPlaceholder($currentIsolatedDiffTag)) : $this->config->getIsolatedDiffTags(); $pattern = '#]*)?>#iUu'; foreach ($tagsToMatch as $key => $value) { if (preg_match(sprintf($pattern, $key), $item)) { return $key; } } return false; } /** * @param Operation $operation */ protected function performOperation($operation) { switch ($operation->action) { case 'equal' : $this->processEqualOperation($operation); break; case 'delete' : $this->processDeleteOperation($operation, 'diffdel'); break; case 'insert' : $this->processInsertOperation($operation, 'diffins'); break; case 'replace': $this->processReplaceOperation($operation); break; default: break; } } /** * @param Operation $operation */ protected function processReplaceOperation($operation) { $this->processDeleteOperation($operation, 'diffmod'); $this->processInsertOperation($operation, 'diffmod'); } /** * @param Operation $operation * @param string $cssClass */ protected function processInsertOperation($operation, $cssClass) { $text = array(); foreach ($this->newWords as $pos => $s) { if ($pos >= $operation->startInNew && $pos < $operation->endInNew) { if ($this->config->isIsolatedDiffTagPlaceholder($s) && isset($this->newIsolatedDiffTags[$pos])) { foreach ($this->newIsolatedDiffTags[$pos] as $word) { $text[] = $word; } } else { $text[] = $s; } } } $this->insertTag('ins', $cssClass, $text); } /** * @param Operation $operation * @param string $cssClass */ protected function processDeleteOperation($operation, $cssClass) { $text = array(); foreach ($this->oldWords as $pos => $s) { if ($pos >= $operation->startInOld && $pos < $operation->endInOld) { if ($this->config->isIsolatedDiffTagPlaceholder($s) && isset($this->oldIsolatedDiffTags[$pos])) { foreach ($this->oldIsolatedDiffTags[$pos] as $word) { $text[] = $word; } } else { $text[] = $s; } } } $this->insertTag('del', $cssClass, $text); } /** * @param Operation $operation * @param int $pos * @param string $placeholder * @param bool $stripWrappingTags * * @return string */ protected function diffIsolatedPlaceholder($operation, $pos, $placeholder, $stripWrappingTags = true) { $oldText = implode('', $this->findIsolatedDiffTagsInOld($operation, $pos)); $newText = implode('', $this->newIsolatedDiffTags[$pos]); if ($this->isListPlaceholder($placeholder)) { return $this->diffList($oldText, $newText); } elseif ($this->config->isUseTableDiffing() && $this->isTablePlaceholder($placeholder)) { return $this->diffTables($oldText, $newText); } elseif ($this->isLinkPlaceholder($placeholder)) { return $this->diffElementsByAttribute($oldText, $newText, 'href', 'a'); } elseif ($this->isImagePlaceholder($placeholder)) { return $this->diffElementsByAttribute($oldText, $newText, 'src', 'img'); } return $this->diffElements($oldText, $newText, $stripWrappingTags); } /** * @param string $oldText * @param string $newText * @param bool $stripWrappingTags * * @return string */ protected function diffElements($oldText, $newText, $stripWrappingTags = true) { $wrapStart = ''; $wrapEnd = ''; if ($stripWrappingTags) { $pattern = '/(^<[^>]+>)|(<\/[^>]+>$)/iu'; $matches = array(); if (preg_match_all($pattern, $newText, $matches)) { $wrapStart = isset($matches[0][0]) ? $matches[0][0] : ''; $wrapEnd = isset($matches[0][1]) ? $matches[0][1] : ''; } $oldText = preg_replace($pattern, '', $oldText); $newText = preg_replace($pattern, '', $newText); } $diff = self::create($oldText, $newText, $this->config); return $wrapStart.$diff->build().$wrapEnd; } /** * @param string $oldText * @param string $newText * * @return string */ protected function diffList($oldText, $newText) { $diff = ListDiffLines::create($oldText, $newText, $this->config); return $diff->build(); } /** * @param string $oldText * @param string $newText * * @return string */ protected function diffTables($oldText, $newText) { $diff = TableDiff::create($oldText, $newText, $this->config); return $diff->build(); } protected function diffElementsByAttribute($oldText, $newText, $attribute, $element) { $oldAttribute = $this->getAttributeFromTag($oldText, $attribute); $newAttribute = $this->getAttributeFromTag($newText, $attribute); if ($oldAttribute !== $newAttribute) { $diffClass = sprintf('diffmod diff%s diff%s', $element, $attribute); return sprintf( '%s%s', $this->wrapText($oldText, 'del', $diffClass), $this->wrapText($newText, 'ins', $diffClass) ); } return $this->diffElements($oldText, $newText); } /** * @param Operation $operation */ protected function processEqualOperation($operation) { $result = array(); foreach ($this->newWords as $pos => $s) { if ($pos >= $operation->startInNew && $pos < $operation->endInNew) { if ($this->config->isIsolatedDiffTagPlaceholder($s) && isset($this->newIsolatedDiffTags[$pos])) { $result[] = $this->diffIsolatedPlaceholder($operation, $pos, $s); } else { $result[] = $s; } } } $this->content .= implode('', $result); } /** * @param string $text * @param string $attribute * * @return null|string */ protected function getAttributeFromTag($text, $attribute) { $matches = array(); if (preg_match(sprintf('/<[^>]*\b%s\s*=\s*([\'"])(.*)\1[^>]*>/iu', $attribute), $text, $matches)) { return htmlspecialchars_decode($matches[2]); } return; } /** * @param string $text * * @return bool */ protected function isListPlaceholder($text) { return $this->isPlaceholderType($text, array('ol', 'dl', 'ul')); } /** * @param string $text * * @return bool */ public function isLinkPlaceholder($text) { return $this->isPlaceholderType($text, 'a'); } /** * @param string $text * * @return bool */ public function isImagePlaceholder($text) { return $this->isPlaceholderType($text, 'img'); } /** * @param string $text * @param array|string $types * @param bool $strict * * @return bool */ protected function isPlaceholderType($text, $types, $strict = true) { if (!is_array($types)) { $types = array($types); } $criteria = array(); foreach ($types as $type) { if ($this->config->isIsolatedDiffTag($type)) { $criteria[] = $this->config->getIsolatedDiffTagPlaceholder($type); } else { $criteria[] = $type; } } return in_array($text, $criteria, $strict); } /** * @param string $text * * @return bool */ protected function isTablePlaceholder($text) { return $this->isPlaceholderType($text, 'table'); } /** * @param Operation $operation * @param int $posInNew * * @return array */ protected function findIsolatedDiffTagsInOld($operation, $posInNew) { $offset = $posInNew - $operation->startInNew; return $this->oldIsolatedDiffTags[$operation->startInOld + $offset]; } /** * @param string $tag * @param string $cssClass * @param array $words */ protected function insertTag($tag, $cssClass, &$words) { while (true) { if (count($words) == 0) { break; } $nonTags = $this->extractConsecutiveWords($words, 'noTag'); $specialCaseTagInjection = ''; $specialCaseTagInjectionIsBefore = false; if (count($nonTags) != 0) { $text = $this->wrapText(implode('', $nonTags), $tag, $cssClass); $this->content .= $text; } else { $firstOrDefault = false; foreach ($this->config->getSpecialCaseOpeningTags() as $x) { if (preg_match($x, $words[ 0 ])) { $firstOrDefault = $x; break; } } if ($firstOrDefault) { $specialCaseTagInjection = ''; if ($tag == 'del') { unset($words[ 0 ]); } } elseif (array_search($words[ 0 ], $this->config->getSpecialCaseClosingTags()) !== false) { $specialCaseTagInjection = ''; $specialCaseTagInjectionIsBefore = true; if ($tag == 'del') { unset($words[ 0 ]); } } } if (count($words) == 0 && mb_strlen($specialCaseTagInjection) == 0) { break; } if ($specialCaseTagInjectionIsBefore) { $this->content .= $specialCaseTagInjection.implode('', $this->extractConsecutiveWords($words, 'tag')); } else { $workTag = $this->extractConsecutiveWords($words, 'tag'); if (isset($workTag[ 0 ]) && $this->isOpeningTag($workTag[ 0 ]) && !$this->isClosingTag($workTag[ 0 ])) { if (mb_strpos($workTag[ 0 ], 'class=')) { $workTag[ 0 ] = str_replace('class="', 'class="diffmod ', $workTag[ 0 ]); $workTag[ 0 ] = str_replace("class='", 'class="diffmod ', $workTag[ 0 ]); } else { $workTag[ 0 ] = str_replace('>', ' class="diffmod">', $workTag[ 0 ]); } } $appendContent = implode('', $workTag).$specialCaseTagInjection; if (isset($workTag[0]) && false !== mb_stripos($workTag[0], 'wrapText($appendContent, $tag, $cssClass); } $this->content .= $appendContent; } } } /** * @param string $word * @param string $condition * * @return bool */ protected function checkCondition($word, $condition) { return $condition == 'tag' ? $this->isTag($word) : !$this->isTag($word); } /** * @param string $text * @param string $tagName * @param string $cssClass * * @return string */ protected function wrapText($text, $tagName, $cssClass) { return sprintf('<%1$s class="%2$s">%3$s', $tagName, $cssClass, $text); } /** * @param array $words * @param string $condition * * @return array */ protected function extractConsecutiveWords(&$words, $condition) { $indexOfFirstTag = null; $words = array_values($words); foreach ($words as $i => $word) { if (!$this->checkCondition($word, $condition)) { $indexOfFirstTag = $i; break; } } if ($indexOfFirstTag !== null) { $items = array(); foreach ($words as $pos => $s) { if ($pos >= 0 && $pos < $indexOfFirstTag) { $items[] = $s; } } if ($indexOfFirstTag > 0) { array_splice($words, 0, $indexOfFirstTag); } return $items; } else { $items = array(); foreach ($words as $pos => $s) { if ($pos >= 0 && $pos <= count($words)) { $items[] = $s; } } array_splice($words, 0, count($words)); return $items; } } /** * @param string $item * * @return bool */ protected function isTag($item) { return $this->isOpeningTag($item) || $this->isClosingTag($item); } /** * @param string $item * * @return bool */ protected function isOpeningTag($item) { return preg_match('#<[^>]+>\\s*#iUu', $item); } /** * @param string $item * * @return bool */ protected function isClosingTag($item) { return preg_match('#]+>\\s*#iUu', $item); } /** * @return Operation[] */ protected function operations() { $positionInOld = 0; $positionInNew = 0; $operations = array(); $matches = $this->matchingBlocks(); $matches[] = new Match(count($this->oldWords), count($this->newWords), 0); foreach ($matches as $i => $match) { $matchStartsAtCurrentPositionInOld = ($positionInOld === $match->startInOld); $matchStartsAtCurrentPositionInNew = ($positionInNew === $match->startInNew); if ($matchStartsAtCurrentPositionInOld === false && $matchStartsAtCurrentPositionInNew === false) { $action = 'replace'; } elseif ($matchStartsAtCurrentPositionInOld === true && $matchStartsAtCurrentPositionInNew === false) { $action = 'insert'; } elseif ($matchStartsAtCurrentPositionInOld === false && $matchStartsAtCurrentPositionInNew === true) { $action = 'delete'; } else { // This occurs if the first few words are the same in both versions $action = 'none'; } if ($action !== 'none') { $operations[] = new Operation($action, $positionInOld, $match->startInOld, $positionInNew, $match->startInNew); } if (count($match) !== 0) { $operations[] = new Operation('equal', $match->startInOld, $match->endInOld(), $match->startInNew, $match->endInNew()); } $positionInOld = $match->endInOld(); $positionInNew = $match->endInNew(); } return $operations; } /** * @return Match[] */ protected function matchingBlocks() { $matchingBlocks = array(); $this->findMatchingBlocks(0, count($this->oldWords), 0, count($this->newWords), $matchingBlocks); return $matchingBlocks; } /** * @param int $startInOld * @param int $endInOld * @param int $startInNew * @param int $endInNew * @param array $matchingBlocks */ protected function findMatchingBlocks($startInOld, $endInOld, $startInNew, $endInNew, &$matchingBlocks) { $match = $this->findMatch($startInOld, $endInOld, $startInNew, $endInNew); if ($match !== null) { if ($startInOld < $match->startInOld && $startInNew < $match->startInNew) { $this->findMatchingBlocks($startInOld, $match->startInOld, $startInNew, $match->startInNew, $matchingBlocks); } $matchingBlocks[] = $match; if ($match->endInOld() < $endInOld && $match->endInNew() < $endInNew) { $this->findMatchingBlocks($match->endInOld(), $endInOld, $match->endInNew(), $endInNew, $matchingBlocks); } } } /** * @param string $word * * @return string */ protected function stripTagAttributes($word) { $space = mb_strpos($word, ' ', 1); if ($space) { return '<' . mb_substr($word, 1, $space) . '>'; } return trim($word, '<>'); } /** * @param int $startInOld * @param int $endInOld * @param int $startInNew * @param int $endInNew * * @return Match|null */ protected function findMatch($startInOld, $endInOld, $startInNew, $endInNew) { $bestMatchInOld = $startInOld; $bestMatchInNew = $startInNew; $bestMatchSize = 0; $matchLengthAt = array(); for ($indexInOld = $startInOld; $indexInOld < $endInOld; ++$indexInOld) { $newMatchLengthAt = array(); $index = $this->oldWords[ $indexInOld ]; if ($this->isTag($index)) { $index = $this->stripTagAttributes($index); } if (!isset($this->wordIndices[ $index ])) { $matchLengthAt = $newMatchLengthAt; continue; } foreach ($this->wordIndices[ $index ] as $indexInNew) { if ($indexInNew < $startInNew) { continue; } if ($indexInNew >= $endInNew) { break; } $newMatchLength = (isset($matchLengthAt[ $indexInNew - 1 ]) ? $matchLengthAt[ $indexInNew - 1 ] : 0) + 1; $newMatchLengthAt[ $indexInNew ] = $newMatchLength; if ($newMatchLength > $bestMatchSize || ( $this->isGroupDiffs() && $bestMatchSize > 0 && $this->isOnlyWhitespace($this->array_slice_cached($this->oldWords, $bestMatchInOld, $bestMatchSize)) ) ) { $bestMatchInOld = $indexInOld - $newMatchLength + 1; $bestMatchInNew = $indexInNew - $newMatchLength + 1; $bestMatchSize = $newMatchLength; } } $matchLengthAt = $newMatchLengthAt; } // Skip match if none found or match consists only of whitespace if ($bestMatchSize != 0 && ( !$this->isGroupDiffs() || !$this->isOnlyWhitespace($this->array_slice_cached($this->oldWords, $bestMatchInOld, $bestMatchSize)) ) ) { return new Match($bestMatchInOld, $bestMatchInNew, $bestMatchSize); } return null; } /** * @param string $str * * @return bool */ protected function isOnlyWhitespace($str) { // Slightly faster then using preg_match return $str !== '' && (mb_strlen(trim($str)) === 0); } /** * Special array_slice function that caches its last request. * * The diff algorithm seems to request the same information many times in a row. * by returning the previous answer the algorithm preforms way faster. * * The result is a string instead of an array, this way we safe on the amount of * memory intensive implode() calls. * * @param array &$array * @param integer $offset * @param integer|null $length * * @return string */ protected function array_slice_cached(&$array, $offset, $length = null) { static $lastOffset = null; static $lastLength = null; static $cache = null; // PHP has no support for by-reference comparing. // to prevent false positive hits, reset the cache when the oldWords or newWords is changed. if ($this->resetCache === true) { $cache = null; $this->resetCache = false; } if ( $cache !== null && $lastLength === $length && $lastOffset === $offset ) { // Hit return $cache; } // Miss $lastOffset = $offset; $lastLength = $length; $cache = implode('', array_slice($array, $offset, $length)); return $cache; } }