3 namespace Caxy\HtmlDiff;
5 use Caxy\HtmlDiff\ListDiff\DiffList;
6 use Caxy\HtmlDiff\ListDiff\DiffListItem;
8 class ListDiff extends AbstractDiff
/**
 * Tag names of list containers.
 *
 * NOTE(review): the tag helpers visible in this class hard-code the same three
 * names instead of reading this property - confirm where it is consumed.
 *
 * @var string[]
 */
protected static $listTypes = array('ul', 'ol', 'dl');
/**
 * Creates a differ for the given old and new text, optionally applying a configuration.
 *
 * @param string              $oldText
 * @param string              $newText
 * @param HtmlDiffConfig|null $config  passed to setConfig() only when it is not null
 *
 * @return self
 */
public static function create($oldText, $newText, HtmlDiffConfig $config = null)
{
    $diff = new self($oldText, $newText);

    if (null !== $config) {
        $diff->setConfig($config);
    }

    // A factory has to hand the configured instance back to the caller.
    return $diff;
}
/**
 * Produces the diff of the old and new text, going through the diff cache when one exists.
 *
 * On a cache hit for the (oldText, newText) pair the cached content is stored in
 * $this->content and returned unchanged. Otherwise both inputs are split into words,
 * each word list is turned into a DiffList, the two lists are compared, and the result
 * is saved to the cache (if any) before being returned.
 *
 * NOTE(review): this method was rebuilt from a copy in which short lines were lost;
 * confirm that no preparatory call belongs before the cache lookup.
 *
 * @return string the diff content (on a cache hit, whatever the cache returns)
 */
public function build()
{
    $cacheHit = $this->hasDiffCache()
        && $this->getDiffCache()->contains($this->oldText, $this->newText);

    if ($cacheHit) {
        $this->content = $this->getDiffCache()->fetch($this->oldText, $this->newText);

        return $this->content;
    }

    $this->splitInputsToWords();

    // Old list is built before the new one, as in the original call order.
    $oldList = $this->buildDiffList($this->oldWords);
    $newList = $this->buildDiffList($this->newWords);
    $this->content = $this->diffLists($oldList, $newList);

    if ($this->hasDiffCache()) {
        $this->getDiffCache()->save($this->oldText, $this->newText, $this->content);
    }

    return $this->content;
}
/**
 * Builds the diff markup for two lists by pairing their items on text similarity.
 *
 * Every DiffListItem of $oldList is scored against every DiffListItem of $newList
 * with similar_text(), and the scores are stored in both directions. The new list is
 * then walked in order; each new item is emitted as an insertion, as an in-place
 * HtmlDiff against an old item, or as a removed/replacement pair. Old items that
 * were not consumed are appended as removed, and the result is wrapped in the new
 * list's start and end tags.
 *
 * NOTE(review): this copy of the method is damaged - every line carries a stray
 * leading number, and brace-only lines and some short statements are not visible.
 * The notes below flag the places where the control flow cannot be confirmed.
 *
 * @param DiffList $oldList
 * @param DiffList $newList
 *
 * @return string
 */
54 protected function diffLists(DiffList $oldList, DiffList $newList)
// Similarity scores in both directions: [$oldIndex][$newIndex] and [$newIndex][$oldIndex].
56 $oldMatchData = array();
57 $newMatchData = array();
// Keys of the DiffListItem entries of each list, in iteration order.
58 $oldListIndices = array();
59 $newListIndices = array();
// DiffListItem objects stored under their key in the owning list.
60 $oldListItems = array();
61 $newListItems = array();
// Pass 1: index the old items and score each one against every new item.
63 foreach ($oldList->getListItems() as $oldIndex => $oldListItem) {
64 if ($oldListItem instanceof DiffListItem) {
65 $oldListItems[$oldIndex] = $oldListItem;
67 $oldListIndices[] = $oldIndex;
68 $oldMatchData[$oldIndex] = array();
70 // Get match percentages
71 foreach ($newList->getListItems() as $newIndex => $newListItem) {
72 if ($newListItem instanceof DiffListItem) {
// The new-side collections are filled inside the old-item loop, hence the duplicate guards.
// NOTE(review): this in_array() is non-strict, so objects are compared by value;
// $newListItems is not read again in the visible code.
73 if (!in_array($newListItem, $newListItems)) {
74 $newListItems[$newIndex] = $newListItem;
76 if (!in_array($newIndex, $newListIndices)) {
77 $newListIndices[] = $newIndex;
79 if (!array_key_exists($newIndex, $newMatchData)) {
80 $newMatchData[$newIndex] = array();
// getText() results are joined without a separator before comparison.
83 $oldText = implode('', $oldListItem->getText());
84 $newText = implode('', $newListItem->getText());
// similar_text() writes the similarity percentage into its third argument.
88 similar_text($oldText, $newText, $percentage);
90 $oldMatchData[$oldIndex][$newIndex] = $percentage;
91 $newMatchData[$newIndex][$oldIndex] = $percentage;
// Pass 2 cursors: position within $oldListIndices, and number of new items handled.
97 $currentIndexInOld = 0;
98 $currentIndexInNew = 0;
99 $oldCount = count($oldListIndices);
100 $newCount = count($newListIndices);
// Absolute difference between the two item counts.
101 $difference = max($oldCount, $newCount) - min($oldCount, $newCount);
// NOTE(review): $diffOutput is appended to below, but no initialisation is visible
// in this copy - confirm it is set to '' before this loop.
105 foreach ($newList->getListItems() as $newIndex => $newListItem) {
106 if ($newListItem instanceof DiffListItem) {
// Key of the old item at the cursor, or null once the old items are exhausted.
109 $oldListIndex = array_key_exists($currentIndexInOld, $oldListIndices) ? $oldListIndices[$currentIndexInOld] : null;
112 if (null !== $oldListIndex && array_key_exists($oldListIndex, $oldMatchData)) {
113 // Check percentage matches of upcoming list items in old.
114 $matchPercentage = $oldMatchData[$oldListIndex][$newIndex];
116 // does the old list item match better?
// Looks for a later new item (higher key) that this old item resembles more;
// with no break visible, the last such key is the one kept.
117 $otherMatchBetter = false;
118 foreach ($oldMatchData[$oldListIndex] as $index => $percentage) {
119 if ($index > $newIndex && $percentage > $matchPercentage) {
120 $otherMatchBetter = $index;
// New list is longer and this old item suits a later new item better:
// the current new item is emitted as an insertion.
// NOTE(review): no continue or change to $difference is visible after this branch -
// confirm that the rest of the iteration is skipped.
124 if (false !== $otherMatchBetter && $newCount > $oldCount && $difference > 0) {
125 $diffOutput .= sprintf('%s', $newListItem->getHtml('normal new', 'ins'));
126 ++$currentIndexInNew;
// NOTE(review): the only visible assignment to $replacement is this one; the places
// that would set it to true are not visible in this copy.
132 $replacement = false;
134 // is there a better old list item match coming up?
// Old list is longer: old items are emitted as removed while a later old item
// is a better match for this new item.
135 if ($oldCount > $newCount) {
136 while ($difference > 0 && $this->hasBetterMatch($newMatchData[$newIndex], $oldListIndex)) {
137 $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del'));
139 ++$currentIndexInOld;
// NOTE(review): the ternary below can yield null, which the following line would
// then use to index $oldMatchData - verify this cannot happen in practice.
141 $oldListIndex = array_key_exists($currentIndexInOld, $oldListIndices) ? $oldListIndices[$currentIndexInOld] : null;
142 $matchPercentage = $oldMatchData[$oldListIndex][$newIndex];
// Key of the old item right after the cursor, or null when there is none.
147 $nextOldListIndex = array_key_exists($currentIndexInOld + 1, $oldListIndices) ? $oldListIndices[$currentIndexInOld + 1] : null;
149 if ($nextOldListIndex !== null && $oldMatchData[$nextOldListIndex][$newIndex] > $matchPercentage && $oldMatchData[$nextOldListIndex][$newIndex] > $this->config->getMatchThreshold()) {
150 // Following list item in old is better match, use that.
151 $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del'));
153 ++$currentIndexInOld;
154 $oldListIndex = $nextOldListIndex;
155 $matchPercentage = $oldMatchData[$oldListIndex][$newIndex];
// Similar enough, or both cursors are level: diff the contents of the two items in place.
159 if ($matchPercentage > $this->config->getMatchThreshold() || $currentIndexInNew === $currentIndexInOld) {
160 // Diff the two lists.
// NOTE(review): the argument list ends with a trailing comma, so a further argument
// appears to be missing from this copy.
161 $htmlDiff = HtmlDiff::create(
162 $oldListItems[$oldListIndex]->getInnerHtml(),
163 $newListItem->getInnerHtml(),
166 $diffContent = $htmlDiff->build();
168 $diffOutput .= sprintf('%s%s%s', $newListItem->getStartTagWithDiffClass($replacement ? 'replacement' : 'normal'), $diffContent, $newListItem->getEndTag());
// NOTE(review): presumably the else-branch of the threshold check (the delimiter is
// not visible): the old item is emitted as removed and the new one as its replacement.
170 $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del'));
171 $diffOutput .= sprintf('%s', $newListItem->getHtml('replacement', 'ins'));
173 ++$currentIndexInOld;
// NOTE(review): presumably the branch taken when no old item is left at the cursor:
// the new item is emitted as a plain insertion.
175 $diffOutput .= sprintf('%s', $newListItem->getHtml('normal new', 'ins'));
178 ++$currentIndexInNew;
182 // Output any additional list items
183 while (array_key_exists($currentIndexInOld, $oldListIndices)) {
184 $oldListIndex = $oldListIndices[$currentIndexInOld];
185 $diffOutput .= sprintf('%s', $oldListItems[$oldListIndex]->getHtml('removed', 'del'));
186 ++$currentIndexInOld;
// The accumulated item markup is wrapped in the new list's start and end tags.
189 return sprintf('%s%s%s', $newList->getStartTagWithDiffClass(), $diffOutput, $newList->getEndTag());
/**
 * Tells whether an entry with a higher key than $currentIndex scores better than the
 * entry at $currentIndex and also exceeds the configured match threshold.
 *
 * @param array    $matchData    match percentages keyed by list item index
 * @param int|null $currentIndex key of the entry to compare against; null or an
 *                               unknown key yields false
 *
 * @return bool
 */
protected function hasBetterMatch(array $matchData, $currentIndex)
{
    // The caller may run past the last old item and pass null; without a reference
    // score there is nothing to beat, and reading the offset would be undefined.
    if (null === $currentIndex || !array_key_exists($currentIndex, $matchData)) {
        return false;
    }

    $currentScore = $matchData[$currentIndex];

    foreach ($matchData as $index => $percentage) {
        if ($index > $currentIndex &&
            $percentage > $currentScore &&
            $percentage > $this->config->getMatchThreshold()
        ) {
            return true;
        }
    }

    return false;
}
/**
 * Walks a word list and assembles a DiffList from the list tags and list items found in it.
 *
 * The visible code records the list's start tag and two-letter type, collects the
 * words of each top-level list item into a DiffListItem, and appends nested list
 * and list-item tags to the words of the item currently being collected.
 *
 * NOTE(review): this copy of the method is damaged - every line carries a stray
 * leading number, and brace-only lines and short statements are not visible. In
 * particular the initialisation and updates of $openLists, $openListItems, $list,
 * $listType and $listEndTag, the else branches, and the return statement cannot be
 * seen; $diffList is presumably what gets returned.
 *
 * @param array $words
 */
213 protected function buildDiffList($words)
216 $listStartTag = null;
// Passed to the DiffList constructor unchanged in the visible code.
218 $attributes = array();
// State of the top-level list item currently being collected.
222 $currentListItem = null;
223 $listItemType = null;
224 $listItemStart = null;
227 foreach ($words as $i => $word) {
228 if ($this->isOpeningListTag($word, $listType)) {
// A list opened inside an open item is treated as part of that item's content.
229 if ($openLists > 0) {
230 if ($openListItems > 0) {
231 $currentListItem[] = $word;
// The two characters after '<' give the tag name, e.g. 'ul'.
// NOTE(review): presumably the branch for the outermost list - delimiter not visible.
236 $listType = mb_substr($word, 1, 2);
237 $listStartTag = $word;
241 } elseif ($this->isClosingListTag($word, $listType)) {
// Closing tag of a nested list inside an open item: kept as item content.
242 if ($openLists > 1) {
243 if ($openListItems > 0) {
244 $currentListItem[] = $word;
253 } elseif ($this->isOpeningListItemTag($word, $listItemType)) {
254 if ($openListItems === 0) {
255 // New top-level list item
256 $currentListItem = array();
257 $listItemType = mb_substr($word, 1, 2);
258 $listItemStart = $word;
// NOTE(review): presumably the branch for a nested item's opening tag - delimiter not visible.
260 $currentListItem[] = $word;
264 } elseif ($this->isClosingListItemTag($word, $listItemType)) {
// Closing the only open item completes a top-level DiffListItem.
265 if ($openListItems === 1) {
266 $listItemEnd = $word;
267 $listItem = new DiffListItem($currentListItem, array(), $listItemStart, $listItemEnd);
269 $currentListItem = null;
// NOTE(review): presumably the branch for a nested item's closing tag - delimiter not visible.
271 $currentListItem[] = $word;
// Any other word is content and is kept only while an item is open.
276 if ($openListItems > 0) {
277 $currentListItem[] = $word;
284 $diffList = new DiffList($listType, $listStartTag, $listEndTag, $list, $attributes);
/**
 * Checks whether $word starts with an opening list tag.
 *
 * Only the first three characters are compared (case-sensitively), so anything
 * following the tag name is ignored.
 *
 * @param string      $word
 * @param string|null $type tag name to match exclusively (e.g. 'ul'); when null,
 *                          any of ul, ol or dl matches
 *
 * @return bool
 */
protected function isOpeningListTag($word, $type = null)
{
    $prefixes = null === $type ? array('<ul', '<ol', '<dl') : array('<'.$type);

    // Both sides are strings, so a strict comparison is the precise one.
    return in_array(mb_substr($word, 0, 3), $prefixes, true);
}
/**
 * Checks whether $word starts with a closing list tag.
 *
 * Only the first four characters are compared (case-sensitively), so anything
 * following the tag name is ignored.
 *
 * @param string      $word
 * @param string|null $type tag name to match exclusively (e.g. 'ul'); when null,
 *                          any of ul, ol or dl matches
 *
 * @return bool
 */
protected function isClosingListTag($word, $type = null)
{
    $prefixes = null === $type ? array('</ul', '</ol', '</dl') : array('</'.$type);

    // Both sides are strings, so a strict comparison is the precise one.
    return in_array(mb_substr($word, 0, 4), $prefixes, true);
}
/**
 * Checks whether $word starts with an opening list-item tag.
 *
 * Only the first three characters are compared (case-sensitively), so anything
 * following the tag name is ignored.
 *
 * @param string      $word
 * @param string|null $type tag name to match exclusively (e.g. 'li'); when null,
 *                          any of li, dd or dt matches
 *
 * @return bool
 */
protected function isOpeningListItemTag($word, $type = null)
{
    $prefixes = null === $type ? array('<li', '<dd', '<dt') : array('<'.$type);

    // Both sides are strings, so a strict comparison is the precise one.
    return in_array(mb_substr($word, 0, 3), $prefixes, true);
}
/**
 * Checks whether $word starts with a closing list-item tag.
 *
 * Only the first four characters are compared (case-sensitively), so anything
 * following the tag name is ignored.
 *
 * @param string      $word
 * @param string|null $type tag name to match exclusively (e.g. 'li'); when null,
 *                          any of li, dd or dt matches
 *
 * @return bool
 */
protected function isClosingListItemTag($word, $type = null)
{
    $prefixes = null === $type ? array('</li', '</dd', '</dt') : array('</'.$type);

    // Both sides are strings, so a strict comparison is the precise one.
    return in_array(mb_substr($word, 0, 4), $prefixes, true);
}