FOSSology  4.4.0
Open Source License Compliance by Open Source Software
diff.c
1 /*
2  Author: Daniele Fognini, Andreas Wuerl
3  SPDX-FileCopyrightText: © 2013-2014 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
8 #include "diff.h"
9 
10 #include <_squareVisitor.h>
11 #include <stdlib.h>
12 
13 int matchNTokens(const GArray* textTokens, size_t textStart, size_t textLength,
14  const GArray* searchTokens, size_t searchStart, size_t searchLength,
15  unsigned int numberOfWantedMatches) {
16 
17  if (
18  textStart < textLength &&
19  searchStart < searchLength &&
20  tokenEquals(
21  tokens_index(textTokens, textStart),
22  tokens_index(searchTokens, searchStart
23  )))
24  {
25  unsigned matched = 1;
26 
27  size_t canMatch = MIN(textLength - textStart, searchLength - searchStart);
28  size_t shouldMatch = MIN(numberOfWantedMatches, canMatch);
29  for (size_t i = 1; i < shouldMatch; i++) {
30  if (tokenEquals(
31  tokens_index(textTokens, i + textStart),
32  tokens_index(searchTokens, i + searchStart)))
33  matched++;
34  else
35  break;
36  }
37  return (matched == shouldMatch);
38  }
39  return 0;
40 }
41 
42 int lookForDiff(const GArray* textTokens, const GArray* searchTokens,
43  size_t iText, size_t iSearch,
44  unsigned int maxAllowedDiff, unsigned int minAdjacentMatches,
45  DiffMatchInfo* result) {
46  size_t searchLength = searchTokens->len;
47  size_t textLength = textTokens->len;
48 
49  size_t searchStopAt = MIN(iSearch + maxAllowedDiff, searchLength);
50  size_t textStopAt = MIN(iText + maxAllowedDiff, textLength);
51 
52  size_t textPos;
53  size_t searchPos;
54  for (unsigned int i = 0; i < SQUARE_VISITOR_LENGTH; i++) {
55  textPos = iText + squareVisitorX[i];
56  searchPos = iSearch + squareVisitorY[i];
57 
58  if ((textPos < textStopAt) && (searchPos < searchStopAt))
59  if (matchNTokens(textTokens, textPos, textLength,
60  searchTokens, searchPos, searchLength,
61  minAdjacentMatches))
62  {
63  result->search.start = searchPos;
64  result->search.length = searchPos - iSearch;
65  result->text.start = textPos;
66  result->text.length = textPos - iText;
67  return 1;
68  }
69  }
70 
71  return 0;
72 }
73 
74 static int applyDiff(const DiffMatchInfo* diff,
75  GArray* matchedInfo,
76  size_t* additionsCounter, size_t* removedCounter,
77  size_t* iText, size_t* iSearch) {
78 
79  DiffMatchInfo diffCopy = *diff;
80  diffCopy.text.start = *iText;
81  diffCopy.search.start = *iSearch;
82 
83  if (diffCopy.search.length == 0)
84  diffCopy.diffType = DIFF_TYPE_ADDITION;
85  else if (diffCopy.text.length == 0)
86  diffCopy.diffType = DIFF_TYPE_REMOVAL;
87  else
88  diffCopy.diffType = DIFF_TYPE_REPLACE;
89 
90  *iText = diff->text.start;
91  *iSearch = diff->search.start;
92 
93  g_array_append_val(matchedInfo, diffCopy);
94 
95  *additionsCounter += diffCopy.text.length;
96  *removedCounter += diffCopy.search.length;
97 
98  return 1;
99 }
100 
101 static void applyTailDiff(GArray* matchedInfo,
102  size_t searchLength,
103  size_t* removedCounter, size_t* additionsCounter,
104  size_t* iText, size_t* iSearch) {
105 
106  DiffMatchInfo tailDiff = (DiffMatchInfo) {
107  .search = (DiffPoint) {
108  .start = (*iSearch),
109  .length = searchLength - (*iSearch)
110  },
111  .text = (DiffPoint) {
112  .start = (*iText),
113  .length = 0
114  },
115  .diffType = NULL
116  };
117 
118  applyDiff(&tailDiff, matchedInfo, additionsCounter, removedCounter, iText, iSearch);
119 }
120 
121 static void initSimpleMatch(DiffMatchInfo* simpleMatch, size_t iText, size_t iSearch) {
122  simpleMatch->text.start = iText;
123  simpleMatch->text.length = 0;
124  simpleMatch->search.start = iSearch;
125  simpleMatch->search.length = 0;
126 }
127 
140 DiffResult* findMatchAsDiffs(const GArray* textTokens, const GArray* searchTokens,
141  size_t textStartPosition, size_t searchStartPosition,
142  unsigned int maxAllowedDiff, unsigned int minAdjacentMatches) {
143  size_t textLength = textTokens->len;
144  size_t searchLength = searchTokens->len;
145 
146  if ((searchLength<=searchStartPosition) || (textLength<=textStartPosition)) {
147  return NULL;
148  }
149 
150  DiffResult* result = malloc(sizeof(DiffResult));
151  result->matchedInfo = g_array_new(FALSE, FALSE, sizeof(DiffMatchInfo));
152  GArray* matchedInfo = result->matchedInfo;
153 
154  size_t iText = textStartPosition;
155  size_t iSearch = 0;
156 
157  size_t removedCounter = 0;
158  size_t matchedCounter = 0;
159  size_t additionsCounter = 0;
160 
161  if (searchStartPosition > 0) {
162  DiffMatchInfo licenseHeadDiff = (DiffMatchInfo) {
163  .search = (DiffPoint) {
164  .start = 0,
165  .length = searchStartPosition
166  },
167  .text = (DiffPoint) {
168  .start = iText,
169  .length = 0
170  },
171  .diffType = NULL
172  };
173  applyDiff(&licenseHeadDiff, matchedInfo, &additionsCounter, &removedCounter, &iText, &iSearch);
174  iSearch = searchStartPosition;
175  }
176 
177  // match first token
178  while (iText < textLength) {
179  Token* textToken = tokens_index(textTokens, iText);
180  Token* searchToken = tokens_index(searchTokens, iSearch);
181  if (tokenEquals(textToken, searchToken))
182  break;
183  iText++;
184  }
185 
186  if (iText < textLength) {
187  DiffMatchInfo simpleMatch;
188  simpleMatch.diffType = DIFF_TYPE_MATCH;
189  initSimpleMatch(&simpleMatch, iText, iSearch);
190 
191  while ((iText < textLength) && (iSearch < searchLength)) {
192  Token* textToken = tokens_index(textTokens, iText);
193  Token* searchToken = tokens_index(searchTokens, iSearch);
194 
195  if (tokenEquals(textToken, searchToken)) {
196  simpleMatch.text.length++;
197  simpleMatch.search.length++;
198  matchedCounter++;
199  iSearch++;
200  iText++;
201  } else {
202  /* the previous tokens matched, here starts a difference */
203  g_array_append_val(matchedInfo, simpleMatch);
204  initSimpleMatch(&simpleMatch, iText, iSearch);
205 
206  DiffMatchInfo diff;
207  if (lookForDiff(textTokens, searchTokens,
208  iText, iSearch,
209  maxAllowedDiff, minAdjacentMatches,
210  &diff)) {
211  applyDiff(&diff,
212  matchedInfo,
213  &additionsCounter, &removedCounter,
214  &iText, &iSearch);
215 
216  simpleMatch.text.start = iText;
217  simpleMatch.search.start = iSearch;
218  } else {
219  break;
220  }
221  }
222  }
223  if (simpleMatch.text.length > 0) {
224  g_array_append_val(matchedInfo, simpleMatch);
225  }
226  }
227 
228  if ((iSearch < searchLength) && (searchLength < maxAllowedDiff + iSearch)) {
229  applyTailDiff(matchedInfo, searchLength, &removedCounter, &additionsCounter, &iText, &iSearch);
230  }
231 
232  if (matchedCounter + removedCounter != searchLength) {
233  g_array_free(matchedInfo, TRUE);
234  free(result);
235 
236  return NULL;
237  } else {
238 #ifdef DEBUG_DIFF
239  printf("diff: (=%zu +%zu -%zu)/%zu\n", matchedCounter, additionsCounter, removedCounter, searchLength);
240  for (size_t i=0; i<matchedInfo->len; i++) {
241  DiffMatchInfo current = g_array_index(matchedInfo, DiffMatchInfo, i);
242  printf("info[%zu]: t[%zu+%zu] %s s[%zu+%zu]}\n",
243  i,
244  current.text.start,
245  current.text.length,
246  current.diffType,
247  current.search.start,
248  current.search.length
249  );
250  }
251  printf("\n");
252 #endif
253  result->removed = removedCounter;
254  result->added = additionsCounter;
255  result->matched = matchedCounter;
256  return result;
257  }
258 }
259 
260 void diffResult_free(DiffResult* diffResult) {
261  g_array_free(diffResult->matchedInfo, TRUE);
262  free(diffResult);
263 }
#define MIN(a, b)
Min of two.
Definition: licenses.c:64
Definition: diff.h:14