14 #include "doctorBuffer_utils.h"
17 #include <CUnit/CUnit.h>
20 #include "nomos_utils.h"
27 #include "nomos_regex.h"
28 #include "_autodefs.h"
42 fer = g_strdup_printf(
"//Th- is is a li-\n// cence of the test string");
43 buf = g_strdup_printf(
"This is a li cence of the test string");
50 CU_ASSERT_STRING_EQUAL(buf, fer);
68 fer = g_strdup_printf(
"//This is the test string");
73 printf(
"Before %d, after %d", (
int) strlen(cfer), (
int) strlen(fer));
75 for (
size_t i = 0; i < strlen(fer); i++)
77 CU_ASSERT_EQUAL(*(fer + i), *(cfer + uncollapsePosition(i, cur.docBufferPositionsAndOffsets)));
89 printf(
"I have %i matches \n", cur.
theMatches->len);
90 for (guint i = 0; i < cur.
theMatches->len; ++i)
96 printf(
"Match from %d to %d: ", PaT->
start, PaT->
end);
98 for (
int j = PaT->
start; j < PaT->end; ++j)
100 printf(
"%c", *(buf + j));
120 char *buf, *undoc, *filename;
121 filename = buf = (
char*)malloc(3000);
122 sprintf(filename,
"%s/NomosTestfiles/WXwindows/WXwindows.txt", TESTDATADIR);
123 int f = open(filename, O_RDONLY);
124 int whatIread = read(f, buf, 3000);
127 CU_ASSERT_EQUAL(whatIread, 2496);
128 undoc = g_strdup(buf);
130 printf(
"\n%s\n", undoc);
132 int licence_index = _PHR_WXWINDOWS;
133 int licence_index2 = _LT_LGPLref1;
136 cur.currentLicenceIndex=0;
137 g_array_append_val(cur.
indexList, licence_index);
138 g_array_append_val(cur.
indexList, licence_index2);
148 cur.currentLicenceIndex=0;
149 g_array_append_val(cur.
indexList, licence_index);
150 g_array_append_val(cur.
indexList, licence_index2);
154 printf(
"\n%s\n", buf);
161 cur.currentLicenceIndex=0;
162 g_array_append_val(cur.
indexList, licence_index);
163 g_array_append_val(cur.
indexList, licence_index2);
194 char* textBuffer = g_strdup_printf(
"" the big\t(C) and long\\n "\\s-1234,"
195 " test © string \n con-\n// tains losts; of . <string test> "
196 " <body> \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
197 "mimi-cs printf(\"Licence\"); and so on\n " \n ");
199 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234,"
200 " test © string \n con-\n// tains losts; of . <string test nbsp "
201 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
202 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
203 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
217 char* textBuffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234,"
218 " test © string \n con-\n// tains losts; of . <string test nbsp "
219 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
220 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
224 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234,"
225 " test © string \n con-\n\377\377 tains losts; of . <string test nbsp"
226 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
227 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
228 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
241 char* textBuffer=g_strdup_printf(
"(8) (89) -9.- A %%!PS-Adobe-3.0 (12) EPSF-3.0 --8. -9.- A");
245 char* te22Buffer = g_strdup_printf(
" %%!PS-Adobe-3.0 (12) EPSF-3.0 ");
246 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
262 char* textBuffer= g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234, test"
263 " © string \n con-\n\377\377 tains losts; of . <string test nbsp "
264 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
265 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
269 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long quot , test"
270 " © string \n con-\n\377\377 tains losts; of . <string test nbsp "
271 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
272 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
273 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
294 textBuffer = g_strdup_printf(
" quot the big\t(C) and long quot , test"
295 " © string \n con-\n\377\377 tains losts; of . <string test nbsp "
296 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
297 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
301 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot test"
302 " © string con- \377\377 tains losts of \377 test nbsp "
303 " body compli- cated / COMMENT s and funny / Words as it "
304 "mimi-cs printf Licence and so on quot ");
305 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
329 fer= g_strdup_printf(
"This- is the-test str- ing");
330 buf= g_strdup_printf(
"This\377\377is the-test str\377\377ing");
332 CU_ASSERT_STRING_EQUAL(buf, fer);
346 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , "
347 "test © string con- \377\377 tains losts of \377 test"
348 " nbsp body compli- cated / COMMENT s and funny / Words as it "
349 "mimi-cs printf Licence and so on quot ");
352 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © "
353 "string con- \377\377 tains losts of \377 test nbsp body "
354 "compli\377\377\377cated / COMMENT s and funny / Words as it "
355 "mimi-cs printf Licence and so on quot ");
356 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
371 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , "
372 "test © string con- \377\377 tains losts of \377 test "
373 "nbsp body compli\377\377\377cated / COMMENT s and funny / Words as it "
374 "mimi-cs printf Licence and so on quot ");
378 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test "
379 "© string con \377\377 tains losts of \377 test nbsp body "
380 "compli\377\377\377cated COMMENT s and funny Words as it "
381 "mimi-cs printf Licence and so on quot ");
382 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
400 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , test © "
401 "string con \377\377 tains losts of \377 test nbsp body "
402 "compli\377\377\377cated COMMENT s and funny Words as it "
403 "mimi-cs printf Licence and so on quot ");
406 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © "
407 "string con \377\377 tains losts of \377 test nbsp body "
408 "compli\377\377\377cated COMMENT s and funny Words as it "
409 "mimi-cs Licence and so on quot ");
410 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
425 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , test © "
426 "string con \377\377 tains losts of \377 test nbsp body "
427 "compli\377\377\377cated COMMENT s and funny Words as it "
428 "mimi-cs Licence and so on quot ");
431 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, "
432 "test © \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377"
433 "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s "
434 "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377"
435 "and so on \377\377quot \377\377");
437 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
450 char* textBuffer= g_strdup_printf(
" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, "
451 "test © \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377"
452 "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s "
453 "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377"
454 "and so on \377\377quot \377\377");
458 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © string "
459 "con tains losts of test nbsp body complicated COMMENT s and funny Words as "
460 "it mimi-cs Licence and so on quot ");
462 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
467 CU_TestInfo doctorBuffer_testcases[] =
void removeBackslashesAndGTroffIndicators(char *buf)
Remove groff/troff font-size indicators, the literal string backslash-n and all backslashes,...
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
int compressDoctoredBuffer(char *textBuffer)
garbage collect: eliminate all INVISIBLE characters in the buffer
void convertSpaceToInvisible(char *buf)
void removePunctuation(char *buf)
Clean up miscellaneous punctuation.
void removeLineComments(char *buf)
Remove comments that start at the beginning of a line.
void cleanUpPostscript(char *buf)
Remove newlines from buffer.
void convertWhitespaceToSpaceAndRemoveSpecialChars(char *buf, int isCR)
Convert white-space to real spaces, and remove unnecessary punctuation.
void ignoreFunctionCalls(char *buf)
Ignore function calls to print routines.
void removeHtmlComments(char *buf)
Remove HTML comments from buffer without removing comment text.
void licenseInit()
license initialization
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.
FUNCTION void addLicence(GArray *theMatches, char *licenceName)
Add a license to the matches array.
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
GArray * matchPositions
Match positions.
int start
Start position of match.
int end
End position of match.
void test_9_convertSpaceToInvisible()
Test for convertSpaceToInvisible()
void test_5_convertWhitespaceToSpaceAndRemoveSpecialChars()
Test for convertWhitespaceToSpaceAndRemoveSpecialChars()
void test_6a_dehyphen()
Test for dehyphen()
void test_10_compressDoctoredBuffer()
Test for compressDoctoredBuffer()
void test_7_removePunctuation()
Test for removePunctuation()
void test_doctorBuffer_uncollapse()
Test for uncollapsePosition()
void test_3_cleanUpPostscript()
Test for cleanUpPostscript()
static void report_Match(char *buf)
Helper function to match licenses and highlight info.
void test_1_removeHtmlComments()
Test for removeHtmlComments()
void test_2_removeLineComments()
Test for removeLineComments()
void test_6_dehyphen()
Test for dehyphen()
void test_doctorBuffer_fromFile()
Test for idxGrep_recordPosition()
void test_doctorBuffer()
Test for doctorBuffer()
void test_8_ignoreFunctionCalls()
Test for ignoreFunctionCalls()
void test_4_removeBackslashesAndGTroffIndicators()
Test for removeBackslashesAndGTroffIndicators()