14 #include "doctorBuffer_utils.h"
17 #include <CUnit/CUnit.h>
20 #include "nomos_utils.h"
27 #include "nomos_regex.h"
28 #include "_autodefs.h"
42 fer = g_strdup_printf(
"//Th- is is a li-\n// cence of the test string");
43 buf = g_strdup_printf(
"This is a li cence of the test string");
50 CU_ASSERT_STRING_EQUAL(buf, fer);
68 fer = g_strdup_printf(
"//This is the test string");
73 printf(
"Before %d, after %d", (
int) strlen(cfer), (
int) strlen(fer));
75 for (
size_t i = 0; i < strlen(fer); i++)
77 CU_ASSERT_EQUAL(*(fer + i), *(cfer + uncollapsePosition(i, cur.docBufferPositionsAndOffsets)));
89 printf(
"I have %i matches \n", cur.
theMatches->len);
90 for (guint i = 0; i < cur.
theMatches->len; ++i)
96 printf(
"Match from %d to %d: ", PaT->
start, PaT->
end);
98 for (
int j = PaT->
start; j < PaT->end; ++j)
100 printf(
"%c", *(buf + j));
120 char *buf, *undoc, *filename;
121 filename = buf = (
char*)malloc(3000);
122 sprintf(filename,
"%s/NomosTestfiles/WXwindows/WXwindows.txt", TESTDATADIR);
123 int f = open(filename, O_RDONLY);
124 int whatIread = read(f, buf, 3000);
127 CU_ASSERT_EQUAL(whatIread, 2496);
128 undoc = g_strdup(buf);
130 printf(
"\n%s\n", undoc);
132 int licence_index = _PHR_WXWINDOWS;
133 int licence_index2 = _LT_LGPLref1;
136 cur.currentLicenceIndex=0;
137 g_array_append_val(cur.
indexList, licence_index);
138 g_array_append_val(cur.
indexList, licence_index2);
146 cur.currentLicenceIndex=0;
147 g_array_append_val(cur.
indexList, licence_index);
148 g_array_append_val(cur.
indexList, licence_index2);
152 printf(
"\n%s\n", buf);
159 cur.currentLicenceIndex=0;
160 g_array_append_val(cur.
indexList, licence_index);
161 g_array_append_val(cur.
indexList, licence_index2);
192 char* textBuffer = g_strdup_printf(
"" the big\t(C) and long\\n "\\s-1234,"
193 " test © string \n con-\n// tains losts; of . <string test> "
194 " <body> \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
195 "mimi-cs printf(\"Licence\"); and so on\n " \n ");
197 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234,"
198 " test © string \n con-\n// tains losts; of . <string test nbsp "
199 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
200 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
201 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
215 char* textBuffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234,"
216 " test © string \n con-\n// tains losts; of . <string test nbsp "
217 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
218 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
222 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234,"
223 " test © string \n con-\n\377\377 tains losts; of . <string test nbsp"
224 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
225 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
226 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
239 char* textBuffer=g_strdup_printf(
"(8) (89) -9.- A %%!PS-Adobe-3.0 (12) EPSF-3.0 --8. -9.- A");
243 char* te22Buffer = g_strdup_printf(
" %%!PS-Adobe-3.0 (12) EPSF-3.0 ");
244 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
260 char* textBuffer= g_strdup_printf(
" quot the big\t(C) and long\\n quot\\s-1234, test"
261 " © string \n con-\n\377\377 tains losts; of . <string test nbsp "
262 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
263 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
267 char* te22Buffer = g_strdup_printf(
" quot the big\t(C) and long quot , test"
268 " © string \n con-\n\377\377 tains losts; of . <string test nbsp "
269 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
270 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
271 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
292 textBuffer = g_strdup_printf(
" quot the big\t(C) and long quot , test"
293 " © string \n con-\n\377\377 tains losts; of . <string test nbsp "
294 " body \" compli-\n cated /* COMMENT s and funny */ Words as it \n "
295 "mimi-cs printf(\"Licence\"); and so on\n quot \n ");
299 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot test"
300 " © string con- \377\377 tains losts of \377 test nbsp "
301 " body compli- cated / COMMENT s and funny / Words as it "
302 "mimi-cs printf Licence and so on quot ");
303 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
327 fer= g_strdup_printf(
"This- is the-test str- ing");
328 buf= g_strdup_printf(
"This\377\377is the-test str\377\377ing");
330 CU_ASSERT_STRING_EQUAL(buf, fer);
344 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , "
345 "test © string con- \377\377 tains losts of \377 test"
346 " nbsp body compli- cated / COMMENT s and funny / Words as it "
347 "mimi-cs printf Licence and so on quot ");
350 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © "
351 "string con- \377\377 tains losts of \377 test nbsp body "
352 "compli\377\377\377cated / COMMENT s and funny / Words as it "
353 "mimi-cs printf Licence and so on quot ");
354 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
369 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , "
370 "test © string con- \377\377 tains losts of \377 test "
371 "nbsp body compli\377\377\377cated / COMMENT s and funny / Words as it "
372 "mimi-cs printf Licence and so on quot ");
376 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test "
377 "© string con \377\377 tains losts of \377 test nbsp body "
378 "compli\377\377\377cated COMMENT s and funny Words as it "
379 "mimi-cs printf Licence and so on quot ");
380 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
398 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , test © "
399 "string con \377\377 tains losts of \377 test nbsp body "
400 "compli\377\377\377cated COMMENT s and funny Words as it "
401 "mimi-cs printf Licence and so on quot ");
404 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © "
405 "string con \377\377 tains losts of \377 test nbsp body "
406 "compli\377\377\377cated COMMENT s and funny Words as it "
407 "mimi-cs Licence and so on quot ");
408 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
423 char* textBuffer= g_strdup_printf(
" quot the big (C) and long quot , test © "
424 "string con \377\377 tains losts of \377 test nbsp body "
425 "compli\377\377\377cated COMMENT s and funny Words as it "
426 "mimi-cs Licence and so on quot ");
429 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, "
430 "test © \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377"
431 "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s "
432 "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377"
433 "and so on \377\377quot \377\377");
435 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
448 char* textBuffer= g_strdup_printf(
" quot the big (C) and long \377\377\377quot \377\377\377\377\377\377, "
449 "test © \377string \377\377con \377\377\377\377tains losts \377of \377\377\377\377\377\377\377\377\377\377"
450 "test \377\377nbsp \377\377\377body \377\377\377compli\377\377\377cated \377\377\377COMMENT s "
451 "and funny \377\377\377Words as it \377\377mimi-cs \377\377\377\377\377\377\377\377Licence \377\377\377"
452 "and so on \377\377quot \377\377");
456 char* te22Buffer = g_strdup_printf(
" quot the big (C) and long quot , test © string "
457 "con tains losts of test nbsp body complicated COMMENT s and funny Words as "
458 "it mimi-cs Licence and so on quot ");
460 CU_ASSERT_STRING_EQUAL(te22Buffer, textBuffer);
465 CU_TestInfo doctorBuffer_testcases[] =
void removeBackslashesAndGTroffIndicators(char *buf)
Remove groff/troff font-size indicators, the literal string backslash-n and all backslashes, ...
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
int compressDoctoredBuffer(char *textBuffer)
Garbage collect: eliminate all INVISIBLE characters in the buffer.
void convertSpaceToInvisible(char *buf)
void removePunctuation(char *buf)
Clean up miscellaneous punctuation.
void removeLineComments(char *buf)
Remove comments that start at the beginning of a line.
void cleanUpPostscript(char *buf)
Remove newlines from buffer.
void convertWhitespaceToSpaceAndRemoveSpecialChars(char *buf, int isCR)
Convert white-space to real spaces, and remove unnecessary punctuation.
void ignoreFunctionCalls(char *buf)
Ignore function calls to print routines.
void removeHtmlComments(char *buf)
Remove HTML comments from buffer without removing comment text.
void licenseInit()
License initialization.
int idxGrep_recordPosition(int index, char *data, int flags)
Compile a regex, perform the search, and record findings.
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
Compile a regex, perform the search, and record findings.
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.
FUNCTION void addLicence(GArray *theMatches, char *licenceName)
Add a license to the matches array.
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
GArray * matchPositions
Match positions.
int start
Start position of match.
int end
End position of match.
void test_9_convertSpaceToInvisible()
Test for convertSpaceToInvisible()
void test_5_convertWhitespaceToSpaceAndRemoveSpecialChars()
Test for convertWhitespaceToSpaceAndRemoveSpecialChars()
void test_6a_dehyphen()
Test for dehyphen()
void test_10_compressDoctoredBuffer()
Test for compressDoctoredBuffer()
void test_7_removePunctuation()
Test for removePunctuation()
void test_doctorBuffer_uncollapse()
Test for uncollapsePosition()
void test_3_cleanUpPostscript()
Test for cleanUpPostscript()
static void report_Match(char *buf)
Helper function to match licenses and highlight info.
void test_1_removeHtmlComments()
Test for removeHtmlComments()
void test_2_removeLineComments()
Test for removeLineComments()
void test_6_dehyphen()
Test for dehyphen()
void test_doctorBuffer_fromFile()
Test for idxGrep_recordPosition()
void test_doctorBuffer()
Test for doctorBuffer()
void test_8_ignoreFunctionCalls()
Test for ignoreFunctionCalls()
void test_4_removeBackslashesAndGTroffIndicators()
Test for removeBackslashesAndGTroffIndicators()