28 #include "nomos_utils.h"
31 #include "nomos_regex.h"
33 #include <_autodefs.h>
35 #define HASHES "#####################"
36 #define DEBCPYRIGHT "debian/copyright"
41 static void licenseStringChecks();
42 static void findLines(
char *,
char *,
int,
int,
list_t *);
54 extern void memStats();
/**
 * \brief Max of two.
 *
 * Every use of an argument is parenthesized, including the two result arms,
 * so that an argument containing a low-precedence operator (for example an
 * assignment) still expands to the intended expression.
 *
 * \note The selected argument is evaluated twice; do not pass expressions
 *       whose side effects must happen exactly once (e.g. `i++`).
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
/**
 * \brief Min of two.
 *
 * Every use of an argument is parenthesized, including the two result arms,
 * so that an argument containing a low-precedence operator (for example an
 * assignment) still expands to the intended expression.
 *
 * \note The selected argument is evaluated twice; do not pass expressions
 *       whose side effects must happen exactly once (e.g. `i++`).
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
82 traceFunc(
"== licenseInit()\n");
86 strcpy(some,
"=SOME=");
88 strcpy(year,
"=YEAR=");
98 for (i = 0; i < NFOOTPRINTS; i++) {
101 if (licSpec[i].text.csData ==
NULL_STR) {
104 if ((licSpec[i].text.csLen == 1) && (*(licSpec[i].
text.
csData) ==
'.')) {
108 else if ((licSpec[i].seed.csLen == licSpec[i].
text.
csLen) && !memcmp(
109 licSpec[i].seed.csData, licSpec[i].
text.
csData, len)) {
119 fixSearchString(buf,
sizeof(buf), i,
YES);
130 LOG_FATAL(
"Cannot enqueue search-cache item \"%s\"",
licText[i].tseed)
140 if (strcmp(
licText[i].tseed,
"=NULL=") == 0) {
162 memcpy(buf, licSpec[i].text.csData, (
size_t)(len + 1));
174 fixSearchString(buf,
sizeof(buf), i,
NO);
178 if (p->ssComp < (ssAbove * 100) + ssBelow) {
179 p->ssComp = (ssAbove * 100) + ssBelow;
191 for (i = 0; i < NFOOTPRINTS; i++) {
194 LOG_NOTICE(
"License[%d] configured with NULL seed", i)
200 LOG_NOTICE(
"License[%d] seed == regex", i)
204 licText[i].nAbove = p->ssComp / 100;
205 licText[i].nBelow = p->ssComp % 100;
214 for (i = 0; i < NFOOTPRINTS; i++) {
238 if (i >= _CR_first && i <= _CR_last) {
245 #define LINE_BYTES 50
286 traceFunc(
"== searchStrategy(%d(%s), \"%s\", %d)\n", index,
287 _SEED(index), regex, aboveCalc);
293 LOG_NOTICE(
"Lic[%d] has NULL seed", index)
297 if (regex ==
NULL_STR || strlen(regex) == 0) {
299 Assert(
NO,
"searchStrategy(%d) called with NULL data", index);
303 if (strcmp(
s, regex) == 0) {
306 bytes = words = lines = 0;
307 (void) strcpy(seed,
s);
308 while (seed[strlen(seed) - 1] ==
' ') {
313 if (
strGrep(seed, regex, REG_ICASE) == 0) {
315 printf(
"DEBUG: seed(%d) no hit in regex!\n", index);
321 for (minLines = 0; cp != NULL;
start = cp + 1) {
322 matchWild = matchSeed = 0;
326 matchWild = (strcmp(
start, any) == 0 || strcmp(
start, some) == 0
327 || strcmp(
start, few));
328 matchSeed = strcmp(
start, seed) == 0;
331 words += (matchWild ?
WC_WORDS : 1);
344 printf(
"ABOVE: .... bytes=%d, words=%d; max(%d,%d)+%d == %d\n",
348 return (words == 0 ? 0 : lines);
352 matchWild = matchSeed = 0;
356 matchWild = (strcmp(
start, any) == 0 || strcmp(
start, some) == 0
357 || strcmp(
start, few));
358 matchSeed = strcmp(
start, seed) == 0;
365 words += (matchWild ?
WC_WORDS : 1);
373 printf(
"BELOW: .... bytes=%d, words=%d; max(%d,%d)+%d == %d\n",
380 static void fixSearchString(
char *
s,
int size,
int i,
int wildcardBad)
387 traceFunc(
"== fixSearchString(\"%s\", %d, %d, %d)\n",
s, size, i,
404 while (isspace(*cp)) {
407 if (strncmp(cp, any,
sizeof(any)-1) == 0 ||
408 strncmp(cp, some,
sizeof(some)-1) == 0 ||
409 strncmp(cp, few,
sizeof(few)-1) == 0) {
410 printf(
"string %d == \"%s\"\n", i, cp);
411 LOG_FATAL(
"Text-spec %d begins with a wild-card", i)
418 (void) sprintf(wildCard,
" %s", any);
419 len = strlen(wildCard);
420 for (cp =
s;
strGrep(wildCard, cp, 0); ) {
422 LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
426 LOG_FATAL(
"String %d ends in a wild-card", i)
429 else if (*(cp+cur.regm.rm_eo) ==
' ') {
431 printf(
"BEFORE(any): %s\n",
s);
433 cp += cur.regm.rm_so;
436 memmove(cp, cp+len-1, strlen(cp+len)+2);
438 printf(
"_AFTER(any): %s\n",
s);
442 LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
443 cp += cur.regm.rm_eo;
449 (void) sprintf(wildCard,
" %s", some);
450 len = strlen(wildCard);
451 for (cp =
s;
strGrep(wildCard, cp, 0); ) {
453 LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
457 LOG_FATAL(
"String %d ends in a wild-card", i)
460 else if (*(cp+cur.regm.rm_eo) ==
' ') {
462 printf(
"BEFORE(some): %s\n",
s);
464 cp += cur.regm.rm_so;
472 memmove(cp, cp+len-6, strlen(cp+len)+7);
474 printf(
"_AFTER(some): %s\n",
s);
478 LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
479 cp += cur.regm.rm_eo;
485 (void) sprintf(wildCard,
" %s", few);
486 len = strlen(wildCard);
487 for (cp =
s;
strGrep(wildCard, cp, 0); ) {
489 LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
493 LOG_FATAL(
"String %d ends in a wild-card", i)
496 else if (*(cp+cur.regm.rm_eo) ==
' ') {
498 printf(
"BEFORE(few): %s\n",
s);
500 cp += cur.regm.rm_so;
508 memmove(cp, cp+len-6, strlen(cp+len)+7);
510 printf(
"_AFTER(few): %s\n",
s);
514 LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
515 cp += cur.regm.rm_eo;
525 if (strlen(
s)+25 >= size) {
526 LOG_FATAL(
"buffer overflow, text-spec %d", i)
529 cp = (
char *)(
s+cur.regm.rm_so);
531 printf(
"BEFORE: %s\n",
s);
533 memmove(cp+25, cp+6, strlen(cp+len)+1);
534 memset(cp+6,
'_', 19);
536 printf(
"_MOVED: %s\n",
s);
538 *cp = *(cp+4) = *(cp+9) = *(cp+14) = *(cp+19) =
'[';
541 *(cp+5) = *(cp+10) = *(cp+15) =
'0';
542 *(cp+6) = *(cp+11) = *(cp+16) =
'-';
543 *(cp+7) = *(cp+12) = *(cp+17) =
'9';
544 *(cp+3) = *(cp+8) = *(cp+13) = *(cp+18) = *(cp+23) =
']';
550 printf(
"_AFTER: %s\n",
s);
560 if (*(p->
str) ==
'/')
562 strcpy(scp->fullpath, p->
str);
563 scp->nameOffset = (size_t) (cur.targetLen + 1);
568 strncpy(scp->fullpath, cur.
cwd,
sizeof(scp->fullpath)-1);
569 strncat(scp->fullpath,
"/",
sizeof(scp->fullpath)-1);
570 strncat(scp->fullpath, p->
str,
sizeof(scp->fullpath)-1);
571 scp->nameOffset = (size_t) (cur.cwdLen + 1);
613 cp = createRelativePath(p, scp);
616 printf(
"licenseScan: scan %s\n",
617 (
char *)(scp->fullpath+scp->nameOffset));
629 scp->size = cur.stbuf.st_size;
646 assert(NKEYWORDS >=
sizeof(scp->kwbm));
648 for (scp->kwbm = c = 0; c < NKEYWORDS; c++)
652 scp->kwbm |= (1 << c);
655 printf(
"Keyword %d (\"%s\"): YES\n", c,
_REGEX(c+_KW_first));
661 printf(
"%s = %d\n", (
char *)(scp->fullpath+scp->nameOffset),
684 if (scores->
score == 0)
708 for (scp = scores, i = nCand = 0; i < nFiles; i++, scp++)
710 scp->relpath = (
char *) (scp->fullpath + scp->nameOffset);
714 if (
idxGrep(_FN_DEBCPYRT, scp->relpath, REG_ICASE)) {
718 else if (scp->
score >= lowWater) {
727 printf(
"%s [score: %d], %07o\n", scp->fullpath,
728 scp->
score, scp->kwbm);
754 int counts[NKEYWORDS + 1];
762 traceFunc(
"== licenseScan(%p, %d)\n", l);
766 printf(
"... allocating %d bytes for scanres_t[] array\n",
767 sizeof(*scp)*licenseList->
used);
770 scores = (
scanres_t *) memAlloc(
sizeof(*scp) * licenseList->
used, MTAG_SCANRES);
771 memset((
void *) counts, 0, (
size_t) ((NKEYWORDS + 1) *
sizeof(
int)));
777 traceFunc(
"=> invoking qsort(): callback == scoreCompare()\n");
780 nFilesInList = licenseList->
used;
781 qsort(scores, (
size_t) nFilesInList,
sizeof(*scp),
scoreCompare);
794 if (scores->licenses) free(scores->licenses);
795 memFree((
char *) scores,
"scores table");
826 return (-strcmp(sc1->fullpath, sc2->fullpath));
836 traceFunc(
"== noLicenseFound\n");
839 (void) strcpy(cur.
compLic, LS_NOSUM);
857 printf(
" Highlighting Info at");
859 for (currentKeyw=0; currentKeyw < keyWords->len; ++currentKeyw ) {
861 printf(
" Keyword at %i, length %i, index = 0,", ourMatchv->
start, ourMatchv->
end - ourMatchv->
start );
864 for (currentLicence = 0; currentLicence < theMatches->len; ++currentLicence)
872 printf(
" License #%s# at %i, length %i, index = %i,", theLicence->
licenceName , ourMatchv->
start, ourMatchv->
end - ourMatchv->
start, ourMatchv->
index );
888 char miscbuf[myBUFSIZ];
893 (void) strcpy(miscbuf,
"Matches: ");
895 for (base = c = 0; c < NKEYWORDS; c++)
897 if (scores[idx].kwbm & (1 << c))
901 miscbuf[offset++] =
',';
902 miscbuf[offset++] =
' ';
904 offset += sprintf(miscbuf + offset,
"%s",
_REGEX(c + _KW_first));
908 printf(
"%s\n", miscbuf);
941 g_array_free(cur.docBufferPositionsAndOffsets, TRUE);
942 cur.docBufferPositionsAndOffsets = g_array_new(FALSE, FALSE,
sizeof(
pairPosOff));
945 for (cur.currentLicenceIndex = 0; cur.currentLicenceIndex < cur.
theMatches->len; ++cur.currentLicenceIndex)
954 for (myIndex = 0; myIndex < currentLicence->
indexList->len; ++myIndex)
956 int currentIndex = g_array_index(currentLicence->
indexList,
int, myIndex);
957 if (currentIndex == lastindex)
continue;
959 lastindex = currentIndex;
988 int highScore = scores->
score;
989 int isFileMarkupLanguage = 0;
996 char realPathOfTarget[PATH_MAX];
999 traceFunc(
"== saveLicenseData(%p, %d, %d, %d, %d)\n", scores, nCand,
1008 printf(
"saveLicenseData: %d candidates\n", nCand);
1021 for (idx = 0; i <= nCand; idx++) {
1025 if (scores[idx].flag == 0) {
1028 (void) sprintf(scores[idx].linkname,
"Link%03d.txt", i++);
1030 printf(
"name: %s\n[%s]\n", scores[idx].relpath, scores[idx].fullpath);
1038 fileName = scores[idx].fullpath;
1040 printf(
"File name: %s\n", fileName);
1049 size = scores[idx].size;
1050 if (scores[idx].dataOffset) {
1051 textp += scores[idx].dataOffset;
1066 printf(
"File score: %d (0x%06x)\n",
1067 (scores[idx].kwbm ? scores[idx].score : scores[idx].kwbm),
1069 if (scores[idx].kwbm) {
1088 #if defined(DEBUG) || defined(DOCTOR_DEBUG) || defined(LTSR_DEBUG) \
1089 || defined(BATCH_DEBUG) || defined(PARSE_STOPWATCH) || defined(MEMSTATS) \
1090 || defined(MEM_DEBUG) || defined(UNKNOWN_CHECK_DEBUG)
1091 printf(
"*** PROCESS File: %s\n", scores[idx].relpath);
1092 printf(
"... %d bytes, score %d\n", scores[idx].size, scores[idx].score);
1095 isFileMarkupLanguage =
idxGrep(_UTIL_MARKUP, textp, REG_ICASE | REG_EXTENDED);
1098 printf(
"idxGrep(ML) returns %d\n", isFileMarkupLanguage);
1099 if (isFileMarkupLanguage)
1102 printf(
"isMarkUp@%d: [", cur.regm.rm_so);
1103 for (n = cur.regm.rm_so; n <= cur.regm.rm_eo; n++) {
1104 printf(
"%c", *(textp+n));
1116 printf(
"idxGrep(PS) returns %d\n", isPS);
1119 printf(
"isPostScript@%d: [", cur.regm.rm_so);
1127 fileName =
parseLicenses(textp, size, &scores[idx], isFileMarkupLanguage, isPS);
1128 scores[idx].licenses =
copyString(fileName, MTAG_FILELIC);
1131 Assert(
NO,
"Expected non-null parseLicenses return!");
1133 if (scores[idx].licenses ==
NULL_STR) {
1134 Assert(
NO,
"Expected non-null license summary!");
1140 #ifdef FLAG_NO_COPYRIGHT
1141 if (gl.
flags & FL_NOCOPYRIGHT) {
1142 p =
listGetItem(&cur.nocpyrtList, scores[idx].relpath);
1144 p->num = scores[idx].
score;
1148 memFree(cur.licPara, MTAG_TEXTPARA);
1164 p =
listGetItem(&cur.lList, scores[idx].licenses);
1181 listSort(&cur.lList, SORT_BY_COUNT_DSC);
1184 if (cur.lList.
used == 0) {
1185 Assert(
NO,
"No entries in license-list");
1194 if (cur.parseList.
used == 0) {
1223 printf(
"File %s contains license(s) %s", realPathOfTarget, cur.
compLic);
1227 printf(
"File %s contains license(s) %s", basename(cur.
targetFile), cur.
compLic);
1272 traceFunc(
"== makeLicenseSummary(%p, %d, %p, %d)\n", l, highScore,
1277 (void) strcpy(target, LS_NOSUM);
1296 if (goodStuff && (p->iLevel <= IL_LOW)) {
1300 target[len++] =
',';
1303 new = sprintf(target + len,
"%s", p->
str);
1304 if ((len +=
new) > size) {
1305 LOG_FATAL(
"Buffer-overwrite, marginal license components")
1313 #ifdef LICENSE_DEBUG
1319 traceFunc(
"== dumpLicenses()\n");
1322 for (i = 0; i < NFOOTPRINTS; i++) {
1323 printf(
"License[%d]: seedlen=%d, regexlen=%d\n", i,
1324 licSpec[i].seed.csLen, licSpec[i].text.csLen);
1326 printf(
"[NFOOTPRINTS = %d\n", NFOOTPRINTS);
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
int s
The socket that the CLI will use to communicate.
void writeJson()
Write the scan output as a JSON.
static void printHighlightInfo(GArray *keyWords, GArray *theMatches)
Print highlight info about matches.
static gint compare_integer(gconstpointer a, gconstpointer b)
Compare two integers.
static void saveLicenseData(scanres_t *, int, int, int)
Save/create all the license data in a specific (temporary?) directory.
int fiterResultsOfKeywordScan(int lowWater, scanres_t *scores, int nFiles)
Run through the list once more.
void scanForKeywordsAndSetScore(scanres_t *scores, list_t *licenseList)
static void printKeyWordMatches(scanres_t *scores, int idx)
Prints keywords match to STDOUT.
#define MIN(a, b)
Min of two.
static int searchStrategy(int, char *, int)
void licenseScan(list_t *licenseList)
Scan the list for license(s).
void licenseInit()
license initialization
static void noLicenseFound()
Mark current scan as LS_NOSUM (No_license_found)
void relaxScoreCriterionForSingleFile(scanres_t *scores)
Reset scores to 1 if it is 0.
static void makeLicenseSummary(list_t *, int, char *, int)
Construct a 'computed license'. Wherever possible, leave off the entries for None and LikelyNot; thos...
static void rescanOriginalTextForFoundLicences(char *textp, int isFileMarkupLanguage, int isPS)
Rescan original content for the licenses already found.
static int scoreCompare(const void *, const void *)
Compare two scores.
#define MAX(a, b)
Max of two.
void listDump(list_t *l, int verbose)
print the passed in list
item_t * listGetItem(list_t *l, char *s)
get an item from the itemlist. If the item is not in the itemlist, then add it to the itemlist.
void listInit(list_t *l, int size, char *label)
Initialize a list; if the list is not empty, empty it (initialize it to zeros).
item_t * listIterate(list_t *l)
return a pointer to listitem, returns a NULL_ITEM when no more items to return.
void listSort(list_t *l, int sortType)
Sort the list as per the sortType passed.
void listClear(list_t *l, int deallocFlag)
Destroy list_t.
void munmapFile(void *ptr)
char * pathBasename(char *path)
Get the basename from a file path.
char * copyString(char *s, char *label)
Create a copy of a string.
void Assert(int fatalFlag, const char *fmt,...)
Raise an assert.
char * wordCount(char *textp)
VERY simple line count, does NOT have to be perfect!
char * mmapFile(char *pathname)
Blarg. Files that are EXACTLY a multiple of the system pagesize do not get a NULL on the end of the b...
#define NULL_ITEM
NULL item.
#define NULL_STR
NULL string.
void Bail(int exitval)
Close connections and exit.
#define NULL_CHAR
NULL character.
int optionIsSet(int val)
Check if a CLI option is set.
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
int idxGrep(int index, char *data, int flags)
compile a regex, and perform the search (on data?)
int strGrep(char *regex, char *data, int flags)
General-purpose grep function, used for one-time-only searches.
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
char * parseLicenses(char *filetext, int size, scanres_t *scp, int isML, int isPS)
Parse a file to check all the possible licenses and add them to matches.
start($application)
Start the application. Assumes application is restartable via /etc/init.d/<script>....
GArray * matchPositions
Match positions.
GArray * indexList
License indexes.
char * licenceName
License names.
int start
Start position of match.
int index
Enums from index (Entrynumber) in STRINGS.in.
int end
End position of match.
GArray * keywordPositions
char targetFile[myBUFSIZ]
searchString_t text
License text.
searchString_t seed
License seed.
char * regex
License regex.
char * tseed
unencrypted license text
list_t type structure used to keep various lists. (e.g. there are multiple lists).
tricky data structure used for a list of 'items'
int score
License match score.
char * csData
String data.