28 #include "nomos_utils.h" 
   31 #include "nomos_regex.h" 
   33 #include <_autodefs.h> 
   35 #define HASHES    "#####################" 
   36 #define DEBCPYRIGHT "debian/copyright" 
   41 static void licenseStringChecks();
 
   42 static void findLines(
char *, 
char *, 
int, 
int, 
list_t *);
 
   54 extern void memStats();
 
   /* Max of two. Every use of an argument is parenthesized so that
    * low-precedence argument expressions expand correctly. Each argument
    * may be evaluated twice, so avoid arguments with side effects. */
   #define MAX(a, b) ((a) > (b) ? (a) : (b))
   /* Min of two. Same parenthesization and double-evaluation caveat as MAX. */
   #define MIN(a, b) ((a) < (b) ? (a) : (b))
   82   traceFunc(
"== licenseInit()\n");
 
   86   strcpy(some, 
"=SOME=");
 
   88   strcpy(year, 
"=YEAR=");
 
   98   for (i = 0; i < NFOOTPRINTS; i++) {
 
  101     if (licSpec[i].text.csData == 
NULL_STR) {
 
  104     if ((licSpec[i].text.csLen == 1) && (*(licSpec[i].
text.
csData) == 
'.')) {
 
  108     else if ((licSpec[i].seed.csLen == licSpec[i].
text.
csLen) && !memcmp(
 
  109         licSpec[i].seed.csData, licSpec[i].
text.
csData, len)) {
 
  119     fixSearchString(buf, 
sizeof(buf), i, 
YES);
 
  130       LOG_FATAL(
"Cannot enqueue search-cache item \"%s\"", 
licText[i].tseed)
 
  140     if (strcmp(
licText[i].tseed, 
"=NULL=") == 0) { 
 
  162       memcpy(buf, licSpec[i].text.csData, (
size_t)(len + 1));
 
  174       fixSearchString(buf, 
sizeof(buf), i, 
NO);
 
  178     if (p->ssComp < (ssAbove * 100) + ssBelow) {
 
  179       p->ssComp = (ssAbove * 100) + ssBelow;
 
  191   for (i = 0; i < NFOOTPRINTS; i++) {
 
  194       LOG_NOTICE(
"License[%d] configured with NULL seed", i)
 
  200       LOG_NOTICE(
"License[%d] seed == regex", i)
 
  204     licText[i].nAbove = p->ssComp / 100;
 
  205     licText[i].nBelow = p->ssComp % 100;
 
  214   for (i = 0; i < NFOOTPRINTS; i++) {
 
  238     if (i >= _CR_first && i <= _CR_last) {
 
  245 #define LINE_BYTES  50   
  286   traceFunc(
"== searchStrategy(%d(%s), \"%s\", %d)\n", index,
 
  287       _SEED(index), regex, aboveCalc);
 
  293     LOG_NOTICE(
"Lic[%d] has NULL seed", index)
 
  297   if (regex == 
NULL_STR || strlen(regex) == 0) {
 
  299     Assert(
NO, 
"searchStrategy(%d) called with NULL data", index);
 
  303   if (strcmp(
s, regex) == 0) {
 
  306   bytes = words = lines = 0;
 
  307   (void) strcpy(seed, 
s);
 
  308   while (seed[strlen(seed) - 1] == 
' ') {
 
  313     if (
strGrep(seed, regex, REG_ICASE) == 0) {
 
  315       printf(
"DEBUG: seed(%d) no hit in regex!\n", index);
 
  321     for (minLines = 0; cp != NULL; 
start = cp + 1) {
 
  322       matchWild = matchSeed = 0;
 
  326       matchWild = (strcmp(
start, any) == 0 || strcmp(
start, some) == 0
 
  327           || strcmp(
start, few));
 
  328       matchSeed = strcmp(
start, seed) == 0;
 
  331         words += (matchWild ? 
WC_WORDS : 1);
 
  344     printf(
"ABOVE: .... bytes=%d, words=%d; max(%d,%d)+%d == %d\n",
 
  348     return (words == 0 ? 0 : lines);
 
  352     matchWild = matchSeed = 0;
 
  356     matchWild = (strcmp(
start, any) == 0 || strcmp(
start, some) == 0
 
  357         || strcmp(
start, few));
 
  358     matchSeed = strcmp(
start, seed) == 0;
 
  365       words += (matchWild ? 
WC_WORDS : 1);
 
  373   printf(
"BELOW: .... bytes=%d, words=%d; max(%d,%d)+%d == %d\n",
 
  380 static void fixSearchString(
char *
s, 
int size, 
int i, 
int wildcardBad)
 
  387   traceFunc(
"== fixSearchString(\"%s\", %d, %d, %d)\n", 
s, size, i,
 
  404   while (isspace(*cp)) {
 
  407   if (strncmp(cp, any, 
sizeof(any)-1) == 0 ||
 
  408       strncmp(cp, some, 
sizeof(some)-1) == 0 ||
 
  409       strncmp(cp, few, 
sizeof(few)-1) == 0) {
 
  410     printf(
"string %d == \"%s\"\n", i, cp);
 
  411     LOG_FATAL(
"Text-spec %d begins with a wild-card", i)
 
  418   (void) sprintf(wildCard, 
" %s", any);
 
  419   len = strlen(wildCard);
 
  420   for (cp = 
s; 
strGrep(wildCard, cp, 0); ) {
 
  422       LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
 
  426       LOG_FATAL(
"String %d ends in a wild-card", i)
 
  429     else if (*(cp+cur.regm.rm_eo) == 
' ') {
 
  431       printf(
"BEFORE(any): %s\n", 
s);
 
  433       cp += cur.regm.rm_so;
 
  436       memmove(cp, cp+len-1, strlen(cp+len)+2);
 
  438       printf(
"_AFTER(any): %s\n", 
s);
 
  442       LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
 
  443                             cp += cur.regm.rm_eo;
 
  449   (void) sprintf(wildCard, 
" %s", some);
 
  450   len = strlen(wildCard);
 
  451   for (cp = 
s; 
strGrep(wildCard, cp, 0); ) {
 
  453       LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
 
  457       LOG_FATAL(
"String %d ends in a wild-card", i)
 
  460     else if (*(cp+cur.regm.rm_eo) == 
' ') {
 
  462       printf(
"BEFORE(some): %s\n", 
s);
 
  464       cp += cur.regm.rm_so;
 
  472       memmove(cp, cp+len-6, strlen(cp+len)+7);
 
  474       printf(
"_AFTER(some): %s\n", 
s);
 
  478       LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
 
  479                             cp += cur.regm.rm_eo;
 
  485   (void) sprintf(wildCard, 
" %s", few);
 
  486   len = strlen(wildCard);
 
  487   for (cp = 
s; 
strGrep(wildCard, cp, 0); ) {
 
  489       LOG_FATAL(
"OOPS, regex %d, wild-card not allowed here", i)
 
  493       LOG_FATAL(
"String %d ends in a wild-card", i)
 
  496     else if (*(cp+cur.regm.rm_eo) == 
' ') {
 
  498       printf(
"BEFORE(few): %s\n", 
s);
 
  500       cp += cur.regm.rm_so;
 
  508       memmove(cp, cp+len-6, strlen(cp+len)+7);
 
  510       printf(
"_AFTER(few): %s\n", 
s);
 
  514       LOG_NOTICE(
"Wild-card \"%s\" sub-string, phrase %d", wildCard, i)
 
  515                             cp += cur.regm.rm_eo;
 
  525     if (strlen(
s)+25 >= size) { 
 
  526       LOG_FATAL(
"buffer overflow, text-spec %d", i)
 
  529     cp = (
char *)(
s+cur.regm.rm_so);
 
  531     printf(
"BEFORE: %s\n", 
s);
 
  533     memmove(cp+25, cp+6, strlen(cp+len)+1); 
 
  534     memset(cp+6, 
'_', 19);
 
  536     printf(
"_MOVED: %s\n", 
s);
 
  538     *cp = *(cp+4) = *(cp+9) = *(cp+14) = *(cp+19) = 
'[';
 
  541     *(cp+5) = *(cp+10) = *(cp+15) = 
'0';
 
  542     *(cp+6) = *(cp+11) = *(cp+16) = 
'-';
 
  543     *(cp+7) = *(cp+12) = *(cp+17) = 
'9';
 
  544     *(cp+3) = *(cp+8) = *(cp+13) = *(cp+18) = *(cp+23) = 
']';
 
  550     printf(
"_AFTER: %s\n", 
s);
 
  560   if (*(p->
str) == 
'/')
 
  562     strcpy(scp->fullpath, p->
str);
 
  563     scp->nameOffset = (size_t) (cur.targetLen + 1);
 
  568     strncpy(scp->fullpath, cur.
cwd, 
sizeof(scp->fullpath)-1);
 
  569     strncat(scp->fullpath, 
"/", 
sizeof(scp->fullpath)-1);
 
  570     strncat(scp->fullpath, p->
str, 
sizeof(scp->fullpath)-1);
 
  571     scp->nameOffset = (size_t) (cur.cwdLen + 1);
 
  613     cp = createRelativePath(p, scp);
 
  616     printf(
"licenseScan: scan %s\n",
 
  617         (
char *)(scp->fullpath+scp->nameOffset));
 
  629     scp->size = cur.stbuf.st_size; 
 
  646     assert(NKEYWORDS >= 
sizeof(scp->kwbm));
 
  648     for (scp->kwbm = c = 0; c < NKEYWORDS; c++)
 
  652         scp->kwbm |= (1 << c);  
 
  655         printf(
"Keyword %d (\"%s\"): YES\n", c, 
_REGEX(c+_KW_first));
 
  661     printf(
"%s = %d\n", (
char *)(scp->fullpath+scp->nameOffset),
 
  684   if (scores->
score == 0)
 
  708   for (scp = scores, i = nCand = 0; i < nFiles; i++, scp++)
 
  710     scp->relpath = (
char *) (scp->fullpath + scp->nameOffset);
 
  714       if (
idxGrep(_FN_DEBCPYRT, scp->relpath, REG_ICASE)) {
 
  718     else if (scp->
score >= lowWater) {
 
  727       printf(
"%s [score: %d], %07o\n", scp->fullpath,
 
  728           scp->
score, scp->kwbm);
 
  754   int counts[NKEYWORDS + 1];
 
  762   traceFunc(
"== licenseScan(%p, %d)\n", l);
 
  766   printf(
"... allocating %d bytes for scanres_t[] array\n",
 
  767       sizeof(*scp)*licenseList->
used);
 
  770   scores = (
scanres_t *) memAlloc(
sizeof(*scp) * licenseList->
used, MTAG_SCANRES);
 
  771   memset((
void *) counts, 0, (
size_t) ((NKEYWORDS + 1) * 
sizeof(
int)));
 
  777   traceFunc(
"=> invoking qsort(): callback == scoreCompare()\n");
 
  780   nFilesInList = licenseList->
used;
 
  781   qsort(scores, (
size_t) nFilesInList, 
sizeof(*scp), 
scoreCompare);
 
  794   if (scores->licenses) free(scores->licenses);
 
  795   memFree((
char *) scores, 
"scores table");
 
  826     return (-strcmp(sc1->fullpath, sc2->fullpath));
 
  836   traceFunc(
"== noLicenseFound\n");
 
  839   (void) strcpy(cur.
compLic, LS_NOSUM);
 
  857     printf(
" Highlighting Info at");
 
  859     for (currentKeyw=0; currentKeyw < keyWords->len; ++currentKeyw ) {
 
  861       printf(
" Keyword at %i, length %i, index = 0,",  ourMatchv->
start, ourMatchv->
end - ourMatchv->
start );
 
  864     for (currentLicence = 0; currentLicence < theMatches->len; ++currentLicence)
 
  872         printf(
" License #%s# at %i, length %i, index = %i,", theLicence->
licenceName , ourMatchv->
start, ourMatchv->
end - ourMatchv->
start,    ourMatchv->
index  );
 
  888   char miscbuf[myBUFSIZ];
 
  893   (void) strcpy(miscbuf, 
"Matches: ");
 
  895   for (base = c = 0; c < NKEYWORDS; c++)
 
  897     if (scores[idx].kwbm & (1 << c))
 
  901         miscbuf[offset++] = 
',';
 
  902         miscbuf[offset++] = 
' ';
 
  904       offset += sprintf(miscbuf + offset, 
"%s", 
_REGEX(c + _KW_first));
 
  908   printf(
"%s\n", miscbuf);
 
  941     g_array_free(cur.docBufferPositionsAndOffsets, TRUE);
 
  942     cur.docBufferPositionsAndOffsets = g_array_new(FALSE, FALSE, 
sizeof(
pairPosOff));
 
  945     for (cur.currentLicenceIndex = 0; cur.currentLicenceIndex < cur.
theMatches->len; ++cur.currentLicenceIndex)
 
  954       for (myIndex = 0; myIndex < currentLicence->
indexList->len; ++myIndex)
 
  956         int currentIndex = g_array_index(currentLicence->
indexList, 
int, myIndex);
 
  957         if (currentIndex == lastindex) 
continue;
 
  959         lastindex = currentIndex;
 
  988   int highScore = scores->
score;
 
  989   int isFileMarkupLanguage = 0;
 
  996   char realPathOfTarget[PATH_MAX];
 
  999   traceFunc(
"== saveLicenseData(%p, %d, %d, %d, %d)\n", scores, nCand,
 
 1008   printf(
"saveLicenseData: %d candidates\n", nCand);
 
 1021   for (idx = 0; i <= nCand; idx++) {
 
 1025     if (scores[idx].flag == 0) {
 
 1028     (void) sprintf(scores[idx].linkname, 
"Link%03d.txt", i++);
 
 1030     printf(
"name: %s\n[%s]\n", scores[idx].relpath, scores[idx].fullpath);
 
 1038     fileName = scores[idx].fullpath;
 
 1040       printf(
"File name: %s\n", fileName);
 
 1049     size = scores[idx].size;
 
 1050     if (scores[idx].dataOffset) {
 
 1051       textp += scores[idx].dataOffset;
 
 1066       printf(
"File score: %d (0x%06x)\n",
 
 1067           (scores[idx].kwbm ? scores[idx].score : scores[idx].kwbm),
 
 1069       if (scores[idx].kwbm) {
 
 1088 #if defined(DEBUG) || defined(DOCTOR_DEBUG) || defined(LTSR_DEBUG)  \ 
 1089     || defined(BATCH_DEBUG) || defined(PARSE_STOPWATCH) || defined(MEMSTATS) \ 
 1090     || defined(MEM_DEBUG) || defined(UNKNOWN_CHECK_DEBUG) 
 1091     printf(
"*** PROCESS File: %s\n", scores[idx].relpath);
 
 1092     printf(
"... %d bytes, score %d\n", scores[idx].size, scores[idx].score);
 
 1095     isFileMarkupLanguage = 
idxGrep(_UTIL_MARKUP, textp, REG_ICASE | REG_EXTENDED);
 
 1098     printf(
"idxGrep(ML) returns %d\n", isFileMarkupLanguage);
 
 1099     if (isFileMarkupLanguage)
 
 1102       printf(
"isMarkUp@%d: [", cur.regm.rm_so);
 
 1103       for (n = cur.regm.rm_so; n <= cur.regm.rm_eo; n++) {
 
 1104         printf(
"%c", *(textp+n));
 
 1116     printf(
"idxGrep(PS) returns %d\n", isPS);
 
 1119       printf(
"isPostScript@%d: [", cur.regm.rm_so);
 
 1127     fileName = 
parseLicenses(textp, size, &scores[idx], isFileMarkupLanguage, isPS);
 
 1128     scores[idx].licenses = 
copyString(fileName, MTAG_FILELIC);
 
 1131       Assert(
NO, 
"Expected non-null parseLicenses return!");
 
 1133     if (scores[idx].licenses == 
NULL_STR) {
 
 1134       Assert(
NO, 
"Expected non-null license summary!");
 
 1140 #ifdef  FLAG_NO_COPYRIGHT 
 1141     if (gl.
flags & FL_NOCOPYRIGHT) {
 
 1142       p = 
listGetItem(&cur.nocpyrtList, scores[idx].relpath);
 
 1144       p->num = scores[idx].
score;
 
 1148       memFree(cur.licPara, MTAG_TEXTPARA); 
 
 1164     p = 
listGetItem(&cur.lList, scores[idx].licenses);
 
 1181   listSort(&cur.lList, SORT_BY_COUNT_DSC);
 
 1184   if (cur.lList.
used == 0) {
 
 1185     Assert(
NO, 
"No entries in license-list");
 
 1194   if (cur.parseList.
used == 0) {
 
 1223           printf(
"File %s contains license(s) %s", realPathOfTarget, cur.
compLic);
 
 1227           printf(
"File %s contains license(s) %s", basename(cur.
targetFile), cur.
compLic);
 
 1272   traceFunc(
"== makeLicenseSummary(%p, %d, %p, %d)\n", l, highScore,
 
 1277     (void) strcpy(target, LS_NOSUM);
 
 1296     if (goodStuff && (p->iLevel <= IL_LOW)) { 
 
 1300       target[len++] = 
',';
 
 1303     new = sprintf(target + len, 
"%s", p->
str);
 
 1304     if ((len += 
new) > size) {
 
 1305       LOG_FATAL(
"Buffer-overwrite, marginal license components")
 
 1313 #ifdef  LICENSE_DEBUG 
 1319   traceFunc(
"== dumpLicenses()\n");
 
 1322   for (i = 0; i < NFOOTPRINTS; i++) {
 
 1323     printf(
"License[%d]: seedlen=%d, regexlen=%d\n", i,
 
 1324         licSpec[i].seed.csLen, licSpec[i].text.csLen);
 
 1326   printf(
"[NFOOTPRINTS = %d\n", NFOOTPRINTS);
 
void doctorBuffer(char *buf, int isML, int isPS, int isCR)
Convert a buffer of multiple stuff to text-only, separated by spaces.
 
int s
The socket that the CLI will use to communicate.
 
void writeJson()
Write the scan output as a JSON.
 
static void printHighlightInfo(GArray *keyWords, GArray *theMatches)
Print highlight info about matches.
 
static gint compare_integer(gconstpointer a, gconstpointer b)
Compare two integers.
 
static void saveLicenseData(scanres_t *, int, int, int)
Saves/creates all the license data in a specific (temporary?) directory.
 
int fiterResultsOfKeywordScan(int lowWater, scanres_t *scores, int nFiles)
Run through the list once more.
 
void scanForKeywordsAndSetScore(scanres_t *scores, list_t *licenseList)
 
static void printKeyWordMatches(scanres_t *scores, int idx)
Prints keywords match to STDOUT.
 
#define MIN(a, b)
Min of two.
 
static int searchStrategy(int, char *, int)
 
void licenseScan(list_t *licenseList)
Scan the list for license(s).
 
void licenseInit()
license initialization
 
static void noLicenseFound()
Mark current scan as LS_NOSUM (No_license_found)
 
void relaxScoreCriterionForSingleFile(scanres_t *scores)
Reset scores to 1 if it is 0.
 
static void makeLicenseSummary(list_t *, int, char *, int)
Construct a 'computed license'. Wherever possible, leave off the entries for None and LikelyNot; thos...
 
static void rescanOriginalTextForFoundLicences(char *textp, int isFileMarkupLanguage, int isPS)
Rescan original content for the licenses already found.
 
static int scoreCompare(const void *, const void *)
Compare two scores.
 
#define MAX(a, b)
Max of two.
 
void listDump(list_t *l, int verbose)
print the passed in list
 
item_t * listGetItem(list_t *l, char *s)
get an item from the itemlist. If the item is not in the itemlist, then add it to the itemlist.
 
void listInit(list_t *l, int size, char *label)
Initialize a list; if the list is not empty, empty it (initialize it to zeros).
 
item_t * listIterate(list_t *l)
Return a pointer to a list item; returns NULL_ITEM when there are no more items to return.
 
void listSort(list_t *l, int sortType)
Sort the list as per the sortType passed.
 
void listClear(list_t *l, int deallocFlag)
Destroy list_t.
 
void munmapFile(void *ptr)
 
char * pathBasename(char *path)
Get the basename from a file path.
 
char * copyString(char *s, char *label)
Create a copy of a string.
 
void Assert(int fatalFlag, const char *fmt,...)
Raise an assert.
 
char * wordCount(char *textp)
VERY simple line count, does NOT have to be perfect!
 
char * mmapFile(char *pathname)
Blarg. Files that are EXACTLY a multiple of the system pagesize do not get a NULL on the end of the b...
 
#define NULL_ITEM
NULL item.
 
#define NULL_STR
NULL string.
 
void Bail(int exitval)
Close connections and exit.
 
#define NULL_CHAR
NULL character.
 
int optionIsSet(int val)
Check if an CLI option is set.
 
int idxGrep_recordPosition(int index, char *data, int flags)
compile a regex, perform the search and record findings
 
int idxGrep(int index, char *data, int flags)
compile a regex, and perform the search (on data?)
 
int strGrep(char *regex, char *data, int flags)
General-purpose grep function, used for one-time-only searches.
 
int idxGrep_recordPositionDoctored(int index, char *data, int flags)
compile a regex, perform the search and record findings
 
FUNCTION MatchPositionAndType * getMatchfromHighlightInfo(GArray *in, int index)
Get the MatchPositionAndType for a given index in highlight array.
 
FUNCTION LicenceAndMatchPositions * getLicenceAndMatchPositions(GArray *in, int index)
Get the LicenceAndMatchPositions for a given index in match array.
 
char * parseLicenses(char *filetext, int size, scanres_t *scp, int isML, int isPS)
Parse a file to check all the possible licenses and add them to matches.
 
start($application)
Start the application. Assumes application is restartable via /etc/init.d/<script>....
 
GArray * matchPositions
Match positions.
 
GArray * indexList
License indexes.
 
char * licenceName
License names.
 
int start
Start position of match.
 
int index
Enums from index (Entrynumber) in STRINGS.in.
 
int end
End position of match.
 
GArray * keywordPositions
 
char targetFile[myBUFSIZ]
 
searchString_t text
License text.
 
searchString_t seed
License seed.
 
char * regex
License regex.
 
char * tseed
unencrypted license text
 
list_t type structure used to keep various lists. (e.g. there are multiple lists).
 
tricky data structure used for a list of 'items'
 
int score
License match score.
 
char * csData
String data.