FOSSology
4.4.0
Open Source License Compliance by Open Source Software
|
searches for licenses More...
#include <ctype.h>
#include "nomos.h"
#include "parse.h"
#include "list.h"
#include "util.h"
#include "nomos_regex.h"
#include "nomos_utils.h"
#include <_autodefs.h>
Go to the source code of this file.
Functions | |
local static functions | |
Local (static) Functions | |
int | findPhrase (int index, char *filetext, int size, int isML, int isPS, int qType) |
Check for the presence of a phrase in a file by first searching for the search key provided. More... | |
int | famOPENLDAP (char *filetext, int size, int isML, int isPS) |
Utility function to search for OpenLDAP licenses. So many different footprints are used by OpenLDAP, we had to either duplicate code in several places, or funnel it all into one function. | |
int | checkUnclassified (char *filetext, int size, int score, int isML, int isPS, int nw) |
This function is called when all the above license-checks don't turn up anything useful. Now we need to determine if the current file likely contains a license or not. More... | |
int | checkPublicDomain (char *, int, int, int, int, int) |
static int | dbgIdxGrep (int licTextIdx, char *buf, int show) |
Debugging call for idxGrep() More... | |
void | checkCornerCases (char *filetext, int size, int score, int kwbm, int isML, int isPS, int nw, int force) |
If we call this function, we still don't know anything about a license. More... | |
void | checkFileReferences (char *filetext, int size, int score, int kwbm, int isML, int isPS) |
Generic license-phrases referring to other files or running commands. | |
void | addRef (char *str, int interest) |
This function fills in a character-buffer for a license of a CURRENT file being evaluated, and enqueues a list of components to help make a package-level summary. | |
void | locateRegex (char *text, item_t *op, int index, int size, int sso, int seo) |
Locate a regex in a given file. More... | |
void | saveRegexLocation (int index, int offset, int length, int saveCache) |
Save a regex in whereList. More... | |
void | saveUnclBufLocation (int) |
void | saveLicenseParagraph (char *, int, int, int) |
char * | cplVersion (char *filetext, int size, int isML, int isPS) |
Check for CPL versions. More... | |
static char * | gplVersion (char *filetext, int size, int isML, int isPS) |
Check for GPL versions. More... | |
char * | lgplVersion (char *filetext, int size, int isML, int isPS) |
Check for LGPL versions. More... | |
char * | agplVersion (char *filetext, int size, int isML, int isPS) |
Check for AGPL versions. More... | |
char * | gfdlVersion (char *filetext, int size, int isML, int isPS) |
Check for GFDL versions. More... | |
char * | lpplVersion (char *filetext, int size, int isML, int isPS) |
Check for LPPL versions. More... | |
char * | mplNplVersion (char *filetext, int size, int isML, int isPS) |
Check for MPL|NPL versions. More... | |
char * | pythonVersion (char *filetext, int size, int isML, int isPS) |
Check for python versions. More... | |
static char * | realVersion (char *filetext, int size, int isML, int isPS, int ref) |
Check for RPSL versions. More... | |
static char * | sisslVersion (char *filetext, int size, int isML, int isPS) |
Check for SISSL versions. More... | |
char * | aslVersion (char *filetext, int size, int isML, int isPS) |
Check for ASL Apache versions. More... | |
char * | cddlVersion (char *filetext, int size, int isML, int isPS) |
Check for CDDL versions. More... | |
char * | ccVersion (char *filetext, int size, int isML, int isPS) |
Check for CC_BY-X versions. More... | |
char * | oslVersion (char *filetext, int size, int isML, int isPS) |
Check for OSL versions. More... | |
char * | aflVersion (char *filetext, int size, int isML, int isPS) |
Check for AFL versions. More... | |
static int | match3 (int, char *, int, int, int, int) |
void | spdxReference (char *, int, int, int) |
void | copyleftExceptions (char *, int, int, int) |
Variables | |
local variables | |
File local variables | |
static char | licStr [myBUFSIZ] |
static char | ltsr [NFOOTPRINTS] |
static char | name [256] |
static char | lmem [_msize] |
static list_t | searchList |
static list_t | whereList |
static list_t | whCacheList |
static int | refOffset |
static int | maxInterest |
static int | pd |
static int | crCheck |
static int | checknw |
static int | lDebug = 0 |
static int | lDiags = 0 |
license definitions | |
Instead of keeping a potentially-growing list of variables used to recall specific flags/text, etc., manage it in an array. A little slower, sure, but it keeps the number of variables we allocate to a more-reasonable minimum. | |
#define | _mGPL 0 |
#define | _mLGPL 1 |
#define | _mGFDL 2 |
#define | _mQPL 3 |
#define | _mPYTHON 4 |
#define | _mPYTH_TEXT 5 |
#define | _mAPACHE 6 |
#define | _mHP 7 |
#define | _mPHP 8 |
#define | _mMIT 9 |
#define | _mXOPEN 10 |
#define | _mREDHAT 11 |
#define | _mISC 12 |
#define | _mCMU 13 |
#define | _mOSF 14 |
#define | _mSUN 15 |
#define | _mALADDIN 16 |
#define | _mCUPS 17 |
#define | _fOPENLDAP 18 |
#define | _fBSD 19 |
#define | _fGPL 20 |
#define | _mCDDL 21 |
#define | _mLIBRE 22 |
#define | _mGSOAP 23 |
#define | _mMPL 24 |
#define | _fATTRIB 25 |
#define | _fREAL 26 |
#define | _fIETF 27 |
#define | _fDOC 28 |
#define | _fMSCORP 29 |
#define | _fW3C 30 |
#define | _mAPTANA 31 |
#define | _tOPENLDAP 32 |
#define | _mNTP 33 |
#define | _fIP 34 |
#define | _fANTLR 35 |
#define | _fCCBY 36 |
#define | _fZPL 37 |
#define | _fCLA 38 |
#define | _fODBL 39 |
#define | _fPDDL 40 |
#define | _fRUBY 41 |
#define | _fSAX 42 |
#define | _fAPL 43 |
#define | _fARTISTIC 44 |
#define | _fCITRIX 45 |
#define | _fPURDUE 46 |
#define | _fUNICODE 47 |
#define | _fOFL 48 |
#define | _mAPACHE10 49 |
#define | _mAPACHE11 50 |
#define | _mWORDNET 51 |
#define | _fNCSA 52 |
#define | _fTCL 53 |
#define | _fIJG 54 |
#define | _msize _fIJG+1 |
struct { | |
char * base | |
int sso | |
int seo | |
int index | |
} | kludge |
void | preloadResults (char *filetext, char *ltsr) |
micro function definitions | |
#define | PARSE_ARGS filetext, size, isML, isPS |
Arguments to parse. | |
#define | LVAL(x) (ltsr[x] & LTSR_RMASK) |
Check LTSR_RMASK on ltsr[x].
#define | SEEN(x) (ltsr[x] & LTSR_SMASK) |
Check LTSR_SMASK on ltsr[x].
#define | INFILE(x) fileHasPatt(x, PARSE_ARGS, 0) |
Calls fileHasPatt() with qType 0.
#define | NOT_INFILE(x) !( fileHasPatt(x, PARSE_ARGS, 0) && clearLastElementOfLicenceBuffer() ) |
Negated fileHasPatt() check with qType 0; when the pattern is found, clearLastElementOfLicenceBuffer() is also called.
#define | RM_INFILE(x) fileHasPatt(x, PARSE_ARGS, 1) |
Calls fileHasPatt() with qType 1. | |
#define | GPL_INFILE(x) fileHasPatt(x, PARSE_ARGS, 2) |
Calls fileHasPatt() with qType 2. | |
#define | PERL_INFILE(x) fileHasPatt(x, PARSE_ARGS, 3) |
Calls fileHasPatt() with qType 3. | |
#define | NY_INFILE(x) fileHasPatt(x, PARSE_ARGS, 4) |
Calls fileHasPatt() with qType 4. | |
#define | X_INFILE(x, y) fileHasPatt(x, PARSE_ARGS, y) |
Calls fileHasPatt() with qType y. | |
#define | DEBUG_INFILE(x) printf(" Regex[%d] = \"%s\"\nINFILE(%d) = %d\n", x, _REGEX(x), x, INFILE(x)); |
Debug print. | |
#define | HASREGEX(x, cp) idxGrep(x, cp, REG_ICASE|REG_EXTENDED) |
Calls idxGrep() | |
#define | HASREGEX_RI(x, cp) idxGrep_recordIndex(x, cp, REG_ICASE|REG_EXTENDED) |
Calls idxGrep_recordIndex() | |
#define | HASTEXT(x, fl) idxGrep_recordIndex(x, filetext, REG_ICASE|fl) |
Calls idxGrep_recordIndex() | |
#define | URL_INFILE(x) (INFILE(x) || fileHasPatt(x, PARSE_ARGS, -1)) |
Check in file with qType 0 or -1.
#define | CANSKIP(i, x, y, z) ((i >= y) && (i <= z) && !(kwbm & (1 << (x - _KW_first)))) |
#define | HASKW(x, y) (x & (1 << (y - _KW_first))) |
#define | TRYGROUP(x) x(PARSE_ARGS) |
#define | LOWINTEREST(x) addRef(x, IL_LOW) |
#define | MEDINTEREST(x) addRef(x, IL_MED) |
#define | INTERESTING(x) addRef(x, IL_HIGH) |
#define | ASLVERS() aslVersion(PARSE_ARGS) |
#define | CCVERS() ccVersion(PARSE_ARGS) |
#define | AFLVERS() aflVersion(PARSE_ARGS) |
#define | OSLVERS() oslVersion(PARSE_ARGS) |
#define | CPLVERS() cplVersion(PARSE_ARGS) |
#define | GPLVERS() gplVersion(PARSE_ARGS) |
#define | LGPLVERS() lgplVersion(PARSE_ARGS) |
#define | AGPLVERS() agplVersion(PARSE_ARGS) |
#define | GFDLVERS() gfdlVersion(PARSE_ARGS) |
#define | CDDLVERS() cddlVersion(PARSE_ARGS) |
#define | LPPLVERS() lpplVersion(PARSE_ARGS) |
#define | MPLVERS() mplNplVersion(PARSE_ARGS) |
#define | PYTHVERS() pythonVersion(PARSE_ARGS) |
#define | SISSLVERS() sisslVersion(PARSE_ARGS) |
#define | REALVERS(x) realVersion(PARSE_ARGS, x) |
#define | PR_REGEX(x) printf("check %d = %s\n", x, _REGEX(x)); |
#define | mCR_CMU() (INFILE(_CR_CMU_1) || INFILE(_CR_CMU_2)) |
#define | mCR_EDIN() (INFILE(_CR_EDINBURGH_1) || INFILE(_CR_EDINBURGH_2)) |
#define | mCR_FSF() (INFILE(_CR_FSF1) || INFILE(_CR_FSF2)) |
#define | mCR_HP() (INFILE(_CR_HP_1)|| INFILE(_CR_HP_2) || INFILE(_CR_DEC) || INFILE(_CR_EDS)) |
#define | mCR_IETF() (INFILE(_CR_IETF_1) || INFILE(_CR_IETF_2)) |
#define | mCR_MIT() (INFILE(_CR_MIT1) || INFILE(_CR_MIT2)) |
#define | mCR_X11() (INFILE(_CR_X11) || INFILE(_CR_XFREE86)) |
#define | mCR_IPTC() (INFILE(_CR_IPTC1) || INFILE(_CR_IPTC2)) |
#define | SPDXREF() spdxReference(PARSE_ARGS) |
#define | EXCEPTIONS() copyleftExceptions(PARSE_ARGS) |
static int | fileHasPatt (int licTextIdx, char *filetext, int size, int isML, int isPS, int qType) |
Checks for a phrase in a file. More... | |
char * | parseLicenses (char *filetext, int size, scanres_t *scp, int isML, int isPS) |
Parse a file to check all the possible licenses and add them to matches. More... | |
searches for licenses
The main workhorse of nomos. This file contains most of the logic for finding licenses in nomos.
Definition in file parse.c.
char * aflVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * agplVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * aslVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * ccVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * cddlVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
void checkCornerCases | ( | char * | filetext, |
int | size, | ||
int | score, | ||
int | kwbm, | ||
int | isML, | ||
int | isPS, | ||
int | nw, | ||
int | force | ||
) |
If we call this function, we still don't know anything about a license.
In fact, there may be NO license. Look for copyrights, references to the word "trademark", "patent", etc. (and possibly other trivial or borderline-insignificant legal stuff) in this file.
Trademark detection removed. It gave too many false positives. The code is left in place because more experience is needed about the consequences.
int checkUnclassified | ( | char * | filetext, |
int | size, | ||
int | score, | ||
int | isML, | ||
int | isPS, | ||
int | nw | ||
) |
This function is called when all the above license-checks don't turn up anything useful. Now we need to determine if the current file likely contains a license or not.
Basic strategy is to look for 4 classes (groups) of words all within the same paragraph-or-two. Here we estimate the size of a paragraph to be 6 lines of legal text. To be conservative, we'll look for 6 contiguous lines ABOVE AND BELOW the line that matches our first search. In other words, we're using "grep -A6 -B6 pattern textfile".
A paragraph containing legal-VERBS, legal-DOCUMENTS, legal-NOUNS, and legal-PERMISSIONS is quite likely to be a license. This doesn't have to be 100% accurate but it IS nice to know whether a file that fails the known-license-footprints really contains a license or not. Knowing so makes the legal department's job easier.
Some text-files are determined by this function to contain some sort of license, but really only deal with the notion of a public-domain claim. If we find one here, report it; this way we don't bother calling the corner-case license-check function.
void copyleftExceptions | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * cplVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
|
static |
Debugging call for idxGrep()
Function calls idxGrep() and prints the regex match using printRegexMatch()
licTextIdx | license index |
buf | |
show |
|
static |
int findPhrase | ( | int | index, |
char * | filetext, | ||
int | size, | ||
int | isML, | ||
int | isPS, | ||
int | qType | ||
) |
Check for the presence of a phrase in a file by first searching for the search key provided.
Cache the search results, as we are very likely to be looking up the same word/phrase again.
index | index of the phrase to be searched for |
filetext | the text to search |
size | the size of file |
isML | Is HTML/XML file? |
isPS | Is postscript file? |
qType | ?? |
char * gfdlVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
|
static |
char * lgplVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
void locateRegex | ( | char * | text, |
item_t * | op, | ||
int | index, | ||
int | size, | ||
int | sso, | ||
int | seo | ||
) |
Locate a regex in a given file.
Function first looks in raw text, then goes for doctored buffer if not found in the file.
Save location using saveRegexLocation()
char * lpplVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * mplNplVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char * oslVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
char* parseLicenses | ( | char * | filetext, |
int | size, | ||
scanres_t * | scp, | ||
int | isML, | ||
int | isPS | ||
) |
Parse a file to check all the possible licenses and add them to matches.
The function calls fileHasPatt() to check whether the file contains a pattern defined in STRINGS.in. If a match is found, then it can call idxGrep_recordIndex() to check if the file has some additional text and finally adds the license using addRef(). The results found are also stored in licStr as a comma separated list.
The function first checks if a file contains an interesting string which can denote a license. If it is found then the heuristics are done in detail to find the exact license match. For more info please refer to the nomos wiki
filetext | File content | |
size | File size | |
[out] | scp | Scan results |
isML | Source is HTML/XML | |
isPS | Source is PostScript |
char * pythonVersion | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
|
static |
void saveRegexLocation | ( | int | index, |
int | offset, | ||
int | length, | ||
int | saveCache | ||
) |
|
static |
void spdxReference | ( | char * | filetext, |
int | size, | ||
int | isML, | ||
int | isPS | ||
) |
struct { ... } kludge |
Regex match related data
|
static |
|
static |
|
static |
|
static |