FOSSology  4.7.0-rc1
Open Source License Compliance by Open Source Software
nomos.c
Go to the documentation of this file.
1 /*
2  SPDX-FileCopyrightText: © 2006-2015 Hewlett-Packard Development Company, L.P.
3  SPDX-FileCopyrightText: © 2014, 2018 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
16 /* _GNU_SOURCE selects the GNU strerror_r() variant (returns char *), which main() relies on. */
17 #ifndef _GNU_SOURCE
18 #define _GNU_SOURCE
19 #endif /* not defined _GNU_SOURCE */
20 
21 #include "nomos.h"
22 #include "nomos_utils.h"
23 
24 extern licText_t licText[]; /* Defined in _autodata.c */
25 struct globals gl; /* Data that stays consistent for every file scanned (DB connection, agent key, CLI options) */
26 struct curScan cur; /* State of the file currently being scanned; re-initialized before each file */
27 
28 char debugStr[myBUFSIZ]; /* Debug string */
29 char dbErrString[myBUFSIZ]; /* DB error string */
30 
31 size_t hashEntries; /* Hash entries */
32 
33 int schedulerMode = 0; /* Set to 1 by arsNomos() when the agent is driven by the scheduler */
34 int Verbose = 0; /* Verbose level, incremented once per -v option */
36 #define FUNCTION /* Expands to nothing; project functions carry it as a marker in their definitions */
37 
38 #ifdef COMMIT_HASH_S
39 char BuildVersion[]="nomos build version: " VERSION_S " r(" COMMIT_HASH_S ").\n"; /* Printed by the -V option */
40 #else
41 char BuildVersion[] = "nomos build version: NULL.\n"; /* Fallback when no commit hash was supplied at build time */
42 #endif
43 
44 /* We're being run from the scheduler */
45 /* nomos agent starting up in scheduler mode... */
46 /* \ref http://www.fossology.org/projects/fossology/wiki/Nomos_Test_Cases*/
47 
60 void arsNomos(cacheroot_t* cacheroot, bool ignoreFilesWithMimeType) {
61  int i;
62  int upload_pk = 0;
63  int numrows;
64  int ars_pk = 0;
65  int user_pk = 0;
66  char *AgentARSName = "nomos_ars";
67  PGresult *result;
68 
69  char *repFile;
70 
71  schedulerMode = 1;
72  /* get user_pk for user who queued the agent */
73  user_pk = fo_scheduler_userID();
74  /* read upload_pk from scheduler */
75  while (fo_scheduler_next())
76  {
78  if (upload_pk == 0)
79  continue;
80  /* Check Permissions */
81  if (GetUploadPerm(gl.pgConn, upload_pk, user_pk) < PERM_WRITE)
82  {
83  LOG_ERROR("You have no update permissions on upload %d", upload_pk);
84  continue;
85  }
86  result = checkDuplicateReq(gl.pgConn, upload_pk, gl.agentPk);
87  if (fo_checkPQresult(gl.pgConn, result, NULL, __FILE__, __LINE__))
88  Bail(-__LINE__);
89  if (PQntuples(result) != 0)
90  {
91  LOG_NOTICE("Ignoring requested nomos analysis of upload %d - Results are already in database.", upload_pk);
92  PQclear(result);
93  continue;
94  }
95  PQclear(result);
96 
97  /* Record analysis start in nomos_ars, the nomos audit trail. */
98  ars_pk = fo_WriteARS(gl.pgConn, ars_pk, upload_pk, gl.agentPk, AgentARSName, 0, 0);
99 
100  result = getSelectedPFiles(gl.pgConn, upload_pk, gl.agentPk, ignoreFilesWithMimeType);
101  if (fo_checkPQresult(gl.pgConn, result, NULL, __FILE__, __LINE__))
102  Bail(-__LINE__);
103  numrows = PQntuples(result);
104  /* process all files in this upload */
105  for (i = 0; i < numrows; i++)
106  {
107  initializeCurScan(&cur);
108  strncpy(cur.pFile, PQgetvalue(result, i, 1), sizeof(cur.pFile) - 1);
109  cur.pFile[sizeof(cur.pFile) - 1] = '\0';
110  cur.pFileFk = atoi(PQgetvalue(result, i, 0));
111  repFile = fo_RepMkPath("files", cur.pFile);
112  if (!repFile)
113  {
114  LOG_FATAL("Nomos unable to open pfile_pk: %ld, file: %s", cur.pFileFk, cur.pFile);
115  Bail(-__LINE__);
116  }
117  /* make sure this is a regular file, ignore if not */
118  if (!isFILE(repFile))
119  continue;
120  processFile(repFile);
122  if (recordScanToDB(cacheroot, &cur))
123  {
124  LOG_FATAL("nomos terminating upload %d scan due to previous errors.", upload_pk);
125  Bail(-__LINE__);
126  }
127  freeAndClearScan(&cur);
128  }
129  PQclear(result);
130  /* Record analysis success in nomos_ars. */
131  fo_WriteARS(gl.pgConn, ars_pk, upload_pk, gl.agentPk, AgentARSName, 0, 1);
132  }
133 }
134 
143 void list_dir (const char * dir_name, int process_count, int *distribute_count, FILE **pFile)
144 {
145  struct dirent *dirent_handler;
146  DIR *dir_handler;
147 
148  if ((dir_handler = opendir(dir_name)) == NULL)
149  {
150  fprintf(stderr, "Can't open: %s, error: %s\n", dir_name, strerror(errno));
151  return;
152  }
153 
154  char filename_buf[PATH_MAX] = {}; // store one file path
155  struct stat stat_buf ;
156  int file_number = 0;
157  while ((dirent_handler = readdir(dir_handler)) != NULL)
158  {
159  /* get the file path, form the file path /dir_name/file_name,
160  e.g. dir_name is '/tmp' file_name is 'test_file_1.txt', form one path '/tmp/test_file_1.txt' */
161  snprintf(filename_buf, sizeof(filename_buf), "%s/%s", dir_name, dirent_handler->d_name);
162 
163  if (stat(filename_buf, &stat_buf) == -1) // if can access the current file, return
164  {
165  LOG_FATAL("Unable to stat file: %s, error message: %s\n", filename_buf, strerror(errno)) ;
166  closedir(dir_handler);
167  return;
168  }
169 
170  /* 1) do not travel '..', '.' directory
171  2) when the file type is directory, travel it
172  3) when the file type is reguler file, write it into temp files on average (value from -n) */
173  if (strcmp (dirent_handler->d_name, "..") != 0 && strcmp (dirent_handler->d_name, ".") != 0)
174  {
175  /* the file type is a directory (exclude '..' and '.') */
176  if ((stat_buf.st_mode & S_IFMT) == S_IFDIR)
177  {
178  list_dir(filename_buf, process_count, distribute_count, pFile); // deep into this directory and travel it
179  }
180  else {
181  strncat(filename_buf, "\n", PATH_MAX - 1); // add one new line character by the end of one file path, one line is one file path
182  /* write on average process_count */
183  file_number = *distribute_count%process_count;
184  fwrite (filename_buf, sizeof(char), strlen(filename_buf), pFile[file_number]);
185  (*distribute_count)++; // increase the file count
186 
187  if (process_count == *distribute_count) *distribute_count = 0; // reset list_file_count each cycle
188  continue;
189  }
190  }
191  }
192  closedir(dir_handler);
193 }
194 
201 void read_file_grab_license(int file_number, FILE **pFile)
202 {
203  char *line = NULL;
204  size_t len = 0;
205  int lenth_tmp = 0;
206  ssize_t read = 0;
207 
208  /*read line by line, then start to scan licenses */
209  while ((read = getline(&line, &len, pFile[file_number])) != -1) {
210  if (line && line[0]) // line is not empty
211  {
212  lenth_tmp = strlen(line);
213  /* trim the line */
214  while(isspace(line[lenth_tmp - 1])) line[--lenth_tmp] = 0; // right trim
215  while(isspace(*line)) ++line; // left trim
216  //printf("line is:%s, getpid() is:%d\n", line, getpid());
217  }
218  initializeCurScan(&cur);
219  processFile(line); // start to scan licenses
220  } // while
221 
222  if (line) free(line);
223 }
224 
/**
 * \brief Spawn the scanning children and scan the parent's own path list.
 *
 * One child is forked for every list index from \p proc_num down to 1;
 * each child scans exactly its own list with read_file_grab_license()
 * and then returns to the caller. After all children are started the
 * parent scans list 0.
 *
 * If fork() fails the error is logged and the parent scans that list
 * itself, so no path list is left unprocessed. With \p proc_num < 1 no
 * child is created and only list 0 is scanned.
 *
 * The caller tells parent and children apart by comparing getpid() with
 * the pid it recorded before the call.
 *
 * \param proc_num Highest list index, i.e. the number of children to create
 * \param pFile    Array of at least \p proc_num + 1 streams opened for reading
 */
void myFork(int proc_num, FILE **pFile) {
  pid_t pid;
  int list_index;

  for (list_index = proc_num; list_index >= 1; list_index--)
  {
    /* flush pending output so a child does not emit it a second time */
    fflush(NULL);
    pid = fork();

    if (pid == 0)
    {
      // Child process, every single process runs on one temp path file
      read_file_grab_license(list_index, pFile); // grabbing licenses on /tmp/foss-XXXXXX
      return;
    }

    if (pid < 0)
    {
      LOG_FATAL("fork failed\n");
      /* no child for this list: scan it here rather than dropping it */
      read_file_grab_license(list_index, pFile);
    }
    /* pid > 0: we are the parent, go on with the next list */
  }

  read_file_grab_license(0, pFile); // main(parent) process run on /tmp/foss-XXXXXX
}
255 
259 int main(int argc, char **argv)
260 {
261  int i;
262  int c;
263  int file_count = 0;
264  char *cp;
265  char sErrorBuf[1024];
266  char *agent_desc = "License Scanner";
267  char **files_to_be_scanned;
268  char *COMMIT_HASH = NULL;
269  char *VERSION = NULL;
270  char agent_rev[myBUFSIZ];
272  char *scanning_directory= NULL;
273  int process_count = 0;
274  bool ignoreFilesWithMimeType = false;
275 
276  /* connect to the scheduler */
277  fo_scheduler_connect(&argc, argv, &(gl.pgConn));
279 
280 #ifdef PROC_TRACE
281  traceFunc("== main(%d, %p)\n", argc, argv);
282 #endif /* PROC_TRACE */
283 
284 #ifdef MEMORY_TRACING
285  mcheck(0);
286 #endif /* MEMORY_TRACING */
287 #ifdef GLOBAL_DEBUG
288  gl.DEEBUG = gl.MEM_DEEBUG = 0;
289 #endif /* GLOBAL_DEBUG */
290 
291  files_to_be_scanned = calloc(argc, sizeof(char *));
292 
293  COMMIT_HASH = fo_sysconfig("nomos", "COMMIT_HASH");
294  VERSION = fo_sysconfig("nomos", "VERSION");
295  snprintf(agent_rev, sizeof(agent_rev), "%s.%s", VERSION, COMMIT_HASH);
296 
297  gl.agentPk = fo_GetAgentKey(gl.pgConn, basename(argv[0]), 0, agent_rev, agent_desc);
298 
299  /* Record the progname name */
300  if ((cp = strrchr(*argv, '/')) == NULL_STR)
301  {
302  strncpy(gl.progName, *argv, sizeof(gl.progName)-1);
303  }
304  else
305  {
306  while (*cp == '.' || *cp == '/')
307  cp++;
308  strncpy(gl.progName, cp, sizeof(gl.progName)-1);
309  }
310 
311  if (putenv("LANG=C") < 0)
312  {
313  char * estr = strerror_r(errno, sErrorBuf, sizeof(sErrorBuf));
314  LOG_FATAL("Cannot set LANG=C in environment. Error: %s", estr)
315  Bail(-__LINE__);
316  }
317 
318  /* Save the current directory */
319  if (getcwd(gl.initwd, sizeof(gl.initwd)) == NULL_STR)
320  {
321  char *estr = strerror_r(errno, sErrorBuf, sizeof(sErrorBuf));
322  LOG_FATAL("Cannot obtain starting directory. Error: %s", estr)
323  Bail(-__LINE__);
324  }
325 
326  /* default paragraph size (# of lines to scan above and below the pattern) */
327  gl.uPsize = 6;
328 
329  /* Build the license ref cache to hold 2**11 (2048) licenses.
330  This MUST be a power of 2.
331  */
332  cacheroot.maxnodes = 2 << 11;
333  cacheroot.nodes = calloc(cacheroot.maxnodes, sizeof(cachenode_t));
334  if (!initLicRefCache(&cacheroot))
335  {
336  LOG_FATAL("Nomos could not allocate %d cacheroot nodes.", cacheroot.maxnodes)
337  Bail(-__LINE__);
338  }
339 
340  /* Process command line options */
341  while ((c = getopt(argc, argv, "VJSNvhiIlc:d:n:")) != -1)
342  {
343  switch (c) {
344  case 'c': break; /* handled by fo_scheduler_connect() */
345  case 'i':
346  /* "Initialize" */
347  Bail(0); /* DB was opened above, now close it and exit */
348  case 'l':
349  /* set long command line output */
350  gl.progOpts |= OPTS_LONG_CMD_OUTPUT;
351  break;
352  case 'v':
353  Verbose++; break;
354  case 'J':
355  gl.progOpts |= OPTS_JSON_OUTPUT;
356  break;
357  case 'S':
358  gl.progOpts |= OPTS_HIGHLIGHT_STDOUT;
359  break;
360  case 'N':
361  gl.progOpts |= OPTS_NO_HIGHLIGHTINFO;
362  break;
363  case 'V':
364  printf("%s", BuildVersion);
365  Bail(0);
366  case 'd': /* diretory to scan */
367  gl.progOpts |= OPTS_SCANNING_DIRECTORY;
368  scanning_directory = optarg;
369  struct stat dir_sta;
370  int ret = stat(scanning_directory, &dir_sta);
371  if (-1 == ret || S_IFDIR != (dir_sta.st_mode & S_IFMT))
372  {
373  if (-1 == ret) printf("stat('%s') error message: %s.\n",scanning_directory, strerror(errno));
374  else printf("Warning: '%s' from -d is not a good directory(dir_sta.st_mode & S_IFMT = %o).\n", scanning_directory, dir_sta.st_mode & S_IFMT);
375  Usage(argv[0]);
376  Bail(-__LINE__);
377  }
378  break;
379  case 'n': /* spawn mutiple processes to scan */
380  process_count = atoi(optarg);
381  break;
382  case 'I':
383  ignoreFilesWithMimeType = true;
384  break;
385  case 'h':
386  default:
387  Usage(argv[0]);
388  Bail(-__LINE__);
389  }
390  }
391 
392 
393  /* Copy filename args (if any) into array */
394  for (i = optind; i < argc; i++)
395  {
396  files_to_be_scanned[file_count] = argv[i];
397  file_count++;
398  }
399 
400  licenseInit();
401  gl.flags = 0;
402 
403  if (file_count == 0 && !scanning_directory)
404  {
405  arsNomos(&cacheroot, ignoreFilesWithMimeType);
406  }
407  else
408  { /******** Files on the command line ********/
409  FILE **pFile = NULL; // store temp file descriptors
410  char (*pTempFileName)[50] = NULL; // store temp file names, they are looking like /tmp/foss-XXXXXX
411  pid_t mainPid = 0; // main process id
412  cur.cliMode = 1;
413 
414  /* when scanning_directory is real direcotry, scan license in parallel */
415  if (scanning_directory) {
416  if (process_count < 2) process_count = 2; // the least count is 2, at least has one child process
417  if (mutexJson == NULL && optionIsSet(OPTS_JSON_OUTPUT))
418  {
419  initializeJson();
420  printf("{\n\"results\":[\n");
421  fflush(0);
422  }
423  pFile = malloc(process_count*(sizeof(FILE*)));
424  pTempFileName = malloc(process_count*sizeof(char[50]));
425  int i = 0;
426  int file_descriptor = 0;
427  for(i = 0; i < process_count; i++)
428  {
429  /* create temp file */
430  char file_template[] = "/tmp/foss-XXXXXX"; // 'XXXXXX' will be replaced after mkstemp
431  file_descriptor = mkstemp(file_template);
432 
433  /* get the temp path file distriptors */
434  pFile[i] = fdopen(file_descriptor, "w"); // open the files to write later
435  if (!pFile[i])
436  {
437  LOG_FATAL("failed to open %s, %s\n", file_template, strerror(errno));
438  }
439  strncpy(pTempFileName[i], file_template, sizeof(pTempFileName[i]) - 1);
440  pTempFileName[i][sizeof(pTempFileName[i]) - 1] = '\0'; // store temp file names
441  }
442 
443  /* walk through the specified directory to get all the file(file path) and
444  store into mutiple files - /tmp/foss-XXXXXX */
445  int distribute_count = 0; // record how many files are found in one directory
446  list_dir(scanning_directory, process_count, &distribute_count, pFile); // list and store files into /tmp/foss-XXXXXX in one directory
447 
448  /* after the walking through and writing job is done, close all the temp path file distriptors.
449  then open the temp path files to read */
450  for(i = 0; i < process_count; i++)
451  {
452  if (pFile[i]) fclose(pFile[i]); // write all the paths
453  pFile[i] = fopen(pTempFileName[i], "r"); // open the temp files to read
454  }
455 
456  /* create process_count - 1 child processes(please do not forget we always have the main process) */
457  mainPid = getpid(); // get main process id
458  myFork(process_count - 1, pFile); // spawn process_count - 1 chile processes and grab licenses through process_count processes
459  int status = 0;
460  pid_t wpid = 0;
461  if (mainPid == getpid())
462  {
463  /* wait all processes done. */
464  while(1){
465  wpid = wait(&status);
466  if (-1 == wpid) break;
467  }
468 
469  /* close the opening files, then delete the temp path files */
470  for(i = 0; i < process_count; i++)
471  {
472  if (pFile[i])
473  {
474  fclose(pFile[i]);
475  unlink(pTempFileName[i]);
476  }
477  }
478 
479  if (optionIsSet(OPTS_JSON_OUTPUT))
480  {
481  printf("]\n}\n");
482  destroyJson();
483  }
484 
485  /* free memeory */
486  free(pFile);
487  free(pTempFileName);
488  }
489  }
490  else {
491  if (0 != process_count)
492  {
493  printf("Warning: -n {nprocs} ONLY works with -d {directory}.\n");
494  }
495  if (optionIsSet(OPTS_JSON_OUTPUT))
496  {
497  initializeJson();
498  printf("{\n\"results\":[\n");
499  fflush(0);
500  }
501  for (i = 0; i < file_count; i++) {
502  initializeCurScan(&cur);
503  processFile(files_to_be_scanned[i]);
504  recordScanToDB(&cacheroot, &cur);
505  freeAndClearScan(&cur);
506  }
507  if (optionIsSet(OPTS_JSON_OUTPUT))
508  {
509  printf("]\n}\n");
510  destroyJson();
511  }
512  }
513  }
514 
515  lrcache_free(&cacheroot); // for valgrind
516 
517  /* Normal Exit */
518  Bail(0);
519 
520  /* this will never execute but prevents a compiler warning about reaching
521  the end of a non-void function */
522  return (0);
523 }
char BuildVersion[]
Definition: buckets.c:68
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:63
void initializeJson()
Definition: json_writer.c:131
void destroyJson()
Definition: json_writer.c:140
FUNCTION int GetUploadPerm(PGconn *pgConn, long UploadPk, int user_pk)
Get users permission to this upload.
Definition: libfossagent.c:378
PGresult * checkDuplicateReq(PGconn *pgConn, int uploadPk, int agentPk)
Get the upload_pk and agent_pk to find out the agent has already scanned the package.
Definition: libfossagent.c:442
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:214
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
Definition: libfossagent.c:158
PGresult * getSelectedPFiles(PGconn *pgConn, int uploadPk, int agentPk, bool ignoreFilesWithMimeType)
Get the upload_pk, agent_pk and ignoreFilesWithMimeType to get all the file Ids for nomos.
Definition: libfossagent.c:469
int fo_checkPQresult(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres SELECT.
Definition: libfossdb.c:170
#define PERM_WRITE
Read-Write permission.
Definition: libfossology.h:33
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:352
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
int fo_scheduler_userID()
Gets the id of the user that created the job that the agent is running.
char * fo_scheduler_current()
Get the last read string from the scheduler.
char * fo_scheduler_next()
Get the next data to process from the scheduler.
void fo_scheduler_connect(int *argc, char **argv, PGconn **db_conn)
Establish a connection between an agent and the scheduler.
FUNCTION void lrcache_free(cacheroot_t *pcroot)
Free the hash table.
Definition: liccache.c:72
void licenseInit()
license initialization
Definition: licenses.c:70
fo_dbManager * fo_dbManager_new(PGconn *dbConnection)
Create and initialize new fo_dbManager object.
Definition: standalone.c:34
int isFILE(char *pathname)
Check if an inode is a file.
Definition: util.c:1353
licText_t licText[]
void arsNomos(cacheroot_t *cacheroot, bool ignoreFilesWithMimeType)
Make entry in ars table for audit.
Definition: nomos.c:60
int main(int argc, char **argv)
Definition: nomos.c:259
char debugStr[myBUFSIZ]
Debug string.
Definition: nomos.c:28
int schedulerMode
Definition: nomos.c:33
void read_file_grab_license(int file_number, FILE **pFile)
read line by line, then call processFile to grab license line by line
Definition: nomos.c:201
int Verbose
Verbose level.
Definition: nomos.c:34
size_t hashEntries
Hash entries.
Definition: nomos.c:31
char dbErrString[myBUFSIZ]
DB error string.
Definition: nomos.c:29
void myFork(int proc_num, FILE **pFile)
the recursive create process and process grabbing licenses
Definition: nomos.c:231
void list_dir(const char *dir_name, int process_count, int *distribute_count, FILE **pFile)
list all files and store file paths from the specified directory
Definition: nomos.c:143
Nomos header file.
#define NULL_STR
NULL string.
Definition: nomos.h:235
void Bail(int exitval)
Close connections and exit.
Definition: nomos_utils.c:538
int optionIsSet(int val)
Check if an CLI option is set.
Definition: nomos_utils.c:567
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
Definition: nomos_utils.c:953
sem_t * mutexJson
Mutex to handle JSON writes.
Definition: nomos_utils.c:24
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.
Definition: nomos_utils.c:938
FUNCTION int recordScanToDB(cacheroot_t *pcroot, struct curScan *scanRecord)
Write out the information about the scan to the FOSSology database.
Definition: nomos_utils.c:862
FUNCTION int initLicRefCache(cacheroot_t *pcroot)
build a cache the license ref db table.
Definition: nomos_utils.c:263
const char * upload_pk
Definition: sqlstatements.h:82
int maxnodes
No. of nodes in the list.
Definition: liccache.h:42
cachenode_t * nodes
Array of nodes.
Definition: liccache.h:43
Struct that tracks state related to current file being scanned.
Definition: nomos.h:391
char pFile[myBUFSIZ]
Definition: nomos.h:397
long pFileFk
Definition: nomos.h:396
int cliMode
Definition: nomos.h:412
Structure holding data truly global in that it remains consistent for each file scanned.
Definition: nomos.h:344
PGconn * pgConn
DB Connection.
Definition: nomos.h:362
int agentPk
Agent id.
Definition: nomos.h:359
fo_dbManager * dbManager
FOSSology DB manager.
Definition: nomos.h:363
int progOpts
CLI options.
Definition: nomos.h:347
int uPsize
Size.
Definition: nomos.h:349
char initwd[myBUFSIZ]
CDB, would like to workaround/eliminate.
Definition: nomos.h:345
int flags
Flags.
Definition: nomos.h:348
char progName[64]
Program name.
Definition: nomos.h:346
FUNCTION int processFile(PGconn *pgConn, pbucketdef_t bucketDefArray, puploadtree_t puploadtree, int agent_pk, int hasPrules)
Process a file.
Definition: walk.c:167