FOSSology  4.4.0
Open Source License Compliance by Open Source Software
nomos.c
Go to the documentation of this file.
1 /*
2  SPDX-FileCopyrightText: © 2006-2015 Hewlett-Packard Development Company, L.P.
3  SPDX-FileCopyrightText: © 2014, 2018 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
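16 /* _GNU_SOURCE has to be defined before the first system header is pulled in: it enables the GNU extensions this file relies on, e.g. getline() and the strerror_r() variant that returns a char *. */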
17 #ifndef _GNU_SOURCE
18 #define _GNU_SOURCE
19 #endif /* not defined _GNU_SOURCE */
20 
21 #include "nomos.h"
22 #include "nomos_utils.h"
23 
24 extern licText_t licText[]; /* Defined in _autodata.c */
25 struct globals gl; /* Data that remains consistent for each file scanned */
26 struct curScan cur; /* State related to the file currently being scanned */
27 
28 char debugStr[myBUFSIZ]; /* Debug string */
29 char dbErrString[myBUFSIZ]; /* DB error string */
30 
31 size_t hashEntries; /* Hash entries */
32 
33 int schedulerMode = 0; /* Set to 1 by arsNomos() when the agent is driven by the scheduler */
34 int Verbose = 0; /* Verbose level, incremented once per -v option in main() */
36 #define FUNCTION /* Expands to nothing; used as a marker in front of function definitions */
37 
38 #ifdef COMMIT_HASH_S
39 char BuildVersion[]="nomos build version: " VERSION_S " r(" COMMIT_HASH_S ").\n"; /* Printed by the -V option */
40 #else
41 char BuildVersion[] = "nomos build version: NULL.\n"; /* Fallback when the build supplies no COMMIT_HASH_S */
42 #endif
43 
44 /* We're being run from the scheduler */
45 /* nomos agent starting up in scheduler mode... */
46 /* \ref http://www.fossology.org/projects/fossology/wiki/Nomos_Test_Cases*/
47 
60 void arsNomos(cacheroot_t* cacheroot, bool ignoreFilesWithMimeType) {
61  int i;
62  int upload_pk = 0;
63  int numrows;
64  int ars_pk = 0;
65  int user_pk = 0;
66  char *AgentARSName = "nomos_ars";
67  PGresult *result;
68 
69  char *repFile;
70 
71  schedulerMode = 1;
72  /* get user_pk for user who queued the agent */
73  user_pk = fo_scheduler_userID();
74  /* read upload_pk from scheduler */
75  while (fo_scheduler_next())
76  {
78  if (upload_pk == 0)
79  continue;
80  /* Check Permissions */
81  if (GetUploadPerm(gl.pgConn, upload_pk, user_pk) < PERM_WRITE)
82  {
83  LOG_ERROR("You have no update permissions on upload %d", upload_pk);
84  continue;
85  }
86  result = checkDuplicateReq(gl.pgConn, upload_pk, gl.agentPk);
87  if (fo_checkPQresult(gl.pgConn, result, NULL, __FILE__, __LINE__))
88  Bail(-__LINE__);
89  if (PQntuples(result) != 0)
90  {
91  LOG_NOTICE("Ignoring requested nomos analysis of upload %d - Results are already in database.", upload_pk);
92  PQclear(result);
93  continue;
94  }
95  PQclear(result);
96 
97  /* Record analysis start in nomos_ars, the nomos audit trail. */
98  ars_pk = fo_WriteARS(gl.pgConn, ars_pk, upload_pk, gl.agentPk, AgentARSName, 0, 0);
99 
100  result = getSelectedPFiles(gl.pgConn, upload_pk, gl.agentPk, ignoreFilesWithMimeType);
101  if (fo_checkPQresult(gl.pgConn, result, NULL, __FILE__, __LINE__))
102  Bail(-__LINE__);
103  numrows = PQntuples(result);
104  /* process all files in this upload */
105  for (i = 0; i < numrows; i++)
106  {
107  initializeCurScan(&cur);
108  strcpy(cur.pFile, PQgetvalue(result, i, 1));
109  cur.pFileFk = atoi(PQgetvalue(result, i, 0));
110  repFile = fo_RepMkPath("files", cur.pFile);
111  if (!repFile)
112  {
113  LOG_FATAL("Nomos unable to open pfile_pk: %ld, file: %s", cur.pFileFk, cur.pFile);
114  Bail(-__LINE__);
115  }
116  /* make sure this is a regular file, ignore if not */
117  if (!isFILE(repFile))
118  continue;
119  processFile(repFile);
121  if (recordScanToDB(cacheroot, &cur))
122  {
123  LOG_FATAL("nomos terminating upload %d scan due to previous errors.", upload_pk);
124  Bail(-__LINE__);
125  }
126  freeAndClearScan(&cur);
127  }
128  PQclear(result);
129  /* Record analysis success in nomos_ars. */
130  fo_WriteARS(gl.pgConn, ars_pk, upload_pk, gl.agentPk, AgentARSName, 0, 1);
131  }
132 }
133 
142 void list_dir (const char * dir_name, int process_count, int *distribute_count, FILE **pFile)
143 {
144  struct dirent *dirent_handler;
145  DIR *dir_handler;
146 
147  if ((dir_handler = opendir(dir_name)) == NULL)
148  {
149  fprintf(stderr, "Can't open: %s, error: %s\n", dir_name, strerror(errno));
150  return;
151  }
152 
153  char filename_buf[PATH_MAX] = {}; // store one file path
154  struct stat stat_buf ;
155  int file_number = 0;
156  while ((dirent_handler = readdir(dir_handler)) != NULL)
157  {
158  /* get the file path, form the file path /dir_name/file_name,
159  e.g. dir_name is '/tmp' file_name is 'test_file_1.txt', form one path '/tmp/test_file_1.txt' */
160  sprintf( filename_buf , "%s/%s",dir_name, dirent_handler->d_name);
161 
162  if (stat(filename_buf, &stat_buf) == -1) // if can access the current file, return
163  {
164  LOG_FATAL("Unable to stat file: %s, error message: %s\n", filename_buf, strerror(errno)) ;
165  closedir(dir_handler);
166  return;
167  }
168 
169  /* 1) do not travel '..', '.' directory
170  2) when the file type is directory, travel it
171  3) when the file type is reguler file, write it into temp files on average (value from -n) */
172  if (strcmp (dirent_handler->d_name, "..") != 0 && strcmp (dirent_handler->d_name, ".") != 0)
173  {
174  /* the file type is a directory (exclude '..' and '.') */
175  if ((stat_buf.st_mode & S_IFMT) == S_IFDIR)
176  {
177  list_dir(filename_buf, process_count, distribute_count, pFile); // deep into this directory and travel it
178  }
179  else {
180  strncat(filename_buf, "\n", PATH_MAX - 1); // add one new line character by the end of one file path, one line is one file path
181  /* write on average process_count */
182  file_number = *distribute_count%process_count;
183  fwrite (filename_buf, sizeof(char), strlen(filename_buf), pFile[file_number]);
184  (*distribute_count)++; // increase the file count
185 
186  if (process_count == *distribute_count) *distribute_count = 0; // reset list_file_count each cycle
187  continue;
188  }
189  }
190  }
191  closedir(dir_handler);
192 }
193 
200 void read_file_grab_license(int file_number, FILE **pFile)
201 {
202  char *line = NULL;
203  size_t len = 0;
204  int lenth_tmp = 0;
205  ssize_t read = 0;
206 
207  /*read line by line, then start to scan licenses */
208  while ((read = getline(&line, &len, pFile[file_number])) != -1) {
209  if (line && line[0]) // line is not empty
210  {
211  lenth_tmp = strlen(line);
212  /* trim the line */
213  while(isspace(line[lenth_tmp - 1])) line[--lenth_tmp] = 0; // right trim
214  while(isspace(*line)) ++line; // left trim
215  //printf("line is:%s, getpid() is:%d\n", line, getpid());
216  }
217  initializeCurScan(&cur);
218  processFile(line); // start to scan licenses
219  } // while
220 
221  if (line) free(line);
222 }
223 
/**
 * \brief Recursively create the scanning processes; each one scans the
 *        paths of one path-list file.
 *
 * One child is forked per call and scans list number proc_num. The parent
 * recurses with proc_num - 1 while proc_num > 1 and finally scans list 0
 * itself. A child returns to the caller after finishing its list.
 *
 * If fork() fails, the parent scans the lists that have no process yet
 * (proc_num down to 0) sequentially, so no path is left unscanned.
 *
 * \param proc_num  Index of the path list the next child has to scan
 * \param pFile     Array of path-list streams opened for reading
 */
void myFork(int proc_num, FILE **pFile) {
  int remaining;
  pid_t pid = fork();

  if (pid == 0) { // Child process, every single process runs on one temp path file
    read_file_grab_license(proc_num, pFile); // grabbing licenses on /tmp/foss-XXXXXX
    return;
  }

  if (pid < 0)
  {
    LOG_FATAL("fork failed\n");
    /* no more children: do the outstanding lists, including our own, here */
    for (remaining = proc_num; remaining >= 0; remaining--)
    {
      read_file_grab_license(remaining, pFile);
    }
    return;
  }

  /* we're in the parent:
     call ourself again, decreasing the counter, until it reaches 1. */
  if (proc_num > 1) {
    myFork(proc_num - 1, pFile);
  }
  else
  {
    read_file_grab_license(0, pFile); // main(parent) process run on /tmp/foss-XXXXXX
  }
}
254 
258 int main(int argc, char **argv)
259 {
260  int i;
261  int c;
262  int file_count = 0;
263  char *cp;
264  char sErrorBuf[1024];
265  char *agent_desc = "License Scanner";
266  char **files_to_be_scanned;
267  char *COMMIT_HASH = NULL;
268  char *VERSION = NULL;
269  char agent_rev[myBUFSIZ];
271  char *scanning_directory= NULL;
272  int process_count = 0;
273  bool ignoreFilesWithMimeType = false;
274 
275  /* connect to the scheduler */
276  fo_scheduler_connect(&argc, argv, &(gl.pgConn));
278 
279 #ifdef PROC_TRACE
280  traceFunc("== main(%d, %p)\n", argc, argv);
281 #endif /* PROC_TRACE */
282 
283 #ifdef MEMORY_TRACING
284  mcheck(0);
285 #endif /* MEMORY_TRACING */
286 #ifdef GLOBAL_DEBUG
287  gl.DEEBUG = gl.MEM_DEEBUG = 0;
288 #endif /* GLOBAL_DEBUG */
289 
290  files_to_be_scanned = calloc(argc, sizeof(char *));
291 
292  COMMIT_HASH = fo_sysconfig("nomos", "COMMIT_HASH");
293  VERSION = fo_sysconfig("nomos", "VERSION");
294  sprintf(agent_rev, "%s.%s", VERSION, COMMIT_HASH);
295 
296  gl.agentPk = fo_GetAgentKey(gl.pgConn, basename(argv[0]), 0, agent_rev, agent_desc);
297 
298  /* Record the progname name */
299  if ((cp = strrchr(*argv, '/')) == NULL_STR)
300  {
301  strncpy(gl.progName, *argv, sizeof(gl.progName)-1);
302  }
303  else
304  {
305  while (*cp == '.' || *cp == '/')
306  cp++;
307  strncpy(gl.progName, cp, sizeof(gl.progName)-1);
308  }
309 
310  if (putenv("LANG=C") < 0)
311  {
312  char * estr = strerror_r(errno, sErrorBuf, sizeof(sErrorBuf));
313  LOG_FATAL("Cannot set LANG=C in environment. Error: %s", estr)
314  Bail(-__LINE__);
315  }
316 
317  /* Save the current directory */
318  if (getcwd(gl.initwd, sizeof(gl.initwd)) == NULL_STR)
319  {
320  char *estr = strerror_r(errno, sErrorBuf, sizeof(sErrorBuf));
321  LOG_FATAL("Cannot obtain starting directory. Error: %s", estr)
322  Bail(-__LINE__);
323  }
324 
325  /* default paragraph size (# of lines to scan above and below the pattern) */
326  gl.uPsize = 6;
327 
328  /* Build the license ref cache to hold 2**11 (2048) licenses.
329  This MUST be a power of 2.
330  */
331  cacheroot.maxnodes = 2 << 11;
332  cacheroot.nodes = calloc(cacheroot.maxnodes, sizeof(cachenode_t));
333  if (!initLicRefCache(&cacheroot))
334  {
335  LOG_FATAL("Nomos could not allocate %d cacheroot nodes.", cacheroot.maxnodes)
336  Bail(-__LINE__);
337  }
338 
339  /* Process command line options */
340  while ((c = getopt(argc, argv, "VJSNvhiIlc:d:n:")) != -1)
341  {
342  switch (c) {
343  case 'c': break; /* handled by fo_scheduler_connect() */
344  case 'i':
345  /* "Initialize" */
346  Bail(0); /* DB was opened above, now close it and exit */
347  case 'l':
348  /* set long command line output */
349  gl.progOpts |= OPTS_LONG_CMD_OUTPUT;
350  break;
351  case 'v':
352  Verbose++; break;
353  case 'J':
354  gl.progOpts |= OPTS_JSON_OUTPUT;
355  break;
356  case 'S':
357  gl.progOpts |= OPTS_HIGHLIGHT_STDOUT;
358  break;
359  case 'N':
360  gl.progOpts |= OPTS_NO_HIGHLIGHTINFO;
361  break;
362  case 'V':
363  printf("%s", BuildVersion);
364  Bail(0);
365  case 'd': /* diretory to scan */
366  gl.progOpts |= OPTS_SCANNING_DIRECTORY;
367  scanning_directory = optarg;
368  struct stat dir_sta;
369  int ret = stat(scanning_directory, &dir_sta);
370  if (-1 == ret || S_IFDIR != (dir_sta.st_mode & S_IFMT))
371  {
372  if (-1 == ret) printf("stat('%s') error message: %s.\n",scanning_directory, strerror(errno));
373  else printf("Warning: '%s' from -d is not a good directory(dir_sta.st_mode & S_IFMT = %o).\n", scanning_directory, dir_sta.st_mode & S_IFMT);
374  Usage(argv[0]);
375  Bail(-__LINE__);
376  }
377  break;
378  case 'n': /* spawn mutiple processes to scan */
379  process_count = atoi(optarg);
380  break;
381  case 'I':
382  ignoreFilesWithMimeType = true;
383  break;
384  case 'h':
385  default:
386  Usage(argv[0]);
387  Bail(-__LINE__);
388  }
389  }
390 
391 
392  /* Copy filename args (if any) into array */
393  for (i = optind; i < argc; i++)
394  {
395  files_to_be_scanned[file_count] = argv[i];
396  file_count++;
397  }
398 
399  licenseInit();
400  gl.flags = 0;
401 
402  if (file_count == 0 && !scanning_directory)
403  {
404  arsNomos(&cacheroot, ignoreFilesWithMimeType);
405  }
406  else
407  { /******** Files on the command line ********/
408  FILE **pFile = NULL; // store temp file descriptors
409  char (*pTempFileName)[50] = NULL; // store temp file names, they are looking like /tmp/foss-XXXXXX
410  pid_t mainPid = 0; // main process id
411  cur.cliMode = 1;
412 
413  /* when scanning_directory is real direcotry, scan license in parallel */
414  if (scanning_directory) {
415  if (process_count < 2) process_count = 2; // the least count is 2, at least has one child process
416  if (mutexJson == NULL && optionIsSet(OPTS_JSON_OUTPUT))
417  {
418  initializeJson();
419  printf("{\n\"results\":[\n");
420  fflush(0);
421  }
422  pFile = malloc(process_count*(sizeof(FILE*)));
423  pTempFileName = malloc(process_count*sizeof(char[50]));
424  int i = 0;
425  int file_descriptor = 0;
426  for(i = 0; i < process_count; i++)
427  {
428  /* create temp file */
429  char file_template[] = "/tmp/foss-XXXXXX"; // 'XXXXXX' will be replaced after mkstemp
430  file_descriptor = mkstemp(file_template);
431 
432  /* get the temp path file distriptors */
433  pFile[i] = fdopen(file_descriptor, "w"); // open the files to write later
434  if (!pFile[i])
435  {
436  LOG_FATAL("failed to open %s, %s\n", file_template, strerror(errno));
437  }
438  strcpy(pTempFileName[i], file_template); // store temp file names
439  }
440 
441  /* walk through the specified directory to get all the file(file path) and
442  store into mutiple files - /tmp/foss-XXXXXX */
443  int distribute_count = 0; // record how many files are found in one directory
444  list_dir(scanning_directory, process_count, &distribute_count, pFile); // list and store files into /tmp/foss-XXXXXX in one directory
445 
446  /* after the walking through and writing job is done, close all the temp path file distriptors.
447  then open the temp path files to read */
448  for(i = 0; i < process_count; i++)
449  {
450  if (pFile[i]) fclose(pFile[i]); // write all the paths
451  pFile[i] = fopen(pTempFileName[i], "r"); // open the temp files to read
452  }
453 
454  /* create process_count - 1 child processes(please do not forget we always have the main process) */
455  mainPid = getpid(); // get main process id
456  myFork(process_count - 1, pFile); // spawn process_count - 1 chile processes and grab licenses through process_count processes
457  int status = 0;
458  pid_t wpid = 0;
459  if (mainPid == getpid())
460  {
461  /* wait all processes done. */
462  while(1){
463  wpid = wait(&status);
464  if (-1 == wpid) break;
465  }
466 
467  /* close the opening files, then delete the temp path files */
468  for(i = 0; i < process_count; i++)
469  {
470  if (pFile[i])
471  {
472  fclose(pFile[i]);
473  unlink(pTempFileName[i]);
474  }
475  }
476 
477  if (optionIsSet(OPTS_JSON_OUTPUT))
478  {
479  printf("]\n}\n");
480  destroyJson();
481  }
482 
483  /* free memeory */
484  free(pFile);
485  free(pTempFileName);
486  }
487  }
488  else {
489  if (0 != process_count)
490  {
491  printf("Warning: -n {nprocs} ONLY works with -d {directory}.\n");
492  }
493  if (optionIsSet(OPTS_JSON_OUTPUT))
494  {
495  initializeJson();
496  printf("{\n\"results\":[\n");
497  fflush(0);
498  }
499  for (i = 0; i < file_count; i++) {
500  initializeCurScan(&cur);
501  processFile(files_to_be_scanned[i]);
502  recordScanToDB(&cacheroot, &cur);
503  freeAndClearScan(&cur);
504  }
505  if (optionIsSet(OPTS_JSON_OUTPUT))
506  {
507  printf("]\n}\n");
508  destroyJson();
509  }
510  }
511  }
512 
513  lrcache_free(&cacheroot); // for valgrind
514 
515  /* Normal Exit */
516  Bail(0);
517 
518  /* this will never execute but prevents a compiler warning about reaching
519  the end of a non-void function */
520  return (0);
521 }
char BuildVersion[]
Definition: buckets.c:68
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:63
void initializeJson()
Definition: json_writer.c:100
void destroyJson()
Definition: json_writer.c:109
FUNCTION int GetUploadPerm(PGconn *pgConn, long UploadPk, int user_pk)
Get users permission to this upload.
Definition: libfossagent.c:378
PGresult * checkDuplicateReq(PGconn *pgConn, int uploadPk, int agentPk)
Get the upload_pk and agent_pk to find out the agent has already scanned the package.
Definition: libfossagent.c:442
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:214
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
Definition: libfossagent.c:158
PGresult * getSelectedPFiles(PGconn *pgConn, int uploadPk, int agentPk, bool ignoreFilesWithMimeType)
Get the upload_pk, agent_pk and ignoreFilesWithMimeType to get all the file Ids for nomos.
Definition: libfossagent.c:469
int fo_checkPQresult(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres SELECT.
Definition: libfossdb.c:170
#define PERM_WRITE
Read-Write permission.
Definition: libfossology.h:33
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:352
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items they have processed.
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
int fo_scheduler_userID()
Gets the id of the user that created the job that the agent is running.
char * fo_scheduler_current()
Get the last read string from the scheduler.
char * fo_scheduler_next()
Get the next data to process from the scheduler.
void fo_scheduler_connect(int *argc, char **argv, PGconn **db_conn)
Establish a connection between an agent and the scheduler.
FUNCTION void lrcache_free(cacheroot_t *pcroot)
Free the hash table.
Definition: liccache.c:72
void licenseInit()
license initialization
Definition: licenses.c:70
fo_dbManager * fo_dbManager_new(PGconn *dbConnection)
Create and initialize new fo_dbManager object.
Definition: standalone.c:33
int isFILE(char *pathname)
Check if an inode is a file.
Definition: util.c:1340
licText_t licText[]
void arsNomos(cacheroot_t *cacheroot, bool ignoreFilesWithMimeType)
Make entry in ars table for audit.
Definition: nomos.c:60
int main(int argc, char **argv)
Definition: nomos.c:258
char debugStr[myBUFSIZ]
Debug string.
Definition: nomos.c:28
int schedulerMode
Definition: nomos.c:33
void read_file_grab_license(int file_number, FILE **pFile)
read line by line, then call processFile to grab license line by line
Definition: nomos.c:200
int Verbose
Verbose level.
Definition: nomos.c:34
size_t hashEntries
Hash entries.
Definition: nomos.c:31
char dbErrString[myBUFSIZ]
DB error string.
Definition: nomos.c:29
void myFork(int proc_num, FILE **pFile)
the recursive create process and process grabbing licenses
Definition: nomos.c:230
void list_dir(const char *dir_name, int process_count, int *distribute_count, FILE **pFile)
list all files and store file paths from the specified directory
Definition: nomos.c:142
Nomos header file.
#define NULL_STR
NULL string.
Definition: nomos.h:235
void Bail(int exitval)
Close connections and exit.
Definition: nomos_utils.c:533
int optionIsSet(int val)
Check if an CLI option is set.
Definition: nomos_utils.c:560
FUNCTION void freeAndClearScan(struct curScan *thisScan)
Clean-up all the per scan data structures, freeing any old data.
Definition: nomos_utils.c:941
sem_t * mutexJson
Mutex to handle JSON writes.
Definition: nomos_utils.c:21
FUNCTION void initializeCurScan(struct curScan *cur)
Initialize the scanner.
Definition: nomos_utils.c:926
FUNCTION int recordScanToDB(cacheroot_t *pcroot, struct curScan *scanRecord)
Write out the information about the scan to the FOSSology database.
Definition: nomos_utils.c:850
FUNCTION int initLicRefCache(cacheroot_t *pcroot)
build a cache the license ref db table.
Definition: nomos_utils.c:260
const char * upload_pk
Definition: sqlstatements.h:82
int maxnodes
No. of nodes in the list.
Definition: liccache.h:42
cachenode_t * nodes
Array of nodes.
Definition: liccache.h:43
Struct that tracks state related to current file being scanned.
Definition: nomos.h:391
char pFile[myBUFSIZ]
Definition: nomos.h:397
long pFileFk
Definition: nomos.h:396
int cliMode
Definition: nomos.h:412
Structure holding data truly global in that it remains consistent for each file scanned.
Definition: nomos.h:344
PGconn * pgConn
DB Connection.
Definition: nomos.h:362
int agentPk
Agent id.
Definition: nomos.h:359
fo_dbManager * dbManager
FOSSology DB manager.
Definition: nomos.h:363
int progOpts
CLI options.
Definition: nomos.h:347
int uPsize
Size.
Definition: nomos.h:349
char initwd[myBUFSIZ]
CDB, would like to workaround/eliminate.
Definition: nomos.h:345
int flags
Flags.
Definition: nomos.h:348
char progName[64]
Program name.
Definition: nomos.h:346
FUNCTION int processFile(PGconn *pgConn, pbucketdef_t bucketDefArray, puploadtree_t puploadtree, int agent_pk, int hasPrules)
Process a file.
Definition: walk.c:167