FOSSology  4.4.0
Open Source License Compliance by Open Source Software
regexscan-Stage3.c
Go to the documentation of this file.
1 /*
2  regexscan: Scan file(s) for regular expression(s)
3 
4  SPDX-FileCopyrightText: © 2013 Hewlett-Packard Development Company, L.P.
5 
6  SPDX-License-Identifier: GPL-2.0-only
7 */
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <unistd.h>
26 #include <string.h>
27 #include <ctype.h>
28 #include <signal.h>
29 #include <libgen.h>
30 
31 #include <regex.h>
32 #include <stdbool.h>
33 
34 #include "libfossology.h"
35 
36 #define MAXCMD 4096 /* not referenced in the visible part of this file */
37 char SQL[256]; /* general SQL buffer ("For DB"); the functions below use local buffers instead */
38 
39 #define myBUFSIZ 2048 /* size of the agent_rev version-string buffer in main() */
40 
41 /*
42 #ifdef COMMIT_HASH
43 char BuildVersion[]="Build version: " COMMIT_HASH ".\n";
44 #endif
45 */
46 
47 PGconn *pgConn = NULL; /* database connection; filled in by fo_scheduler_connect() in main() and used by every query below */
48 
/**
 * \brief Scan an open file, line by line, for a pre-compiled regular expression.
 *
 * Every line that matches is reported on stdout together with its line
 * number, the 1-based position of the match and the matched text. If no
 * line matches, a single "not found" message is printed instead.
 *
 * Lines longer than the internal line buffer are processed in several
 * pieces, each piece being counted as a line of its own.
 *
 * \param regex       regex already compiled by the caller with regcomp();
 *                    it is not freed here
 * \param regexStr    source text of the regex, only used in the
 *                    "not found" message
 * \param scanFilePtr stream to read; it is ALWAYS closed by this function
 *                    before it returns (when it is not NULL)
 * \param fileName    name shown in the report messages
 *
 * \return 0 when the whole file was scanned (with or without matches),
 *         3 when scanFilePtr is NULL or regexec() reported an error other
 *         than REG_NOMATCH
 */
int regexScan(regex_t *regex, char *regexStr, FILE *scanFilePtr, char *fileName)
{
  char textBuff[2000];   /* line buffer for regex match processing */
  char errBuff[250];     /* only receives regerror() text, which is bounded */
  regmatch_t rm[1];
  bool match = false;    /* regex match found indicator */
  int lineCount = 0;
  int retCode;

  if (scanFilePtr == NULL)
  {
    fprintf(stderr, "regexScan: no open file to scan\n");
    return 3;
  }

  /* Now scan the file for regex line by line */
  while (fgets(textBuff, sizeof(textBuff), scanFilePtr) != NULL)
  {
    lineCount++;
    retCode = regexec(regex, textBuff, 1, rm, 0);   /* nmatch = 1, matchptr = rm */

    if (retCode == 0)
    {
      /* Print straight to stdout: formatting into a small fixed buffer
         first overflowed on long file names or long matches.
         regoff_t is not int on every platform, hence the casts. */
      printf("%s: regex found at line %d at position %d. -> %.*s \n",
             fileName, lineCount, (int)rm[0].rm_so + 1,
             (int)(rm[0].rm_eo - rm[0].rm_so), textBuff + rm[0].rm_so);
      putchar('\n');   /* keeps the blank line the former puts() call produced */
      match = true;
    }
    else if (retCode != REG_NOMATCH)
    {
      regerror(retCode, regex, errBuff, sizeof(errBuff));
      fprintf(stderr, "Out of memory? - regex match failure: %s\n", errBuff);
      fclose(scanFilePtr);
      return 3;
    }
    /* REG_NOMATCH: nothing to report for this line */
  }

  /* Report if no matches found */
  if (!match)
  {
    printf("%s: %s not found\n", fileName, regexStr);
    putchar('\n');   /* keeps the blank line the former puts() call produced */
  }

  /* The compiled regex belongs to the caller; only the stream is released */
  fclose(scanFilePtr);
  return 0;
}
112 
121 int pfileNumToNames(char *pfileNum, char *pfileRepoName, char *pfileRealName)
122 {
123  char sqlSelect[256];
124  PGresult *result;
125 
126  /* Attempt to locate the appropriate pFile_pk record */
127  sprintf(sqlSelect, "SELECT pfile_sha1, pfile_md5, pfile_size, ufile_name FROM pfile, uploadtree WHERE pfile_fk = pfile_pk and pfile_pk = '%s'", pfileNum);
128  result = PQexec(pgConn, sqlSelect);
129 
130  if (fo_checkPQresult(pgConn, result, sqlSelect, __FILE__, __LINE__)) return 0;
131 
132  /* confirm a sane result set */
133  if (PQntuples(result) == 0)
134  {
135  PQclear(result);
136 
137  /* Not found */
138  fprintf(stderr, "Database does not contain pfile_pk: %s\n", pfileNum);
139  return 1;
140  }
141  else if (PQntuples(result) != 1)
142  {
143  PQclear(result);
144 
145  /* Not found */
146  fprintf(stderr, "Database contains multiple pfile_pk: %s\n", pfileNum);
147  return 2;
148  }
149  /* We've managed to locate the one and only pfile_pk record. Build the filePath string */
150  /* Concatenate first row fields 0, 1 and 2 */
151  sprintf(pfileRepoName, "%s.%s.%s", PQgetvalue(result, 0, 0), PQgetvalue(result, 0, 1), PQgetvalue(result, 0, 2));
152  /* and extract the actual filename from field 4 - uploadtree.ufile_name */
153  sprintf(pfileRealName, "%s", PQgetvalue(result, 0, 3));
154 
155 // fprintf(stderr, "fileName is:%s\n", pFileName);
156  PQclear(result);
157  return 0;
158 }
159 
160 
169 int regexScanUpload(char *uploadNum, char *regexStr)
170 {
171  char sqlSelect[256];
172  PGresult *result, *pfileResult;
173 
174  int fileCount, i, retCode;
175 
176  char fileRealName[1000];
177  char fileRepoName[1000];
178 
179  FILE *scanFilePtr;
180 
181  regex_t regex;
182 
183  /* Ensure uploadNum is "valid" then obtain a list of pfile entries and scan them */
184  sprintf(sqlSelect, "SELECT upload_pk, upload_mode, upload_filename from upload where upload_pk = '%s'", uploadNum);
185  result = PQexec(pgConn, sqlSelect);
186 
187  if (fo_checkPQresult(pgConn, result, sqlSelect, __FILE__, __LINE__)) return 0;
188 
189  /* confirm a sane result set */
190  if (PQntuples(result) == 0)
191  {
192  fprintf(stderr, "No uploads appear to be available here!\n");
193  PQclear(result);
194  return 0; /* nothing found to scan */
195  }
196 
197  /* Next ensure that uploadNum was successfully uploaded */
198  /* We'll only look at upload_pk entries that have successfully run ununpack (64) and adj2nest (32) */
199  if ((atoi(PQgetvalue(result, 0, 1)) & 96) != 96)
200  {
201  fprintf(stderr, "Upload %s was not successfully processed after upload!\n", uploadNum);
202  PQclear(result);
203  return 0; /* nothing found to scan */
204  }
205 
206  /* Now get our list of required pfile entries for this upload */
207  sprintf(sqlSelect, "SELECT uploadtree.pfile_fk, ufile_name from uploadtree, upload"
208  " where upload_fk = upload_pk and uploadtree.pfile_fk <> 0 and ufile_mode = 32768 and upload_pk = '%s'", uploadNum);
209  result = PQexec(pgConn, sqlSelect);
210 
211  if (fo_checkPQresult(pgConn, result, sqlSelect, __FILE__, __LINE__)) return 0;
212 
213  fileCount = PQntuples(result);
214 // fprintf(stderr, "Located %d files to process.\n", fileCount);
215 
216  /* Compile the regex for improved performance */
217  retCode = regcomp(&regex, regexStr, REG_ICASE+REG_EXTENDED);
218  if (retCode)
219  {
220  fprintf(stderr, "regex %s failed to compile\n", regexStr);
221  return 1;
222  }
223 
224  /* Scan the files we've found for this upload */
225  for (i=0; i<fileCount; i++)
226  {
227  /* Attempt to locate the appropriate pFile_pk record */
228  sprintf(sqlSelect, "SELECT pfile_sha1, pfile_md5, pfile_size, ufile_name"
229  " FROM pfile, uploadtree WHERE pfile_fk = pfile_pk and pfile_pk = '%s'", PQgetvalue(result, i, 0));
230  pfileResult = PQexec(pgConn, sqlSelect);
231 
232  if (fo_checkPQresult(pgConn, pfileResult, sqlSelect, __FILE__, __LINE__)) return 0;
233 
234  /* confirm a sane result set */
235  if (PQntuples(pfileResult) == 1)
236  {
237  /* For each pfile value grind through the regex scan process */
238 
239  /* Locate and construct the appropriate full name from pfile table based upon pfile_pk value */
240  if (pfileNumToNames(PQgetvalue(result, i, 0), fileRepoName, fileRealName) != 0)
241  {
242  fprintf(stderr, "ERROR: Unable to locate pfile_pk '%s'\n", PQgetvalue(result, i, 0));
243  return 0;
244  }
245 
246  /* Use fo_RepFread() for access. It uses fo_RepMkPath() to map name to full path. */
247  scanFilePtr = fo_RepFread("files", fileRepoName);
248  if (!scanFilePtr)
249  {
250  fprintf(stderr, "ERROR: Unable to open '%s/%s'\n", "files", fileRepoName);
251  return 0;
252  }
253 
254  /* Call scan function. Note that we'll need to "Humanize" the fileName at some point. */
255  regexScan(&regex, regexStr, scanFilePtr, fileRealName);
256  }
257  else
258  {
259  fprintf(stderr, "WARNING: File: %s - Located %d instances of pfile_pk %s ! Size = %s bytes!\n",
260  PQgetvalue(result, i, 1), PQntuples(pfileResult), PQgetvalue(result, i, 0), PQgetvalue(pfileResult, i, 2));
261  }
262  }
263  /* return the number of scanned files */
264  return i;
265 }
266 
267 
/**
 * \brief Print the command line help of the regexscan agent on stdout.
 * \param Name program name shown in the first line (normally argv[0])
 */
void Usage (char *Name)
{
  /* One entry per help line, printed in this order after the summary line */
  static const char *const helpLines[] =
  {
    " -i :: initialize the database, then exit.\n",
    " -c SYSCONFDIR :: FOSSology configuration directory.\n",
    " -h :: show available command line options.\n",
    " -v :: increase agent logging verbosity.\n",
    " -r :: regex expression to load from command line.\n",
    " filename :: filename to process with regex.\n"
  };
  const size_t lineTotal = sizeof(helpLines) / sizeof(helpLines[0]);
  size_t idx;

  printf("Usage: %s [options] [id [id ...]]\n", Name);
  for (idx = 0; idx < lineTotal; idx++)
  {
    fputs(helpLines[idx], stdout);
  }
} /* Usage() */
282 
283 /*********************************************************/
284 int main (int argc, char *argv[])
285 {
286  int nonoptargs;
287  int c, retCode;
288 
289  regex_t regex;
290 
291  char regexStr[1024]; /* string storage for the regex expression */
292  bool regexSet = false;
293 
294  char fileName[1000];
295  FILE *scanFilePtr;
296 
297  char uploadNum[10];
298 
299  int scannedCount = 0;
300 
301  int user_pk;
302  long UploadPK=-1;
303 
304  char *COMMIT_HASH;
305  char *VERSION;
306  char agent_rev[myBUFSIZ];
307 
308  /* connect to scheduler. Noop if not run from scheduler. */
309  fo_scheduler_connect(&argc, argv, &pgConn);
310 
311 /*
312  Version reporting.
313 */
314  COMMIT_HASH = fo_sysconfig("regexscan", "COMMIT_HASH");
315  VERSION = fo_sysconfig("regexscan", "VERSION");
316  sprintf(agent_rev, "%s.%s", VERSION, COMMIT_HASH);
317 #ifdef REGEX_DEBUG
318  fprintf(stdout, "regexscan reports version info as '%s.%s'.\n", VERSION, COMMIT_HASH);
319 #endif
320 
321  /* Process command-line */
322  while((c = getopt(argc,argv,"chir:v")) != -1)
323  {
324  switch(c)
325  {
326  case 'c':
327  break; /* handled by fo_scheduler_connect() */
328  case 'i':
329  PQfinish(pgConn);
330  return(0);
331  case 'r':
332  sprintf(regexStr, "%s", optarg);
333  regexSet = true;
334  break;
335  case 'v':
336  agent_verbose++;
337  break;
338  case 'h':
339  default:
340  Usage(argv[0]);
341  fflush(stdout);
342  PQfinish(pgConn);
343  exit(-1);
344  }
345  }
346 
347  /* Sanity check for regex value required here. */
348  if (!regexSet)
349  {
350  fprintf (stderr, "No regex value has been requested!\n");
351  PQfinish(pgConn);
353  return 1;
354  }
355 
356  /* process filename after switches. How many non-option arguments are there ? */
357  nonoptargs = argc - optind; /* total argument count minus the option count */
358 
359  if (nonoptargs == 0)
360  {
361  /* Assume it was a scheduler call */
362  user_pk = fo_scheduler_userID();
363 
364  while(fo_scheduler_next())
365  {
366  UploadPK = atol(fo_scheduler_current());
367 
368  printf("UploadPK is: %ld\n", UploadPK);
369  sprintf(uploadNum, "%ld", UploadPK);
370  scannedCount = regexScanUpload(uploadNum, regexStr);
371  if (scannedCount == 0)
372  {
373  fprintf(stderr, "Failed to successfully scan: upload - %s!\n", uploadNum);
374  }
375  }
376  }
377  else
378  {
379  /* File access initialization - For Stage 3 use first arg as fileName */
380  sprintf(fileName, "%s", argv[optind]); /* Grab first non-switch argument as filename */
381 
382  scanFilePtr = fopen(fileName, "r");
383  if (!scanFilePtr)
384  {
385  fprintf(stderr, "ERROR: Unable to open '%s'\n", fileName);
386  PQfinish(pgConn);
388  }
389 
390  /* Compile the regex for improved performance */
391  retCode = regcomp(&regex, regexStr, REG_ICASE+REG_EXTENDED);
392  if (retCode)
393  {
394  fprintf(stderr, "regex %s failed to compile\n", regexStr);
395  PQfinish(pgConn);
397  }
398 
399  /* Now call the function that scans a file for a regex */
400  retCode = regexScan(&regex, (char *)regexStr, scanFilePtr, (char *)fileName);
401 // retCode = regexScan(uploadNum, regexStr);
402  if (retCode != 0)
403  {
404  fprintf(stderr, "Failed to successfully scan: %s!\n", fileName);
405  }
406 
407  }
408 
409  PQfinish(pgConn);
411 
412  return 0;
413 } /* main() */
414 
int fo_checkPQresult(PGconn *pgConn, PGresult *result, char *sql, char *FileID, int LineNumb)
Check the result status of a postgres SELECT.
Definition: libfossdb.c:170
The main FOSSology C library.
FILE * fo_RepFread(char *Type, char *Filename)
Perform an fopen for reading only.
Definition: libfossrepo.c:613
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
int agent_verbose
Common verbose flags for the agents, this is used so that the scheduler can change the verbose level ...
int fo_scheduler_userID()
Gets the id of the user that created the job that the agent is running.
char * fo_scheduler_current()
Get the last read string from the scheduler.
char * fo_scheduler_next()
Get the next data to process from the scheduler.
void fo_scheduler_connect(int *argc, char **argv, PGconn **db_conn)
Establish a connection between an agent and the scheduler.
void Usage(char *Name)
Usage description for this regexscan agent.
int regexScanUpload(char *uploadNum, char *regexStr)
Scan an upload for a regular expression: gets the list of files in the upload and calls regexScan() on each of them.
int regexScan(regex_t *regex, char *regexStr, FILE *scanFilePtr, char *fileName)
Scan a file for a regular expression. The regex is compiled once by the caller, for performance, and passed in already compiled.
char SQL[256]
For DB.
PGconn * pgConn
Database connection.
int pfileNumToNames(char *pfileNum, char *pfileRepoName, char *pfileRealName)
Creates filenames from pfile_pk value.
Store the results of a regex match.
Definition: scanners.hpp:28