FOSSology  4.6.0
Open Source License Compliance by Open Source Software
copyrightUtils.cc
Go to the documentation of this file.
1 /*
2  SPDX-FileCopyrightText: © 2014-2018,2022, Siemens AG
3  Author: Daniele Fognini, Johannes Najjar
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
12 #include "copyrightUtils.hpp"
13 #include <boost/program_options.hpp>
14 
15 #include <iostream>
16 #include <sstream>
17 
18 using namespace std;
19 
25 int queryAgentId(PGconn* dbConn)
26 {
27  char* COMMIT_HASH = fo_sysconfig(AGENT_NAME, "COMMIT_HASH");
28  char* VERSION = fo_sysconfig(AGENT_NAME, "VERSION");
29  char* agentRevision;
30  if (!asprintf(&agentRevision, "%s.%s", VERSION, COMMIT_HASH))
31  {
32  exit(-1);
33  };
34 
35  int agentId = fo_GetAgentKey(dbConn,
36  AGENT_NAME, 0, agentRevision, AGENT_DESC);
37  free(agentRevision);
38 
39  if (agentId > 0)
40  {
41  return agentId;
42  }
43  else
44  {
45  exit(1);
46  }
47 }
48 
53 int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager& dbManager)
54 {
55  return fo_WriteARS(dbManager.getConnection(), arsId, uploadId, agentId, AGENT_ARS, NULL, success);
56 }
57 
62 void bail(int exitval)
63 {
64  fo_scheduler_disconnect(exitval);
65  exit(exitval);
66 }
67 
78 bool parseCliOptions(int argc, char** argv, CliOptions& dest,
79  std::vector<std::string>& fileNames, std::string& directoryToScan)
80 {
81  unsigned type = 0;
82 
83  boost::program_options::options_description desc(IDENTITY ": recognized options");
84  desc.add_options()
85  ("help,h", "shows help")
86  (
87  "type,T",
88  boost::program_options::value<unsigned>(&type)
89  ->default_value(ALL_TYPES),
90  "type of regex to try"
91  ) // TODO change and add help based on IDENTITY
92  (
93  "verbose,v", "increase verbosity"
94  )
95  (
96  "regex",
97  boost::program_options::value<vector<string> >(),
98  "user defined Regex to search: [{name=cli}@@][{matchingGroup=0}@@]{regex} e.g. 'linux@@1@@(linus) torvalds'"
99  )
100  (
101  "files",
102  boost::program_options::value< vector<string> >(),
103  "files to scan"
104  )
105  (
106  "json,J", "output JSON"
107  )
108  (
109  "ignoreFilesWithMimeType,I", "ignoreFilesWithMimeType"
110  )
111  (
112  "config,c", boost::program_options::value<string>(), "path to the sysconfigdir"
113  )
114  (
115  "scheduler_start", "specifies, that the command was called by the scheduler"
116  )
117  (
118  "userID", boost::program_options::value<int>(), "the id of the user that created the job (only in combination with --scheduler_start)"
119  )
120  (
121  "groupID", boost::program_options::value<int>(), "the id of the group of the user that created the job (only in combination with --scheduler_start)"
122  )
123  (
124  "jobId", boost::program_options::value<int>(), "the id of the job (only in combination with --scheduler_start)"
125  )
126  (
127  "directory,d", boost::program_options::value<string>(), "directory to scan (recursive)"
128  )
129  ;
130 
131  boost::program_options::positional_options_description p;
132  p.add("files", -1);
133 
134  boost::program_options::variables_map vm;
135 
136  try
137  {
138  boost::program_options::store(
139  boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
140 
141  type = vm["type"].as<unsigned>();
142 
143  if ((vm.count("help") > 0) || (type > ALL_TYPES))
144  {
145  cout << desc << endl;
146  exit(0);
147  }
148 
149  if (vm.count("files"))
150  {
151  fileNames = vm["files"].as<std::vector<string> >();
152  }
153 
154  unsigned long verbosity = vm.count("verbose");
155  bool json = vm.count("json") > 0 ? true : false;
156  bool ignoreFilesWithMimeType = vm.count("ignoreFilesWithMimeType") > 0 ? true : false;
157 
158  dest = CliOptions(verbosity, type, json, ignoreFilesWithMimeType);
159 
160  if (vm.count("regex"))
161  {
162  const std::vector<std::string>& userRegexesFmts = vm["regex"].as<vector<std::string> >();
163  for (auto it = userRegexesFmts.begin(); it != userRegexesFmts.end(); ++it) {
164  scanner* sc = makeRegexScanner(*it, "cli");
165  if (!(sc))
166  {
167  cout << "cannot parse regex format : " << *it << endl;
168  return false;
169  }
170  else
171  {
172  dest.addScanner(sc);
173  }
174  }
175  }
176 
177  if (vm.count("directory"))
178  {
179  if (vm.count("files"))
180  {
181  cout << "cannot pass files and directory at the same time" << endl;
182  cout << desc << endl;
183  fileNames.clear();
184  return false;
185  }
186  directoryToScan = vm["directory"].as<std::string>();
187  }
188 
189  return true;
190  }
191  catch (boost::bad_any_cast&) {
192  cout << "wrong parameter type" << endl;
193  cout << desc << endl;
194  return false;
195  }
196  catch (boost::program_options::error&)
197  {
198  cout << "wrong command line arguments" << endl;
199  cout << desc << endl;
200  return false;
201  }
202 }
203 
209 {
210  unsigned types = state.getCliOptions().getOptType();
211 #ifdef IDENTITY_COPYRIGHT
212  if (types & 1<<0)
213  //state.addMatcher(RegexMatcher(regCopyright::getType(), regCopyright::getRegex()));
214  state.addScanner(new hCopyrightScanner());
215 
216  if (types & 1<<1)
217  state.addScanner(new regexScanner("url", "copyright"));
218 
219  if (types & 1<<2)
220  state.addScanner(new regexScanner("email", "copyright", 1));
221 
222  if (types & 1<<3)
223  state.addScanner(new regexScanner("author", "copyright"));
224 #endif
225 
226 #ifdef IDENTITY_IPRA
227  if (types & 1<<0)
228  state.addScanner(new regexScanner("ipra", "ipra"));
229 #endif
230 
231 #ifdef IDENTITY_ECC
232  if (types & 1<<0)
233  state.addScanner(new regexScanner("ecc", "ecc"));
234 #endif
235 
236 #ifdef IDENTITY_KW
237  if (types & 1<<0)
238  state.addScanner(new regexScanner("keyword", "keyword"));
239 #endif
240 }
241 
248 scanner* makeRegexScanner(const std::string& regexDesc, const std::string& defaultType) {
249  #define RGX_FMT_SEPARATOR "@@"
250  auto fmtRegex = rx::regex(
251  "(?:([[:alpha:]]+)" RGX_FMT_SEPARATOR ")?(?:([[:digit:]]+)" RGX_FMT_SEPARATOR ")?(.*)",
252  rx::regex_constants::icase
253  );
254 
255  rx::match_results<std::string::const_iterator> match;
256  if (rx::regex_match(regexDesc.begin(), regexDesc.end(), match, fmtRegex))
257  {
258  std::string type(match.length(1) > 0 ? match.str(1) : defaultType.c_str());
259 
260  int regId = match.length(2) > 0 ? std::stoi(std::string(match.str(2))) : 0;
261 
262  if (match.length(3) == 0)
263  return 0; // nullptr
264 
265  std::istringstream stream;
266  stream.str(type + "=" + match.str(3));
267  return new regexScanner(type, stream, regId);
268  }
269  return 0; // nullptr
270 }
271 
280 {
281  CopyrightState state(std::move(cliOptions));
282  addDefaultScanners(state);
283 
284  return state;
285 }
286 
298 bool saveToDatabase(const string& s, const list<match>& matches, unsigned long pFileId,
299  int agentId, const CopyrightDatabaseHandler& copyrightDatabaseHandler,
300  int uploadId, const string& uploadTreeTableName)
301 {
302  if (!copyrightDatabaseHandler.begin())
303  {
304  return false;
305  }
306 
307  size_t count = 0;
308  for (auto m = matches.begin(); m != matches.end(); ++m)
309  {
310 
311  DatabaseEntry entry;
312  entry.agent_fk = agentId;
313  entry.content = cleanMatch(s, *m);
314  entry.copy_endbyte = m->end;
315  entry.copy_startbyte = m->start;
316  entry.pfile_fk = pFileId;
317  entry.type = m->type;
318  entry.is_enabled = m->is_enabled;
319 
320  if (entry.content.length() != 0)
321  {
322  ++count;
323  if (!copyrightDatabaseHandler.insertInDatabase(entry))
324  {
325  copyrightDatabaseHandler.rollback();
326  return false;
327  }
328  if (!entry.is_enabled)
329  {
330  // Copyright was removed by a cleanup rule: create a copyright_event row
331  // with is_enabled=false (the column default) so the UI shows it in the
332  // deactivated section automatically.
333  if (!copyrightDatabaseHandler.insertDeactivatedEvents(entry, uploadId, uploadTreeTableName))
334  {
335  copyrightDatabaseHandler.rollback();
336  return false;
337  }
338  }
339  }
340  }
341 
342  return copyrightDatabaseHandler.commit();
343 }
344 
355 void matchFileWithLicenses(const string& sContent, unsigned long pFileId,
356  CopyrightState const& state, int agentId, CopyrightDatabaseHandler& databaseHandler,
357  int uploadId, const string& uploadTreeTableName)
358 {
359  list<match> l;
360  const list<unptr::shared_ptr<scanner>>& scanners = state.getScanners();
361  for (auto sc = scanners.begin(); sc != scanners.end(); ++sc)
362  {
363  (*sc)->ScanString(sContent, l);
364  }
365  saveToDatabase(sContent, l, pFileId, agentId, databaseHandler, uploadId, uploadTreeTableName);
366 }
367 
383 void matchPFileWithLicenses(CopyrightState const& state, int agentId,
384  unsigned long pFileId, CopyrightDatabaseHandler& databaseHandler,
385  int uploadId, const string& uploadTreeTableName)
386 {
387  char* pFile = databaseHandler.getPFileNameForFileId(pFileId);
388 
389  if (!pFile)
390  {
391  cout << "File not found " << pFileId << endl;
392  bail(8);
393  }
394 
395  char* fileName = NULL;
396  {
397 #pragma omp critical (repo_mk_path)
398  fileName = fo_RepMkPath("files", pFile);
399  }
400  if (fileName)
401  {
402  string s;
403  ReadFileToString(fileName, s);
404 
405  matchFileWithLicenses(s, pFileId, state, agentId, databaseHandler, uploadId, uploadTreeTableName);
406 
407  free(fileName);
408  free(pFile);
409  }
410  else
411  {
412  cout << "PFile not found in repo " << pFileId << endl;
413  bail(7);
414  }
415 }
416 
429 bool processUploadId(const CopyrightState& state, int agentId, int uploadId, CopyrightDatabaseHandler& databaseHandler, bool ignoreFilesWithMimeType)
430 {
431  vector<unsigned long> fileIds = databaseHandler.queryFileIdsForUpload(agentId, uploadId, ignoreFilesWithMimeType);
432  string uploadTreeTableName = databaseHandler.queryUploadTreeTableName(uploadId);
433 
434 #pragma omp parallel num_threads(THREADS)
435  {
436  CopyrightDatabaseHandler threadLocalDatabaseHandler(databaseHandler.spawn());
437 
438  size_t pFileCount = fileIds.size();
439 #pragma omp for
440  for (size_t it = 0; it < pFileCount; ++it)
441  {
442  unsigned long pFileId = fileIds[it];
443 
444  if (pFileId == 0)
445  {
446  continue;
447  }
448 
449  matchPFileWithLicenses(state, agentId, pFileId, threadLocalDatabaseHandler, uploadId, uploadTreeTableName);
450 
452  }
453  }
454 
455  return true;
456 }
457 
464 pair<string, list<match>> processSingleFile(const CopyrightState& state,
465  const string fileName)
466 {
467  const list<unptr::shared_ptr<scanner>>& scanners = state.getScanners();
468  list<match> matchList;
469 
470  // Read file into one string
471  string s;
472  if (!ReadFileToString(fileName, s))
473  {
474  // File error
475  s = "";
476  }
477  else
478  {
479  for (auto sc = scanners.begin(); sc != scanners.end(); ++sc)
480  {
481  (*sc)->ScanString(s, matchList);
482  }
483  }
484  return make_pair(s, matchList);
485 }
486 
494 void appendToJson(const std::string fileName,
495  const std::pair<string, list<match>> resultPair, bool &printComma)
496 {
497  Json::Value result;
498 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
499  // Use FastWriter for versions below 1.4.0
500  Json::FastWriter jsonWriter;
501 #else
502  // Since version 1.4.0, FastWriter is deprecated and replaced with
503  // StreamWriterBuilder
504  Json::StreamWriterBuilder jsonWriter;
505  jsonWriter["commentStyle"] = "None";
506  jsonWriter["indentation"] = "";
507 #endif
508 
509  if (resultPair.first.empty())
510  {
511  result["file"] = fileName;
512  result["results"] = "Unable to read file";
513  }
514  else
515  {
516  list<match> resultList = resultPair.second;
517  Json::Value results;
518  for (auto m : resultList)
519  {
520  Json::Value j;
521  j["start"] = m.start;
522  j["end"] = m.end;
523  j["type"] = m.type;
524  j["content"] = cleanMatch(resultPair.first, m);
525  results.append(j);
526  }
527  result["file"] = fileName;
528  result["results"] = results;
529  }
530  // Thread-Safety: output all matches JSON at once to STDOUT
531 #pragma omp critical (jsonPrinter)
532  {
533  if (printComma)
534  {
535  cout << "," << endl;
536  }
537  else
538  {
539  printComma = true;
540  }
541  string jsonString;
542 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
543  // For version below 1.4.0, every writer append `\n` at end.
544  // Find and replace it.
545  jsonString = jsonWriter.write(result);
546  jsonString.replace(jsonString.find("\n"), string("\n").length(), "");
547 #else
548  // For version >= 1.4.0, \n is not appended.
549  jsonString = Json::writeString(jsonWriter, result);
550 #endif
551  cout << " " << jsonString << flush;
552  }
553 }
554 
560 void printResultToStdout(const std::string fileName,
561  const std::pair<string, list<match>> resultPair)
562 {
563  if (resultPair.first.empty())
564  {
565  cout << fileName << " :: Unable to read file" << endl;
566  return;
567  }
568  stringstream ss;
569  ss << fileName << " ::" << endl;
570  // Output matches
571  list<match> resultList = resultPair.second;
572  for (auto m = resultList.begin(); m != resultList.end(); ++m)
573  {
574  ss << "\t[" << m->start << ':' << m->end << ':' << m->type << "] '"
575  << cleanMatch(resultPair.first, *m)
576  << "'" << endl;
577  }
578  // Thread-Safety: output all matches (collected in ss) at once to cout
579  cout << ss.str();
580 }
Store the options sent through the CLI.
unsigned int getOptType() const
Get the opt type set by CliOptions.
Manages database related requests for agent.
Definition: database.hpp:54
std::vector< unsigned long > queryFileIdsForUpload(int agentId, int uploadId, bool ignoreFilesWithMimeType)
Get the list of pfile ids on which the given agent has no findings for a given upload.
Definition: database.cc:307
bool insertInDatabase(DatabaseEntry &entry) const
Insert a finding in database.
Definition: database.cc:354
CopyrightDatabaseHandler spawn() const
Spawn/fork a new database handler and return it.
Definition: database.cc:42
bool insertDeactivatedEvents(const DatabaseEntry &entry, int uploadId, const std::string &uploadTreeTableName) const
Insert one deactivated finding per uploadtree node for a given upload, for a given finding.
Definition: database.cc:387
Holds information about state of one agent.
const CliOptions & getCliOptions() const
Get the CliOptions set by user.
void addScanner(scanner *scanner)
Add scanner to state.
const std::list< unptr::shared_ptr< scanner > > & getScanners() const
Get available scanners.
Maps agent data to database schema.
Definition: database.hpp:25
int copy_startbyte
Definition: database.hpp:44
bool is_enabled
Definition: database.hpp:46
std::string content
Definition: database.hpp:31
std::string type
Type of statement found.
Definition: database.hpp:43
int copy_endbyte
Definition: database.hpp:45
std::string queryUploadTreeTableName(int uploadId)
Get the upload tree table name for a given upload id.
bool commit() const
COMMIT a transaction block in DB.
bool begin() const
BEGIN a transaction block in DB.
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
bool rollback() const
ROLLBACK a transaction block in DB.
DB wrapper for agents.
Implementation of scanner class for copyright.
Definition: copyscan.hpp:39
Provides a regex scanner using predefined regexs.
Definition: regscan.hpp:21
Abstract class to provide interface to scanners.
Definition: scanners.hpp:59
scanner * makeRegexScanner(const std::string &regexDesc, const std::string &defaultType)
Make a boost regex scanner object based on regex desc and type.
bool parseCliOptions(int argc, char **argv, CliOptions &dest, std::vector< std::string > &fileNames, std::string &directoryToScan)
Parse the options sent by CLI to CliOptions object.
pair< string, list< match > > processSingleFile(const CopyrightState &state, const string fileName)
bool saveToDatabase(const string &s, const list< match > &matches, unsigned long pFileId, int agentId, const CopyrightDatabaseHandler &copyrightDatabaseHandler, int uploadId, const string &uploadTreeTableName)
Save findings to the database if agent was called by scheduler.
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
static void addDefaultScanners(CopyrightState &state)
Add default scanners to the agent state.
int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager &dbManager)
Call C function fo_WriteARS() and translate the arguments.
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler, int uploadId, const string &uploadTreeTableName)
Scan a given file with all available scanners and save findings to database.
CopyrightState getState(CliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
bool processUploadId(const CopyrightState &state, int agentId, int uploadId, CopyrightDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType)
Process a given upload id, scan from statements and add to database.
void appendToJson(const std::string fileName, const std::pair< string, list< match >> resultPair, bool &printComma)
void printResultToStdout(const std::string fileName, const std::pair< string, list< match >> resultPair)
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
void matchPFileWithLicenses(CopyrightState const &state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler &databaseHandler, int uploadId, const string &uploadTreeTableName)
Get the file contents, scan for statements and save findings to database.
int s
The socket that the CLI will use to communicate.
Definition: fo_cli.c:37
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:214
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
Definition: libfossagent.c:158
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:352
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items they have processed.
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
Definition: scanners.cc:21
Store the results of a regex match.
Definition: scanners.hpp:28