1 /*
2  SPDX-FileCopyrightText: © 2014-2018,2022, Siemens AG
3  Author: Daniele Fognini, Johannes Najjar
5  SPDX-License-Identifier: GPL-2.0-only
6 */
#include "copyrightUtils.hpp"

#include <boost/program_options.hpp>

#include <iostream>
#include <sstream>
#include <stdexcept>
18 using namespace std;
25 int queryAgentId(PGconn* dbConn)
26 {
27  char* COMMIT_HASH = fo_sysconfig(AGENT_NAME, "COMMIT_HASH");
28  char* VERSION = fo_sysconfig(AGENT_NAME, "VERSION");
29  char* agentRevision;
30  if (!asprintf(&agentRevision, "%s.%s", VERSION, COMMIT_HASH))
31  {
32  exit(-1);
33  };
35  int agentId = fo_GetAgentKey(dbConn,
36  AGENT_NAME, 0, agentRevision, AGENT_DESC);
37  free(agentRevision);
39  if (agentId > 0)
40  {
41  return agentId;
42  }
43  else
44  {
45  exit(1);
46  }
47 }
53 int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager& dbManager)
54 {
55  return fo_WriteARS(dbManager.getConnection(), arsId, uploadId, agentId, AGENT_ARS, NULL, success);
56 }
62 void bail(int exitval)
63 {
64  fo_scheduler_disconnect(exitval);
65  exit(exitval);
66 }
78 bool parseCliOptions(int argc, char** argv, CliOptions& dest,
79  std::vector<std::string>& fileNames, std::string& directoryToScan)
80 {
81  unsigned type = 0;
83  boost::program_options::options_description desc(IDENTITY ": recognized options");
84  desc.add_options()
85  ("help,h", "shows help")
86  (
87  "type,T",
88  boost::program_options::value<unsigned>(&type)
89  ->default_value(ALL_TYPES),
90  "type of regex to try"
91  ) // TODO change and add help based on IDENTITY
92  (
93  "verbose,v", "increase verbosity"
94  )
95  (
96  "regex",
97  boost::program_options::value<vector<string> >(),
98  "user defined Regex to search: [{name=cli}@@][{matchingGroup=0}@@]{regex} e.g. 'linux@@1@@(linus) torvalds'"
99  )
100  (
101  "files",
102  boost::program_options::value< vector<string> >(),
103  "files to scan"
104  )
105  (
106  "json,J", "output JSON"
107  )
108  (
109  "ignoreFilesWithMimeType,I", "ignoreFilesWithMimeType"
110  )
111  (
112  "config,c", boost::program_options::value<string>(), "path to the sysconfigdir"
113  )
114  (
115  "scheduler_start", "specifies, that the command was called by the scheduler"
116  )
117  (
118  "userID", boost::program_options::value<int>(), "the id of the user that created the job (only in combination with --scheduler_start)"
119  )
120  (
121  "groupID", boost::program_options::value<int>(), "the id of the group of the user that created the job (only in combination with --scheduler_start)"
122  )
123  (
124  "jobId", boost::program_options::value<int>(), "the id of the job (only in combination with --scheduler_start)"
125  )
126  (
127  "directory,d", boost::program_options::value<string>(), "directory to scan (recursive)"
128  )
129  ;
131  boost::program_options::positional_options_description p;
132  p.add("files", -1);
134  boost::program_options::variables_map vm;
136  try
137  {
138  boost::program_options::store(
139  boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
141  type = vm["type"].as<unsigned>();
143  if ((vm.count("help") > 0) || (type > ALL_TYPES))
144  {
145  cout << desc << endl;
146  exit(0);
147  }
149  if (vm.count("files"))
150  {
151  fileNames = vm["files"].as<std::vector<string> >();
152  }
154  unsigned long verbosity = vm.count("verbose");
155  bool json = vm.count("json") > 0 ? true : false;
156  bool ignoreFilesWithMimeType = vm.count("ignoreFilesWithMimeType") > 0 ? true : false;
158  dest = CliOptions(verbosity, type, json, ignoreFilesWithMimeType);
160  if (vm.count("regex"))
161  {
162  const std::vector<std::string>& userRegexesFmts = vm["regex"].as<vector<std::string> >();
163  for (auto it = userRegexesFmts.begin(); it != userRegexesFmts.end(); ++it) {
164  scanner* sc = makeRegexScanner(*it, "cli");
165  if (!(sc))
166  {
167  cout << "cannot parse regex format : " << *it << endl;
168  return false;
169  }
170  else
171  {
172  dest.addScanner(sc);
173  }
174  }
175  }
177  if (vm.count("directory"))
178  {
179  if (vm.count("files"))
180  {
181  cout << "cannot pass files and directory at the same time" << endl;
182  cout << desc << endl;
183  fileNames.clear();
184  return false;
185  }
186  directoryToScan = vm["directory"].as<std::string>();
187  }
189  return true;
190  }
191  catch (boost::bad_any_cast&) {
192  cout << "wrong parameter type" << endl;
193  cout << desc << endl;
194  return false;
195  }
196  catch (boost::program_options::error&)
197  {
198  cout << "wrong command line arguments" << endl;
199  cout << desc << endl;
200  return false;
201  }
202 }
// NOTE(review): the signature belonging to this body is not visible in this
// listing. It is presumably addDefaultScanners(), which is called with a
// CopyrightState further down in this file — confirm against the full source.
//
// Registers scanners on `state`. The option type read from the CLI options is
// treated as a bit mask: each tested bit adds one scanner.
209 {
210  unsigned types = state.getCliOptions().getOptType();
// NOTE(review): the #endif after the "author" scanner has no visible opening
// #if/#ifdef here; the four scanners below are presumably wrapped in a
// conditional whose opening line is missing from this listing — confirm.
// Bit 0: hCopyrightScanner (the commented-out line is an older call kept as is).
212  if (types & 1<<0)
213  //state.addMatcher(RegexMatcher(regCopyright::getType(), regCopyright::getRegex()));
214  state.addScanner(new hCopyrightScanner());
// Bit 1: regexScanner("url", "copyright").
216  if (types & 1<<1)
217  state.addScanner(new regexScanner("url", "copyright"));
// Bit 2: regexScanner("email", "copyright", 1); the only one passing a third argument.
219  if (types & 1<<2)
220  state.addScanner(new regexScanner("email", "copyright", 1));
// Bit 3: regexScanner("author", "copyright").
222  if (types & 1<<3)
223  state.addScanner(new regexScanner("author", "copyright"));
224 #endif
// The remaining variants each register a single scanner on bit 0, selected at
// compile time by the IDENTITY_* macro that is defined.
226 #ifdef IDENTITY_IPRA
227  if (types & 1<<0)
228  state.addScanner(new regexScanner("ipra", "ipra"));
229 #endif
231 #ifdef IDENTITY_ECC
232  if (types & 1<<0)
233  state.addScanner(new regexScanner("ecc", "ecc"));
234 #endif
236 #ifdef IDENTITY_KW
237  if (types & 1<<0)
238  state.addScanner(new regexScanner("keyword", "keyword"));
239 #endif
240 }
248 scanner* makeRegexScanner(const std::string& regexDesc, const std::string& defaultType) {
249  #define RGX_FMT_SEPARATOR "@@"
250  auto fmtRegex = rx::regex(
251  "(?:([[:alpha:]]+)" RGX_FMT_SEPARATOR ")?(?:([[:digit:]]+)" RGX_FMT_SEPARATOR ")?(.*)",
252  rx::regex_constants::icase
253  );
255  rx::match_results<std::string::const_iterator> match;
256  if (rx::regex_match(regexDesc.begin(), regexDesc.end(), match, fmtRegex))
257  {
258  std::string type(match.length(1) > 0 ? match.str(1) : defaultType.c_str());
260  int regId = match.length(2) > 0 ? std::stoi(std::string(match.str(2))) : 0;
262  if (match.length(3) == 0)
263  return 0; // nullptr
265  std::istringstream stream;
266  stream.str(type + "=" + match.str(3));
267  return new regexScanner(type, stream, regId);
268  }
269  return 0; // nullptr
270 }
// NOTE(review): the signature belonging to this body is not visible in this
// listing; it presumably takes the parsed CliOptions as `cliOptions` and
// returns the CopyrightState — confirm against the full source.
280 {
// Move the options into a fresh state, then let addDefaultScanners() register
// the scanners selected by those options.
281  CopyrightState state(std::move(cliOptions));
282  addDefaultScanners(state);
284  return state;
285 }
296 bool saveToDatabase(const string& s, const list<match>& matches, unsigned long pFileId, int agentId, const CopyrightDatabaseHandler& copyrightDatabaseHandler)
297 {
298  if (!copyrightDatabaseHandler.begin())
299  {
300  return false;
301  }
303  size_t count = 0;
304  for (auto m = matches.begin(); m != matches.end(); ++m)
305  {
307  DatabaseEntry entry;
308  entry.agent_fk = agentId;
309  entry.content = cleanMatch(s, *m);
310  entry.copy_endbyte = m->end;
311  entry.copy_startbyte = m->start;
312  entry.pfile_fk = pFileId;
313  entry.type = m->type;
315  if (entry.content.length() != 0)
316  {
317  ++count;
318  if (!copyrightDatabaseHandler.insertInDatabase(entry))
319  {
320  copyrightDatabaseHandler.rollback();
321  return false;
322  };
323  }
324  }
326  return copyrightDatabaseHandler.commit();
327 }
337 void matchFileWithLicenses(const string& sContent, unsigned long pFileId, CopyrightState const& state, int agentId, CopyrightDatabaseHandler& databaseHandler)
338 {
339  list<match> l;
340  const list<unptr::shared_ptr<scanner>>& scanners = state.getScanners();
341  for (auto sc = scanners.begin(); sc != scanners.end(); ++sc)
342  {
343  (*sc)->ScanString(sContent, l);
344  }
345  saveToDatabase(sContent, l, pFileId, agentId, databaseHandler);
346 }
361 void matchPFileWithLicenses(CopyrightState const& state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler& databaseHandler)
362 {
363  char* pFile = databaseHandler.getPFileNameForFileId(pFileId);
365  if (!pFile)
366  {
367  cout << "File not found " << pFileId << endl;
368  bail(8);
369  }
371  char* fileName = NULL;
372  {
373 #pragma omp critical (repo_mk_path)
374  fileName = fo_RepMkPath("files", pFile);
375  }
376  if (fileName)
377  {
378  string s;
379  ReadFileToString(fileName, s);
381  matchFileWithLicenses(s, pFileId, state, agentId, databaseHandler);
383  free(fileName);
384  free(pFile);
385  }
386  else
387  {
388  cout << "PFile not found in repo " << pFileId << endl;
389  bail(7);
390  }
391 }
405 bool processUploadId(const CopyrightState& state, int agentId, int uploadId, CopyrightDatabaseHandler& databaseHandler, bool ignoreFilesWithMimeType)
406 {
407  vector<unsigned long> fileIds = databaseHandler.queryFileIdsForUpload(agentId, uploadId, ignoreFilesWithMimeType);
409 #pragma omp parallel num_threads(THREADS)
410  {
411  CopyrightDatabaseHandler threadLocalDatabaseHandler(databaseHandler.spawn());
413  size_t pFileCount = fileIds.size();
414 #pragma omp for
415  for (size_t it = 0; it < pFileCount; ++it)
416  {
417  unsigned long pFileId = fileIds[it];
419  if (pFileId == 0)
420  {
421  continue;
422  }
424  matchPFileWithLicenses(state, agentId, pFileId, threadLocalDatabaseHandler);
427  }
428  }
430  return true;
431 }
439 pair<string, list<match>> processSingleFile(const CopyrightState& state,
440  const string fileName)
441 {
442  const list<unptr::shared_ptr<scanner>>& scanners = state.getScanners();
443  list<match> matchList;
445  // Read file into one string
446  string s;
447  if (!ReadFileToString(fileName, s))
448  {
449  // File error
450  s = "";
451  }
452  else
453  {
454  for (auto sc = scanners.begin(); sc != scanners.end(); ++sc)
455  {
456  (*sc)->ScanString(s, matchList);
457  }
458  }
459  return make_pair(s, matchList);
460 }
469 void appendToJson(const std::string fileName,
470  const std::pair<string, list<match>> resultPair, bool &printComma)
471 {
472  Json::Value result;
473 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
474  // Use FastWriter for versions below 1.4.0
475  Json::FastWriter jsonWriter;
476 #else
477  // Since version 1.4.0, FastWriter is deprecated and replaced with
478  // StreamWriterBuilder
479  Json::StreamWriterBuilder jsonWriter;
480  jsonWriter["commentStyle"] = "None";
481  jsonWriter["indentation"] = "";
482 #endif
484  if (resultPair.first.empty())
485  {
486  result["file"] = fileName;
487  result["results"] = "Unable to read file";
488  }
489  else
490  {
491  list<match> resultList = resultPair.second;
492  Json::Value results;
493  for (auto m : resultList)
494  {
495  Json::Value j;
496  j["start"] = m.start;
497  j["end"] = m.end;
498  j["type"] = m.type;
499  j["content"] = cleanMatch(resultPair.first, m);
500  results.append(j);
501  }
502  result["file"] = fileName;
503  result["results"] = results;
504  }
505  // Thread-Safety: output all matches JSON at once to STDOUT
506 #pragma omp critical (jsonPrinter)
507  {
508  if (printComma)
509  {
510  cout << "," << endl;
511  }
512  else
513  {
514  printComma = true;
515  }
516  string jsonString;
517 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
518  // For version below 1.4.0, every writer append `\n` at end.
519  // Find and replace it.
520  jsonString = jsonWriter.write(result);
521  jsonString.replace(jsonString.find("\n"), string("\n").length(), "");
522 #else
523  // For version >= 1.4.0, \n is not appended.
524  jsonString = Json::writeString(jsonWriter, result);
525 #endif
526  cout << " " << jsonString << flush;
527  }
528 }
535 void printResultToStdout(const std::string fileName,
536  const std::pair<string, list<match>> resultPair)
537 {
538  if (resultPair.first.empty())
539  {
540  cout << fileName << " :: Unable to read file" << endl;
541  return;
542  }
543  stringstream ss;
544  ss << fileName << " ::" << endl;
545  // Output matches
546  list<match> resultList = resultPair.second;
547  for (auto m = resultList.begin(); m != resultList.end(); ++m)
548  {
549  ss << "\t[" << m->start << ':' << m->end << ':' << m->type << "] '"
550  << cleanMatch(resultPair.first, *m)
551  << "'" << endl;
552  }
553  // Thread-Safety: output all matches (collected in ss) at once to cout
554  cout << ss.str();
555 }
