FOSSology  4.6.0
Open Source License Compliance by Open Source Software
scancode_utils.cc
1 /*
2  SPDX-FileCopyrightText: © 2021 Sarita Singh <saritasingh.0425@gmail.com>
3 
4  SPDX-License-Identifier: GPL-2.0-only
5 */
6 
7 #include "scancode_utils.hpp"
8 
9 namespace po = boost::program_options;
10 
15 void bail(int exitval) {
16  fo_scheduler_disconnect(exitval);
17  exit(exitval);
18 }
19 
26  int agentId = queryAgentId(dbManager);
27  return State(agentId);
28 }
29 
36  char *COMMIT_HASH = fo_sysconfig(AGENT_NAME, "COMMIT_HASH");
37  char *VERSION = fo_sysconfig(AGENT_NAME, "VERSION");
38  char *agentRevision;
39 
40  if (!asprintf(&agentRevision, "%s.%s", VERSION, COMMIT_HASH))
41  bail(-1);
42 
43  int agentId = fo_GetAgentKey(dbManager.getConnection(), AGENT_NAME, 0,
44  agentRevision, AGENT_DESC);
45  free(agentRevision);
46 
47  if (agentId <= 0)
48  bail(1);
49 
50  return agentId;
51 }
52 
62 int writeARS(const State &state, int arsId, int uploadId, int success,
64  PGconn *connection = dbManager.getConnection();
65  int agentId = state.getAgentId();
66 
67  return fo_WriteARS(connection, arsId, uploadId, agentId, AGENT_ARS, NULL,
68  success);
69 }
70 
/**
 * @brief Scan the files of one upload with ScanCode and store the findings.
 *
 * Work done, in order:
 *  -# query the file ids of the upload from the database handler,
 *  -# register every non-zero id in the id->name and name->id maps,
 *  -# write the file names to a temporary list file and run
 *     scanFileWithScancode() on it,
 *  -# read the output file line by line, keeping one extracted JSON object
 *     per line (see getScanResult()),
 *  -# parse every object inside an OpenMP parallel region and hand it to
 *     matchFileWithLicenses(),
 *  -# delete the output file.
 *
 * @param state                   State of the agent
 * @param uploadId                Upload to process
 * @param databaseHandler         Handler used for the queries; every thread
 *                                of the parallel region works on its own
 *                                handler created from spawn()
 * @param ignoreFilesWithMimeType Forwarded to queryFileIdsForUpload()
 * @return true when no matchFileWithLicenses() call failed; false when one
 *         failed or when the output file could not be opened
 */
78 bool processUploadId(const State &state, int uploadId,
79  ScancodeDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType) {
80  vector<unsigned long> fileIds =
81  databaseHandler.queryFileIdsForUpload(uploadId,ignoreFilesWithMimeType);
82 
83  unordered_map<unsigned long, string> fileIdsMap;
84  unordered_map<string, unsigned long> fileIdsMapReverse;
85 
86  bool errors = false;
87 
 // Two temporary path names: the list of files to scan and the scan output.
 // NOTE(review): tmpnam() only produces a name and does not create the file;
 // confirm that the resulting race with other processes is acceptable here.
88  string fileLocation = tmpnam(nullptr);
89  string outputFile = tmpnam(nullptr);
90 
91  size_t pFileCount = fileIds.size();
92  for (size_t it = 0; it < pFileCount; ++it) {
93  unsigned long pFileId = fileIds[it];
94 
 // An id of 0 is skipped and never reaches the lookup maps.
95  if (pFileId == 0)
96  continue;
97 
98  mapFileNameWithId(pFileId, fileIdsMap, fileIdsMapReverse, databaseHandler);
99 
101  }
102 
103  writeFileNameToTextFile(fileIdsMap, fileLocation);
104  scanFileWithScancode(state, fileLocation, outputFile);
105 
106  std::ifstream opfile(outputFile);
107  if (!opfile) {
108  std::cerr << "Error opening the JSON file.\n";
109  return false;
110  }
111 
 // One entry per line of the output file; getScanResult() yields an empty
 // string for a line without a complete object.
112  vector<string> scanResults;
113  string line;
114  while (getline(opfile, line)) {
115  scanResults.push_back(getScanResult(line));
116  }
117 
 // Every thread of the region below constructs its own database handler.
118 #pragma omp parallel default(none) \
119  shared(databaseHandler, scanResults, fileIdsMapReverse, state, errors)
120  {
121  ScancodeDatabaseHandler threadLocalDatabaseHandler(databaseHandler.spawn());
122 #pragma omp for
123  for (size_t i = 0; i < scanResults.size(); ++i) {
124  // Process each object
125  Json::CharReaderBuilder json_reader_builder;
126  auto scanner = unique_ptr<Json::CharReader>(json_reader_builder.newCharReader());
127  Json::Value scancodeValue;
128  string errs;
129  const bool isSuccessful = scanner->parse(scanResults[i].c_str(),
130  scanResults[i].c_str() + scanResults[i].length(), &scancodeValue,
131  &errs);
132 
 // Entries that fail to parse are skipped; they do not set `errors`.
133  if (isSuccessful) {
134  string fileName = scancodeValue["file"].asString();
135  unsigned long fileId = 0; // preserve old behavior
136 
 // A file name missing from the reverse map keeps the id 0.
137  auto it = fileIdsMapReverse.find(fileName);
138  if (it != fileIdsMapReverse.end()) {
139  fileId = it->second;
140  }
141  if (!matchFileWithLicenses(state, threadLocalDatabaseHandler,
142  scanResults[i], fileName, fileId)) {
 // NOTE(review): `errors` is shared between the threads and written here
 // without atomic/critical protection - confirm this is intended.
143  errors = true;
144  }
145  }
146  }
147  }
 // Only the output file is removed in this function; fileLocation is not.
148  if (unlink(outputFile.c_str()) != 0) {
149  LOG_FATAL("Unable to delete file %s \n", outputFile.c_str());
150  }
151 
152  return !errors;
153 }
154 
161 void mapFileNameWithId(unsigned long pFileId,
162  unordered_map<unsigned long, string> &fileIdsMap,
163  unordered_map<string, unsigned long> &fileIdsMapReverse,
164  ScancodeDatabaseHandler &databaseHandler) {
165  char *pFile = databaseHandler.getPFileNameForFileId(pFileId);
166  if (!pFile) {
167  LOG_FATAL("File not found %lu \n", pFileId);
168  bail(8);
169  }
170 
171  char *fileName = NULL;
172  {
173  fileName = fo_RepMkPath("files", pFile);
174  }
175  if (fileName) {
176  fo::File file(pFileId, fileName);
177 
178  fileIdsMap[file.getId()] = file.getFileName();
179  fileIdsMapReverse[file.getFileName()] = file.getId();
180 
181  free(fileName);
182  free(pFile);
183  } else {
184  LOG_FATAL("PFile not found in repo %lu \n", pFileId);
185  bail(7);
186  }
187 }
188 
193 void writeFileNameToTextFile(unordered_map<unsigned long, string> &fileIdsMap, string fileLocation) {
194  std::ofstream outputFile(fileLocation, std::ios::app); // Open in append mode
195 
196  if (!outputFile.is_open()) {
197  LOG_FATAL("Unable to open file");
198  }
199 
200  for (auto const& x : fileIdsMap)
201  {
202  outputFile << x.second <<"\n";
203  }
204 
205  outputFile.close();
206 }
207 
/**
 * @brief Extract the first complete top-level JSON object from a line.
 *
 * The line is scanned for the first '{' and the '}' that balances it; the
 * text between them (both braces included) is returned. Two cases need
 * special care:
 *  - braces inside a JSON string value (for example a file name containing
 *    '}') do not take part in the balancing, and a backslash-escaped quote
 *    does not end the string;
 *  - a stray '}' in front of the first '{' is ignored instead of corrupting
 *    the nesting counter.
 *
 * @param line One line of scanner output
 * @return The extracted object, or an empty string when the line holds no
 *         complete object
 */
string getScanResult(const string& line) {
  size_t startIndex = 0;
  size_t depth = 0;
  bool inString = false;
  bool escaped = false;

  for (size_t i = 0; i < line.length(); ++i) {
    const char c = line[i];

    if (inString) {
      // Inside a string only the closing quote matters; honour escapes.
      if (escaped) {
        escaped = false;
      } else if (c == '\\') {
        escaped = true;
      } else if (c == '"') {
        inString = false;
      }
      continue;
    }

    if (c == '"') {
      // Quotes outside of any object are plain noise and are not tracked.
      if (depth > 0) {
        inString = true;
      }
    } else if (c == '{') {
      if (depth == 0) {
        startIndex = i;
      }
      ++depth;
    } else if (c == '}') {
      if (depth == 0) {
        continue; // closing brace without an opening one
      }
      --depth;
      if (depth == 0) {
        return line.substr(startIndex, i - startIndex + 1);
      }
    }
  }
  return string();
}
237 
248 bool matchFileWithLicenses(const State &state,
249  ScancodeDatabaseHandler &databaseHandler,
250  string scancodeResult, string &filename, unsigned long fileId) {
251 map<string, vector<Match>> scancodeData =
252  extractDataFromScancodeResult(scancodeResult, filename);
253 return saveLicenseMatchesToDatabase(
254  state, scancodeData["scancode_license"], fileId,
255  databaseHandler) &&
256  saveOtherMatchesToDatabase(
257  state, scancodeData["scancode_statement"], fileId,
258  databaseHandler) &&
259  saveOtherMatchesToDatabase(
260  state, scancodeData["scancode_author"], fileId,
261  databaseHandler) &&
262  saveOtherMatchesToDatabase(
263  state, scancodeData["scancode_email"], fileId,
264  databaseHandler) &&
265  saveOtherMatchesToDatabase(
266  state, scancodeData["scancode_url"], fileId,
267  databaseHandler);
268 }
269 
283 bool saveLicenseMatchesToDatabase(const State &state,
284  const vector<Match> &matches,
285  unsigned long pFileId,
286  ScancodeDatabaseHandler &databaseHandler)
287  {
288  for (const auto & match : matches) {
289  databaseHandler.insertOrCacheLicenseIdForName(
290  match.getMatchName(), match.getLicenseFullName(), match.getTextUrl());
291  }
292 
293  if (!databaseHandler.begin()) {
294  return false;
295  }
296  for (const auto & match : matches) {
297  int agentId = state.getAgentId();
298  string rfShortname = match.getMatchName();
299  int percent = match.getPercentage();
300  unsigned start = match.getStartPosition();
301  unsigned length = match.getLength();
302  unsigned long licenseId =
303  databaseHandler.getCachedLicenseIdForName(rfShortname);
304 
305  if (licenseId == 0) {
306  databaseHandler.rollback();
307  LOG_ERROR("cannot get licenseId for shortname '%s' \n",
308  rfShortname.c_str());
309  return false;
310  }
311  if (rfShortname == "No_license_found") {
312  if (!databaseHandler.insertNoResultInDatabase(agentId, pFileId, licenseId)) {
313  databaseHandler.rollback();
314  LOG_ERROR("failing save licenseMatch \n");
315  return false;
316  }
317  } else {
318  long licenseFileId = databaseHandler.saveLicenseMatch(agentId, pFileId,
319  licenseId, percent);
320  if (licenseFileId > 0) {
321  bool highlightRes =
322  databaseHandler.saveHighlightInfo(licenseFileId, start, length);
323  if (!highlightRes) {
324  databaseHandler.rollback();
325  LOG_ERROR("failing save licensehighlight \n");
326  }
327  } else {
328  databaseHandler.rollback();
329  LOG_ERROR("failing save licenseMatch \n");
330  return false;
331  }
332  }
333  }
334  return databaseHandler.commit();
335 }
336 
345 bool saveOtherMatchesToDatabase(const State &state,
346  const vector<Match> &matches,
347  unsigned long pFileId,
348  ScancodeDatabaseHandler &databaseHandler) {
349 
350  if (!databaseHandler.begin())
351  return false;
352 
353  for (const auto & match : matches) {
354  DatabaseEntry entry(match,state.getAgentId(),pFileId);
355 
356  if (!databaseHandler.insertInDatabase(entry))
357  {
358  databaseHandler.rollback();
359  LOG_ERROR("failing save otherMatches \n");
360  return false;
361  }
362  }
363  return databaseHandler.commit();
364 }
365 
366 // Command line parser: the CLI options of the agent are added here
367 
376 bool parseCommandLine(int argc, char **argv, string &cliOption, bool &ignoreFilesWithMimeType)
377 {
378  po::options_description desc(AGENT_NAME ": available options");
379  desc.add_options()
380  ("help,h", "show this help")
381  ("ignoreFilesWithMimeType,I","ignoreFilesWithMimeType")
382  ("license,l", "scancode license")
383  ("copyright,r", "scancode copyright")
384  ("email,e", "scancode email")
385  ("url,u", "scancode url")
386  ("config,c", po::value<string>(), "path to the sysconfigdir")
387  ("scheduler_start", "specifies, that the command was called by the scheduler")
388  ("userID", po::value<int>(), "the id of the user that created the job (only in combination with --scheduler_start)")
389  ("groupID", po::value<int>(), "the id of the group of the user that created the job (only in combination with --scheduler_start)")
390  ("jobId", po::value<int>(), "the id of the job (only in combination with --scheduler_start)");
391  po::variables_map vm;
392  try
393  {
394  po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
395  if (vm.count("help") > 0)
396  {
397  cout << desc << "\n";
398  exit(EXIT_SUCCESS);
399  }
400  cliOption = "";
401  cliOption += vm.count("license") > 0 ? "l" : "";
402  cliOption += vm.count("copyright") > 0 ? "c" : "";
403  cliOption += vm.count("email") > 0 ? "e" : "";
404  cliOption += vm.count("url") > 0 ? "u" : "";
405  ignoreFilesWithMimeType =
406  vm.count("ignoreFilesWithMimeType") > 0 ? true : false;
407  }
408  catch (boost::bad_any_cast &)
409  {
410  LOG_FATAL("wrong parameter type\n ");
411  cout << desc << "\n";
412  return false;
413  }
414  catch (po::error &)
415  {
416  LOG_FATAL("wrong command line arguments\n");
417  cout << desc << "\n";
418  return false;
419  }
420  return true;
421 }
bool processUploadId(const CompatibilityState &state, int uploadId, CompatibilityDatabaseHandler &databaseHandler, int groupId)
CompatibilityState getState(DbManager &dbManager, CompatibilityCliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
int writeARS(const CompatibilityState &state, int arsId, int uploadId, int success, DbManager &dbManager)
int queryAgentId(DbManager &dbManager)
void bail(int exitval)
Maps agent data to database schema.
Definition: database.hpp:25
bool saveHighlightInfo(long licenseFileId, unsigned start, unsigned length)
save highlight information in the highlight table
unsigned long getCachedLicenseIdForName(std::string const &rfShortName) const
for given short name search license
long saveLicenseMatch(int agentId, long pFileId, long licenseId, int percentMatch)
save license match with license_ref table in license_file table Insert license if already not present...
bool insertInDatabase(DatabaseEntry &entry) const
insert copyright/author in scancode_copyright/scancode_author table
void insertOrCacheLicenseIdForName(std::string const &rfShortName, std::string const &rfFullname, std::string const &rfTexturl)
calling function for selectOrInsertLicenseIdForName
bool insertNoResultInDatabase(int agentId, long pFileId, long licenseId)
Insert null value of license for uploads having no licenses.
std::vector< unsigned long > queryFileIdsForUpload(int uploadId, bool ignoreFilesWithMimeType)
Function to get pfile ID for uploads.
ScancodeDatabaseHandler spawn() const
Instantiate a new object spawn for ScanCode Database handler. Used to create new objects for threads.
Definition: state.hpp:16
int getAgentId() const
getter function for agent Id
Definition: state.cc:14
bool commit() const
COMMIT a transaction block in DB.
bool begin() const
BEGIN a transaction block in DB.
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
bool rollback() const
ROLLBACK a transaction block in DB.
DB wrapper for agents.
Class to handle file related operations.
Definition: files.hpp:26
Abstract class to provide interface to scanners.
Definition: scanners.hpp:52
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler)
Scan a given file with all available scanners and save findings to database.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:214
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
Definition: libfossagent.c:158
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:352
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
start($application)
start the application Assumes application is restartable via /etc/init.d/<script>....
Definition: pkgConfig.php:1214
Store the results of a regex match.
Definition: scanners.hpp:28