FOSSology  4.4.0
Open Source License Compliance by Open Source Software
scancode_utils.cc
1 /*
2  SPDX-FileCopyrightText: © 2021 Sarita Singh <saritasingh.0425@gmail.com>
3 
4  SPDX-License-Identifier: GPL-2.0-only
5 */
6 
7 #include "scancode_utils.hpp"
8 
9 namespace po = boost::program_options;
10 
15 void bail(int exitval) {
16  fo_scheduler_disconnect(exitval);
17  exit(exitval);
18 }
19 
26  int agentId = queryAgentId(dbManager);
27  return State(agentId);
28 }
29 
36  char *COMMIT_HASH = fo_sysconfig(AGENT_NAME, "COMMIT_HASH");
37  char *VERSION = fo_sysconfig(AGENT_NAME, "VERSION");
38  char *agentRevision;
39 
40  if (!asprintf(&agentRevision, "%s.%s", VERSION, COMMIT_HASH))
41  bail(-1);
42 
43  int agentId = fo_GetAgentKey(dbManager.getConnection(), AGENT_NAME, 0,
44  agentRevision, AGENT_DESC);
45  free(agentRevision);
46 
47  if (agentId <= 0)
48  bail(1);
49 
50  return agentId;
51 }
52 
62 int writeARS(const State &state, int arsId, int uploadId, int success,
64  PGconn *connection = dbManager.getConnection();
65  int agentId = state.getAgentId();
66 
67  return fo_WriteARS(connection, arsId, uploadId, agentId, AGENT_ARS, NULL,
68  success);
69 }
70 
78 bool processUploadId(const State &state, int uploadId,
79  ScancodeDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType) {
80  vector<unsigned long> fileIds =
81  databaseHandler.queryFileIdsForUpload(uploadId,ignoreFilesWithMimeType);
82 
83  unordered_map<unsigned long, string> fileIdsMap;
84  unordered_map<string, unsigned long> fileIdsMapReverse;
85 
86  bool errors = false;
87 
88  string fileLocation = tmpnam(nullptr);
89  string outputFile = tmpnam(nullptr);
90 
91  size_t pFileCount = fileIds.size();
92  for (size_t it = 0; it < pFileCount; ++it) {
93  unsigned long pFileId = fileIds[it];
94 
95  if (pFileId == 0)
96  continue;
97 
98  mapFileNameWithId(pFileId, fileIdsMap, fileIdsMapReverse, databaseHandler);
99 
101  }
102 
103  writeFileNameToTextFile(fileIdsMap, fileLocation);
104  scanFileWithScancode(state, fileLocation, outputFile);
105 
106  std::ifstream opfile(outputFile);
107  if (!opfile) {
108  std::cerr << "Error opening the JSON file.\n";
109  return false;
110  }
111 
112  vector<string> scanResults;
113  string line;
114  while (getline(opfile, line)) {
115  scanResults.push_back(getScanResult(line));
116  }
117 
118 #pragma omp parallel default(none) \
119  shared(databaseHandler, scanResults, fileIdsMapReverse, state, errors)
120  {
121  ScancodeDatabaseHandler threadLocalDatabaseHandler(databaseHandler.spawn());
122 #pragma omp for
123  for (size_t i = 0; i < scanResults.size(); ++i) {
124  // Process each object
125  Json::CharReaderBuilder json_reader_builder;
126  auto scanner = unique_ptr<Json::CharReader>(json_reader_builder.newCharReader());
127  Json::Value scancodeValue;
128  string errs;
129  const bool isSuccessful = scanner->parse(scanResults[i].c_str(),
130  scanResults[i].c_str() + scanResults[i].length(), &scancodeValue,
131  &errs);
132 
133  if (isSuccessful) {
134  string fileName = scancodeValue["file"].asString();
135  unsigned long fileId = fileIdsMapReverse[fileName];
136  if (!matchFileWithLicenses(state, threadLocalDatabaseHandler,
137  scanResults[i], fileName, fileId)) {
138  errors = true;
139  }
140  }
141  }
142  }
143  if (unlink(outputFile.c_str()) != 0) {
144  LOG_FATAL("Unable to delete file %s \n", outputFile.c_str());
145  }
146 
147  return !errors;
148 }
149 
156 void mapFileNameWithId(unsigned long pFileId,
157  unordered_map<unsigned long, string> &fileIdsMap,
158  unordered_map<string, unsigned long> &fileIdsMapReverse,
159  ScancodeDatabaseHandler &databaseHandler) {
160  char *pFile = databaseHandler.getPFileNameForFileId(pFileId);
161  if (!pFile) {
162  LOG_FATAL("File not found %lu \n", pFileId);
163  bail(8);
164  }
165 
166  char *fileName = NULL;
167  {
168  fileName = fo_RepMkPath("files", pFile);
169  }
170  if (fileName) {
171  fo::File file(pFileId, fileName);
172 
173  fileIdsMap[file.getId()] = file.getFileName();
174  fileIdsMapReverse[file.getFileName()] = file.getId();
175 
176  free(fileName);
177  free(pFile);
178  } else {
179  LOG_FATAL("PFile not found in repo %lu \n", pFileId);
180  bail(7);
181  }
182 }
183 
188 void writeFileNameToTextFile(unordered_map<unsigned long, string> &fileIdsMap, string fileLocation) {
189  std::ofstream outputFile(fileLocation, std::ios::app); // Open in append mode
190 
191  if (!outputFile.is_open()) {
192  LOG_FATAL("Unable to open file");
193  }
194 
195  for (auto const& x : fileIdsMap)
196  {
197  outputFile << x.second <<"\n";
198  }
199 
200  outputFile.close();
201 }
202 
/**
 * \brief Extract the first complete top-level JSON object from a line.
 *
 * Braces are balanced to find the end of the object. Braces that appear
 * inside a JSON string value (for example in a file name) are not counted,
 * and a closing brace seen before any object was opened is ignored.
 *
 * \param line One line of scanner output
 * \return The text from the first '{' up to its matching '}', or an empty
 *         string when the line holds no complete object
 */
std::string getScanResult(const std::string& line) {
  std::size_t startIndex = 0;
  std::size_t depth = 0;
  bool inString = false;   // inside a "..." value of the current object
  bool escaped = false;    // previous character was a backslash in a string

  for (std::size_t i = 0; i < line.length(); ++i) {
    const char c = line[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (c == '\\') {
        escaped = true;
      } else if (c == '"') {
        inString = false;
      }
      continue;
    }

    if (c == '"') {
      // Quotes only matter inside an object; text before it is skipped as-is.
      if (depth > 0) {
        inString = true;
      }
    } else if (c == '{') {
      if (depth == 0) {
        startIndex = i;
      }
      ++depth;
    } else if (c == '}') {
      if (depth == 0) {
        continue;  // unmatched closer: do not let the counter wrap around
      }
      --depth;
      if (depth == 0) {
        return line.substr(startIndex, i - startIndex + 1);
      }
    }
  }
  return std::string();
}
232 
243 bool matchFileWithLicenses(const State &state,
244  ScancodeDatabaseHandler &databaseHandler,
245  string scancodeResult, string &filename, unsigned long fileId) {
246 map<string, vector<Match>> scancodeData =
247  extractDataFromScancodeResult(scancodeResult, filename);
248 return saveLicenseMatchesToDatabase(
249  state, scancodeData["scancode_license"], fileId,
250  databaseHandler) &&
251  saveOtherMatchesToDatabase(
252  state, scancodeData["scancode_statement"], fileId,
253  databaseHandler) &&
254  saveOtherMatchesToDatabase(
255  state, scancodeData["scancode_author"], fileId,
256  databaseHandler) &&
257  saveOtherMatchesToDatabase(
258  state, scancodeData["scancode_email"], fileId,
259  databaseHandler) &&
260  saveOtherMatchesToDatabase(
261  state, scancodeData["scancode_url"], fileId,
262  databaseHandler);
263 }
264 
278 bool saveLicenseMatchesToDatabase(const State &state,
279  const vector<Match> &matches,
280  unsigned long pFileId,
281  ScancodeDatabaseHandler &databaseHandler)
282  {
283  for (const auto & match : matches) {
284  databaseHandler.insertOrCacheLicenseIdForName(
285  match.getMatchName(), match.getLicenseFullName(), match.getTextUrl());
286  }
287 
288  if (!databaseHandler.begin()) {
289  return false;
290  }
291  for (const auto & match : matches) {
292  int agentId = state.getAgentId();
293  string rfShortname = match.getMatchName();
294  int percent = match.getPercentage();
295  unsigned start = match.getStartPosition();
296  unsigned length = match.getLength();
297  unsigned long licenseId =
298  databaseHandler.getCachedLicenseIdForName(rfShortname);
299 
300  if (licenseId == 0) {
301  databaseHandler.rollback();
302  LOG_ERROR("cannot get licenseId for shortname '%s' \n",
303  rfShortname.c_str());
304  return false;
305  }
306  if (rfShortname == "No_license_found") {
307  if (!databaseHandler.insertNoResultInDatabase(agentId, pFileId, licenseId)) {
308  databaseHandler.rollback();
309  LOG_ERROR("failing save licenseMatch \n");
310  return false;
311  }
312  } else {
313  long licenseFileId = databaseHandler.saveLicenseMatch(agentId, pFileId,
314  licenseId, percent);
315  if (licenseFileId > 0) {
316  bool highlightRes =
317  databaseHandler.saveHighlightInfo(licenseFileId, start, length);
318  if (!highlightRes) {
319  databaseHandler.rollback();
320  LOG_ERROR("failing save licensehighlight \n");
321  }
322  } else {
323  databaseHandler.rollback();
324  LOG_ERROR("failing save licenseMatch \n");
325  return false;
326  }
327  }
328  }
329  return databaseHandler.commit();
330 }
331 
340 bool saveOtherMatchesToDatabase(const State &state,
341  const vector<Match> &matches,
342  unsigned long pFileId,
343  ScancodeDatabaseHandler &databaseHandler) {
344 
345  if (!databaseHandler.begin())
346  return false;
347 
348  for (const auto & match : matches) {
349  DatabaseEntry entry(match,state.getAgentId(),pFileId);
350 
351  if (!databaseHandler.insertInDatabase(entry))
352  {
353  databaseHandler.rollback();
354  LOG_ERROR("failing save otherMatches \n");
355  return false;
356  }
357  }
358  return databaseHandler.commit();
359 }
360 
// CLI: command line parser of the agent
362 
371 bool parseCommandLine(int argc, char **argv, string &cliOption, bool &ignoreFilesWithMimeType)
372 {
373  po::options_description desc(AGENT_NAME ": available options");
374  desc.add_options()
375  ("help,h", "show this help")
376  ("ignoreFilesWithMimeType,I","ignoreFilesWithMimeType")
377  ("license,l", "scancode license")
378  ("copyright,r", "scancode copyright")
379  ("email,e", "scancode email")
380  ("url,u", "scancode url")
381  ("config,c", po::value<string>(), "path to the sysconfigdir")
382  ("scheduler_start", "specifies, that the command was called by the scheduler")
383  ("userID", po::value<int>(), "the id of the user that created the job (only in combination with --scheduler_start)")
384  ("groupID", po::value<int>(), "the id of the group of the user that created the job (only in combination with --scheduler_start)")
385  ("jobId", po::value<int>(), "the id of the job (only in combination with --scheduler_start)");
386  po::variables_map vm;
387  try
388  {
389  po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
390  if (vm.count("help") > 0)
391  {
392  cout << desc << "\n";
393  exit(EXIT_SUCCESS);
394  }
395  cliOption = "";
396  cliOption += vm.count("license") > 0 ? "l" : "";
397  cliOption += vm.count("copyright") > 0 ? "c" : "";
398  cliOption += vm.count("email") > 0 ? "e" : "";
399  cliOption += vm.count("url") > 0 ? "u" : "";
400  ignoreFilesWithMimeType =
401  vm.count("ignoreFilesWithMimeType") > 0 ? true : false;
402  }
403  catch (boost::bad_any_cast &)
404  {
405  LOG_FATAL("wrong parameter type\n ");
406  cout << desc << "\n";
407  return false;
408  }
409  catch (po::error &)
410  {
411  LOG_FATAL("wrong command line arguments\n");
412  cout << desc << "\n";
413  return false;
414  }
415  return true;
416 }
Maps agent data to database schema.
Definition: database.hpp:25
bool saveHighlightInfo(long licenseFileId, unsigned start, unsigned length)
save highlight information in the highlight table
unsigned long getCachedLicenseIdForName(std::string const &rfShortName) const
for given short name search license
long saveLicenseMatch(int agentId, long pFileId, long licenseId, int percentMatch)
save license match with license_ref table in license_file table Insert license if already not present...
bool insertInDatabase(DatabaseEntry &entry) const
insert copyright/author in scancode_copyright/scancode_author table
void insertOrCacheLicenseIdForName(std::string const &rfShortName, std::string const &rfFullname, std::string const &rfTexturl)
calling function for selectOrInsertLicenseIdForName
bool insertNoResultInDatabase(int agentId, long pFileId, long licenseId)
Insert null value of license for uploads having no licenses.
std::vector< unsigned long > queryFileIdsForUpload(int uploadId, bool ignoreFilesWithMimeType)
Function to get pfile ID for uploads.
ScancodeDatabaseHandler spawn() const
Instantiate a new ScanCode database handler object. Used to create new objects for threads.
Definition: state.hpp:16
int getAgentId() const
getter function for agent Id
Definition: state.cc:14
bool commit() const
COMMIT a transaction block in DB.
bool begin() const
BEGIN a transaction block in DB.
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
bool rollback() const
ROLLBACK a transaction block in DB.
DB wrapper for agents.
Class to handle file related operations.
Definition: files.hpp:26
Abstract class to provide interface to scanners.
Definition: scanners.hpp:52
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler)
Scan a given file with all available scanners and save findings to database.
int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager &dbManager)
Call C function fo_WriteARS() and translate the arguments.
CopyrightState getState(CliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
bool processUploadId(const CopyrightState &state, int agentId, int uploadId, CopyrightDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType)
Process a given upload id, scan from statements and add to database.
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
Definition: libfossagent.c:214
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
Definition: libfossagent.c:158
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
Definition: libfossrepo.c:352
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
start($application)
Start the application. Assumes the application is restartable via /etc/init.d/<script>....
Definition: pkgConfig.php:1214
Store the results of a regex match.
Definition: scanners.hpp:28