FOSSology  4.4.0
Open Source License Compliance by Open Source Software
scancode_dbhandler.cc
1 /*
2  SPDX-FileCopyrightText: © 2021 Sarita Singh <saritasingh.0425@gmail.com>
3 
4  SPDX-License-Identifier: GPL-2.0-only
5 */
6 
7 #include "scancode_dbhandler.hpp"
8 
13  agent_fk(0),
14  pfile_fk(0),
15  content(""),
16  hash(""),
17  type(""),
18  copy_startbyte(0),
19  copy_endbyte(0)
20 {
21 };
22 
29 DatabaseEntry::DatabaseEntry(Match match, unsigned long agentId,
30  unsigned long pfileId) :
31  agent_fk(agentId), pfile_fk(pfileId), hash("")
32 {
33  content = match.getMatchName();
34  type = match.getType();
35  copy_startbyte = match.getStartPosition();
36  copy_endbyte = match.getStartPosition() + match.getLength();
37 };
38 
46 {
47  std::string result;
48  for (size_t i = 0; i < size; ++i)
49  {
50  if (i != 0)
51  result += ", ";
52  result += in[i].name;
53  result += " ";
54  result += in[i].type;
55  result += " ";
56  result += in[i].creationFlags;
57  }
58  return result;
59 }
60 
67 {
68 }
69 
77 {
78  DbManager spawnedDbMan(dbManager.spawn());
79  return ScancodeDatabaseHandler(spawnedDbMan);
80 }
81 
87 vector<unsigned long> ScancodeDatabaseHandler::queryFileIdsForUpload(int uploadId, bool ignoreFilesWithMimeType)
88 {
89  return queryFileIdsVectorForUpload(uploadId,ignoreFilesWithMimeType);
90 }
91 
98 bool ScancodeDatabaseHandler::insertNoResultInDatabase(int agentId, long pFileId ,long licenseId)
99 {
100  return dbManager.execPrepared(
101  fo_dbManager_PrepareStamement(dbManager.getStruct_dbManager(),
102  "scancodeInsertNoLicense",
103  "INSERT INTO license_file"
104  "(agent_fk, pfile_fk, rf_fk)"
105  " VALUES($1,$2,$3)",
106  int, long, long
107  ),
108  agentId, pFileId, licenseId);
109 }
110 
121  int agentId,
122  long pFileId,
123  long licenseId,
124  int percentMatch)
125 {
127  fo_dbManager_PrepareStamement(
129  "saveLicenseMatch",
130  "WITH "
131  "selectExisting AS ("
132  "SELECT fl_pk FROM ONLY license_file"
133  " WHERE (agent_fk = $1 AND pfile_fk = $2 AND rf_fk = $3)"
134  "),"
135  "insertNew AS ("
136  "INSERT INTO license_file"
137  "(agent_fk, pfile_fk, rf_fk, rf_match_pct)"
138  " SELECT $1, $2, $3, $4"
139  " WHERE NOT EXISTS(SELECT * FROM license_file WHERE (agent_fk = $1 AND pfile_fk = $2 AND rf_fk = $3))"
140  " RETURNING fl_pk"
141  ") "
142  "SELECT fl_pk FROM insertNew "
143  "UNION "
144  "SELECT fl_pk FROM selectExisting",
145  int, long, long, unsigned
146  ),
147  agentId,
148  pFileId,
149  licenseId,
150  percentMatch
151  );
152 
153  long licenseFilePK= -1;
154  if(!result.isFailed()){
155 
156  vector<unsigned long> res = result.getSimpleResults<unsigned long>(0,
158 
159  licenseFilePK = res.at(0);
160  }
161  return licenseFilePK;
162 }
163 
172  long licenseFileId,
173  unsigned start,
174  unsigned length)
175 {
176  return dbManager.execPrepared(
177  fo_dbManager_PrepareStamement(
179  "saveHighlightInfo",
180  "INSERT INTO highlight"
181  "(fl_fk, type, start, len)"
182  " SELECT $1, 'L', $2, $3 "
183  " WHERE NOT EXISTS(SELECT * FROM highlight WHERE (fl_fk = $1 AND start = $2 AND len = $3))",
184  long, unsigned, unsigned
185  ),
186  licenseFileId,
187  start,
188  length
189  );}
190 
197 void ScancodeDatabaseHandler::insertOrCacheLicenseIdForName(string const& rfShortName, string const& rfFullName, string const& rfTextUrl)
198 {
199  if (getCachedLicenseIdForName(rfShortName)==0)
200  {
201  unsigned long licenseId = selectOrInsertLicenseIdForName(rfShortName, rfFullName, rfTextUrl);
202 
203  if (licenseId > 0)
204  {
205  licenseRefCache.insert(std::make_pair(rfShortName, licenseId));
206  }
207  }
208 }
209 
215 unsigned long ScancodeDatabaseHandler::getCachedLicenseIdForName(string const& rfShortName) const
216 {
217  auto findIterator = licenseRefCache.find(rfShortName);
218  if (findIterator != licenseRefCache.end())
219  {
220  return findIterator->second;
221  }
222  else
223  {
224  return 0;
225  }
226 }
227 
/**
 * \brief Check whether a string ends with a given suffix.
 * \param firstString String to inspect
 * \param ending      Suffix to look for
 * \return True if firstString ends with ending (an empty ending always
 *         matches), false otherwise, including when ending is longer than
 *         firstString
 */
bool hasEnding(std::string const &firstString, std::string const &ending)
{
  // A suffix longer than the string itself can never match.
  if (ending.length() > firstString.length())
  {
    return false;
  }

  const std::string::size_type suffixOffset =
    firstString.length() - ending.length();
  return firstString.compare(suffixOffset, ending.length(), ending) == 0;
}
247 
255 unsigned long ScancodeDatabaseHandler::selectOrInsertLicenseIdForName(string rfShortName, string rfFullname, string rfTexturl)
256 {
257  bool success = false;
258  unsigned long result = 0;
259 
260  icu::UnicodeString unicodeCleanShortname = fo::recodeToUnicode(rfShortName);
261 
262  // Clean shortname to get utf8 string
263  rfShortName = "";
264  unicodeCleanShortname.toUTF8String(rfShortName);
265 
266  fo_dbManager_PreparedStatement *searchWithOr = fo_dbManager_PrepareStamement(
268  "selectLicenseIdWithOrScancode",
269  " SELECT rf_pk FROM ONLY license_ref"
270  " WHERE LOWER(rf_shortname) = LOWER($1)"
271  " OR LOWER(rf_shortname) = LOWER($2);",
272  char*, char*);
273 
274  if (hasEnding(rfShortName, "+") || hasEnding(rfShortName, "-or-later"))
275  {
276  string tempShortName(rfShortName);
277  /* Convert shortname to lower-case to make it case-insensitive*/
278  std::transform(tempShortName.begin(), tempShortName.end(), tempShortName.begin(),
279  ::tolower);
280  string plus("+");
281  string orLater("-or-later");
282 
283  unsigned long int plusLast = tempShortName.rfind(plus);
284  unsigned long int orLaterLast = tempShortName.rfind(orLater);
285 
286  /* Remove last occurrence of + and -or-later (if found) */
287  if (plusLast != string::npos)
288  {
289  tempShortName.erase(plusLast, string::npos);
290  }
291  if (orLaterLast != string::npos)
292  {
293  tempShortName.erase(orLaterLast, string::npos);
294  }
295 
296  QueryResult queryResult = dbManager.execPrepared(searchWithOr,
297  (tempShortName + plus).c_str(), (tempShortName + orLater).c_str());
298 
299  success = queryResult && queryResult.getRowCount() > 0;
300  if (success)
301  {
302  result = queryResult.getSimpleResults<unsigned long>(0, fo::stringToUnsignedLong)[0];
303  }
304  }
305  else
306  {
307  string tempShortName(rfShortName);
308  /* Convert shortname to lower-case */
309  std::transform(tempShortName.begin(), tempShortName.end(), tempShortName.begin(),
310  ::tolower);
311  string only("-only");
312 
313  unsigned long int onlyLast = tempShortName.rfind(only);
314 
315  /* Remove last occurrence of -only (if found) */
316  if (onlyLast != string::npos)
317  {
318  tempShortName.erase(onlyLast, string::npos);
319  }
320 
321  QueryResult queryResult = dbManager.execPrepared(searchWithOr,
322  tempShortName.c_str(), (tempShortName + only).c_str());
323 
324  success = queryResult && queryResult.getRowCount() > 0;
325  if (success)
326  {
327  result = queryResult.getSimpleResults<unsigned long>(0, fo::stringToUnsignedLong)[0];
328  }
329  }
330 
331  if (result > 0)
332  {
333  return result;
334  }
335 
336 
337  unsigned count = 0;
338  while ((!success) && count++<3)
339  {
340  if (!dbManager.begin())
341  continue;
342 
343  dbManager.queryPrintf("LOCK TABLE license_ref");
344  QueryResult queryResult = dbManager.execPrepared(
345  fo_dbManager_PrepareStamement(
347  "selectOrInsertLicenseIdForName",
348  "WITH "
349  "selectExisting AS ("
350  "SELECT rf_pk FROM ONLY license_ref"
351  " WHERE rf_shortname = $1"
352  "),"
353  "insertNew AS ("
354  "INSERT INTO license_ref(rf_shortname, rf_text, rf_detector_type, rf_fullname, rf_url)"
355  " SELECT $1, $2, $3, $4, $5"
356  " WHERE NOT EXISTS(SELECT * FROM selectExisting)"
357  " RETURNING rf_pk"
358  ") "
359 
360  "SELECT rf_pk FROM insertNew "
361  "UNION "
362  "SELECT rf_pk FROM selectExisting",
363  char*, char*, int, char* , char*
364  ),
365  rfShortName.c_str(),
366  "License by Scancode.",
367  4,
368  rfFullname.c_str(),
369  rfTexturl.c_str()
370 
371  );
372 
373  success = queryResult && queryResult.getRowCount() > 0;
374 
375  if (success) {
376  success &= dbManager.commit();
377 
378  if (success) {
379  result = queryResult.getSimpleResults(0, fo::stringToUnsignedLong)[0];
380  }
381  } else {
383  }
384  }
385 
386  return result;
387 }
388 
395 {
396  std::string tableName = "scancode_author";
397 
398  if("scancode_statement" == entry.type ){
399  tableName = "scancode_copyright";
400  }
401 
402  return dbManager.execPrepared(
403  fo_dbManager_PrepareStamement(
405  ("insertInDatabaseFor " + tableName).c_str(),
406  ("INSERT INTO "+ tableName +
407  "(agent_fk, pfile_fk, content, hash, type, copy_startbyte, copy_endbyte)" +
408  " SELECT $1, $2, $3, md5($3), $4, $5, $6 "
409  " WHERE NOT EXISTS(SELECT * FROM " + tableName +
410  " WHERE (agent_fk= $1 AND pfile_fk = $2 AND hash = md5($3)))").c_str(),
411  long, long, char*, char*, int, int
412  ),
413  entry.agent_fk, entry.pfile_fk,
414  entry.content.c_str(),
415  entry.type.c_str(),
416  entry.copy_startbyte, entry.copy_endbyte
417  );
418 }
419 
425 {
426  int failedCounter = 0;
427  bool tablesChecked = false;
428 
430  while (!tablesChecked && failedCounter < MAX_TABLE_CREATION_RETRIES)
431  {
432  dbManager.begin();
433  tablesChecked = createTableAgentFindings("scancode_copyright") && createTableAgentFindings("scancode_author")&& createTableAgentEvents("scancode_copyright_event") && createTableAgentEvents("scancode_author_event");
434 
435 
436  if (tablesChecked)
437  dbManager.commit();
438  else
439  {
441  ++failedCounter;
442  if (failedCounter < MAX_TABLE_CREATION_RETRIES)
443  LOG_WARNING("table creation failed: trying again (%d/%d) \n", failedCounter, MAX_TABLE_CREATION_RETRIES);
444  }
445  }
446  if (tablesChecked && (failedCounter > 0))
447  LOG_NOTICE("table creation succeeded on try %d/%d \n", failedCounter, MAX_TABLE_CREATION_RETRIES);
448  dbManager.ignoreWarnings(false);
449  return tablesChecked;
450 }
451 
457 #define CSEQUENCE_NAME "scancode_copyright_pk_seq"
458 #define CCOLUMN_NAME_PK "scancode_copyright_pk"
459  {CCOLUMN_NAME_PK, "bigint",
460  "PRIMARY KEY DEFAULT nextval('" CSEQUENCE_NAME "'::regclass)"},
461  {"agent_fk", "bigint", "NOT NULL"},
462  {"pfile_fk", "bigint", "NOT NULL"},
463  {"content", "text", ""},
464  {"hash", "text", ""},
465  {"type", "text", ""},
466  {"copy_startbyte", "integer", ""},
467  {"copy_endbyte", "integer", ""},
468  {"is_enabled", "boolean", "NOT NULL DEFAULT TRUE"},
469 };
470 
476 #define ASEQUENCE_NAME "scancode_author_pk_seq"
477 #define ACOLUMN_NAME_PK "scancode_author_pk"
478  {ACOLUMN_NAME_PK, "bigint",
479  "PRIMARY KEY DEFAULT nextval('" ASEQUENCE_NAME "'::regclass)"},
480  {"agent_fk", "bigint", "NOT NULL"},
481  {"pfile_fk", "bigint", "NOT NULL"},
482  {"content", "text", ""},
483  {"hash", "text", ""},
484  {"type", "text", ""},
485  {"copy_startbyte", "integer", ""},
486  {"copy_endbyte", "integer", ""},
487  {"is_enabled", "boolean", "NOT NULL DEFAULT TRUE"},
488 };
489 
496 {
497  const char *tablename = "";
498  const char *sequencename = "";
499  if (tableName == "scancode_copyright") {
500  tablename = "scancode_copyright";
501  sequencename = "scancode_copyright_pk_seq";
502  } else if (tableName == "scancode_author") {
503  tablename = "scancode_author";
504  sequencename = "scancode_author_pk_seq";
505  }
506  if (!dbManager.sequenceExists(sequencename)) {
507  RETURN_IF_FALSE(dbManager.queryPrintf("CREATE SEQUENCE %s"
508  " START WITH 1"
509  " INCREMENT BY 1"
510  " NO MAXVALUE"
511  " NO MINVALUE"
512  " CACHE 1",sequencename));
513  }
514 
515  if (!dbManager.tableExists(tablename))
516  {
517  if (tableName == "scancode_copyright") {
519  RETURN_IF_FALSE(dbManager.queryPrintf("CREATE table %s(%s)", tablename,
521  )
522  );
523  } else if (tableName == "scancode_author") {
525  RETURN_IF_FALSE(dbManager.queryPrintf("CREATE table %s(%s)", tablename,
527  )
528  );
529  }
530 
531  RETURN_IF_FALSE(dbManager.queryPrintf(
532  "CREATE INDEX %s_agent_fk_index"
533  " ON %s"
534  " USING BTREE (agent_fk)",
535  tablename, tablename
536  ));
537 
538  RETURN_IF_FALSE(dbManager.queryPrintf(
539  "CREATE INDEX %s_hash_index"
540  " ON %s"
541  " USING BTREE (hash)",
542  tablename, tablename
543  ));
544 
545  RETURN_IF_FALSE(dbManager.queryPrintf(
546  "CREATE INDEX %s_pfile_fk_index"
547  " ON %s"
548  " USING BTREE (pfile_fk)",
549  tablename, tablename
550  ));
551 
552  RETURN_IF_FALSE(dbManager.queryPrintf(
553  "ALTER TABLE ONLY %s"
554  " ADD CONSTRAINT agent_fk"
555  " FOREIGN KEY (agent_fk)"
556  " REFERENCES agent(agent_pk) ON DELETE CASCADE",
557  tablename
558  ));
559 
560  RETURN_IF_FALSE(dbManager.queryPrintf(
561  "ALTER TABLE ONLY %s"
562  " ADD CONSTRAINT pfile_fk"
563  " FOREIGN KEY (pfile_fk)"
564  " REFERENCES pfile(pfile_pk) ON DELETE CASCADE",
565  tablename
566  ));
567  }
568  return true;
569 }
570 
576 #define CESEQUENCE_NAME "scancode_copyright_event_pk_seq"
577 #define CECOLUMN_NAME_PK "scancode_copyright_event_pk"
578  {CECOLUMN_NAME_PK, "bigint",
579  "PRIMARY KEY DEFAULT nextval('" CESEQUENCE_NAME "'::regclass)"},
580  {"upload_fk", "bigint", "NOT NULL"},
581  {"uploadtree_fk", "bigint", "NOT NULL"},
582  {"scancode_copyright_fk", "bigint", "NOT NULL"},
583  {"content", "text", ""},
584  {"hash", "text", ""},
585  {"is_enabled", "boolean", "NOT NULL DEFAULT FALSE"},
586  {"scope", "int4", "NOT NULL"},
587 };
588 
594 #define AESEQUENCE_NAME "scancode_author_event_pk_seq"
595 #define AECOLUMN_NAME_PK "scancode_author_event_pk"
596  {AECOLUMN_NAME_PK, "bigint",
597  "PRIMARY KEY DEFAULT nextval('" AESEQUENCE_NAME "'::regclass)"},
598  {"upload_fk", "bigint", "NOT NULL"},
599  {"uploadtree_fk", "bigint", "NOT NULL"},
600  {"scancode_author_fk", "bigint", "NOT NULL"},
601  {"content", "text", ""},
602  {"hash", "text", ""},
603  {"is_enabled", "boolean", "NOT NULL DEFAULT FALSE"},
604  {"scope", "int4", "NOT NULL"},
605 };
606 
613 {
614  const char *tablename = "";
615  const char *etablename = "";
616  const char *esequencename = "";
617  if (tableName == "scancode_copyright_event") {
618  etablename = "scancode_copyright_event";
619  esequencename = "scancode_copyright_event_pk_seq";
620  tablename = "scancode_copyright";
621  } else if (tableName == "scancode_author_event") {
622  etablename = "scancode_author_event";
623  esequencename = "scancode_author_event_pk_seq";
624  tablename = "scancode_author";
625  }
626  if (!dbManager.sequenceExists(esequencename)) {
627  RETURN_IF_FALSE(dbManager.queryPrintf("CREATE SEQUENCE %s"
628  " START WITH 1"
629  " INCREMENT BY 1"
630  " NO MAXVALUE"
631  " NO MINVALUE"
632  " CACHE 1",esequencename));
633  }
634 
635  if (!dbManager.tableExists(etablename))
636  {
637  if (tableName == "scancode_copyright_event") {
639  RETURN_IF_FALSE(dbManager.queryPrintf("CREATE table %s(%s)", etablename,
641  )
642  );
643  } else if (tableName == "scancode_author_event") {
645  RETURN_IF_FALSE(dbManager.queryPrintf("CREATE table %s(%s)", etablename,
647  )
648  );
649  }
650  RETURN_IF_FALSE(dbManager.queryPrintf(
651  "CREATE INDEX %s_upload_fk_index"
652  " ON %s"
653  " USING BTREE (upload_fk)",
654  etablename, etablename
655  ));
656  RETURN_IF_FALSE(dbManager.queryPrintf(
657  "CREATE INDEX %s_uploadtree_fk_index"
658  " ON %s"
659  " USING BTREE (uploadtree_fk)",
660  etablename, etablename
661  ));
662  RETURN_IF_FALSE(dbManager.queryPrintf(
663  "CREATE INDEX %s_scancode_fk_index"
664  " ON %s"
665  " USING BTREE (%s_fk)",
666  etablename, etablename, tablename
667  ));
668 
669  RETURN_IF_FALSE(dbManager.queryPrintf(
670  "ALTER TABLE ONLY %s"
671  " ADD CONSTRAINT upload_fk"
672  " FOREIGN KEY (upload_fk)"
673  " REFERENCES upload(upload_pk) ON DELETE CASCADE",
674  etablename
675  ));
676 
677 
678  RETURN_IF_FALSE(dbManager.queryPrintf(
679  "ALTER TABLE ONLY %s"
680  " ADD CONSTRAINT %s_fk"
681  " FOREIGN KEY (%s_fk)"
682  " REFERENCES %s(%s_pk) ON DELETE CASCADE",
683  etablename, tablename, tablename, tablename, tablename
684  ));
685  RETURN_IF_FALSE(dbManager.queryPrintf(
686  "ALTER TABLE %s"
687  " ALTER COLUMN scope"
688  " SET DEFAULT 1",
689  etablename
690  ));
691  }
692  return true;
693 }
bool hasEnding(string const &firstString, string const &ending)
Maps agent data to database schema.
Definition: database.hpp:25
int copy_startbyte
Definition: database.hpp:44
std::string content
Definition: database.hpp:31
std::string type
Type of statement found.
Definition: database.hpp:43
int copy_endbyte
Definition: database.hpp:45
DatabaseEntry()
Default constructor for DatabaseEntry.
Definition: database.cc:26
bool saveHighlightInfo(long licenseFileId, unsigned start, unsigned length)
save highlight information in the highlight table
static const ColumnDef columns_copyright_event[]
Columns required to store copyright deactivated statement.
unsigned long selectOrInsertLicenseIdForName(std::string rfShortname, std::string rfFullname, std::string rfTexturl)
insert license if not present in license_ref table and return rf_pk
bool createTables() const
Create tables to save copyright and author information.
unsigned long getCachedLicenseIdForName(std::string const &rfShortName) const
for given short name search license
static const ColumnDef columns_copyright[]
Columns required to store copyright information by scancode agent.
long saveLicenseMatch(int agentId, long pFileId, long licenseId, int percentMatch)
Save a license match against the license_ref table in the license_file table. Insert the entry if it is not already present...
static const ColumnDef columns_author_event[]
Columns required to store author deactivated statement.
bool insertInDatabase(DatabaseEntry &entry) const
insert copyright/author in scancode_copyright/scancode_author table
void insertOrCacheLicenseIdForName(std::string const &rfShortName, std::string const &rfFullname, std::string const &rfTexturl)
calling function for selectOrInsertLicenseIdForName
bool insertNoResultInDatabase(int agentId, long pFileId, long licenseId)
Insert null value of license for uploads having no licenses.
std::string getColumnCreationString(const ColumnDef in[], size_t size) const
get string of parameters for a column for table creation
std::vector< unsigned long > queryFileIdsForUpload(int uploadId, bool ignoreFilesWithMimeType)
Function to get pfile ID for uploads.
ScancodeDatabaseHandler(fo::DbManager dbManager)
Default constructor for ScanCode Database Handler class.
bool createTableAgentFindings(string tablename) const
create table to store agent findings
static const ColumnDef columns_author[]
Columns required to store author information by scancode agent.
bool createTableAgentEvents(string tablename) const
create table to store agent events
ScancodeDatabaseHandler spawn() const
Instantiate a new spawned object of the ScanCode database handler. Used to create new objects for threads.
Database handler for agents.
std::vector< unsigned long > queryFileIdsVectorForUpload(int uploadId, bool ignoreFilesWithMimeType) const
Get pfile ids for a given upload id.
DbManager dbManager
DbManager to use.
DB wrapper for agents.
QueryResult execPrepared(fo_dbManager_PreparedStatement *stmt,...) const
Execute a prepared statement with new parameters.
bool tableExists(const char *tableName) const
void ignoreWarnings(bool) const
QueryResult queryPrintf(const char *queryFormat,...) const
Execute a query in printf format.
bool sequenceExists(const char *name) const
fo_dbManager * getStruct_dbManager() const
DbManager spawn() const
Wrapper for DB result.
std::vector< T > getSimpleResults(int columnN, T(functionP)(const char *)) const
Get vector of a single column from query result.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
fo namespace holds the FOSSology library functions.
unsigned long stringToUnsignedLong(const char *string)
Definition: libfossUtils.cc:20
icu::UnicodeString recodeToUnicode(const std::string &input)
Definition: libfossUtils.cc:46
start($application)
Start the application. Assumes the application is restartable via /etc/init.d/<script>....
Definition: pkgConfig.php:1214
Definition: match.h:20
Holds the column related data for table creation.
Store the results of a regex match.
Definition: scanners.hpp:28