7 #include "scancode_utils.hpp"
9 namespace po = boost::program_options;
15 void bail(
int exitval) {
27 return State(agentId);
36 char *COMMIT_HASH =
fo_sysconfig(AGENT_NAME,
"COMMIT_HASH");
40 if (!asprintf(&agentRevision,
"%s.%s", VERSION, COMMIT_HASH))
44 agentRevision, AGENT_DESC);
62 int writeARS(
const State &state,
int arsId,
int uploadId,
int success,
64 PGconn *connection =
dbManager.getConnection();
67 return fo_WriteARS(connection, arsId, uploadId, agentId, AGENT_ARS, NULL,
80 vector<unsigned long> fileIds =
83 unordered_map<unsigned long, string> fileIdsMap;
84 unordered_map<string, unsigned long> fileIdsMapReverse;
88 string fileLocation = tmpnam(
nullptr);
89 string outputFile = tmpnam(
nullptr);
91 size_t pFileCount = fileIds.size();
92 for (
size_t it = 0; it < pFileCount; ++it) {
93 unsigned long pFileId = fileIds[it];
98 mapFileNameWithId(pFileId, fileIdsMap, fileIdsMapReverse, databaseHandler);
103 writeFileNameToTextFile(fileIdsMap, fileLocation);
104 scanFileWithScancode(state, fileLocation, outputFile);
106 std::ifstream opfile(outputFile);
108 std::cerr <<
"Error opening the JSON file.\n";
114 while (getline(opfile, line)) {
118 #pragma omp parallel default(none) \
119 shared(databaseHandler, scanResults, fileIdsMapReverse, state, errors)
125 Json::CharReaderBuilder json_reader_builder;
126 auto scanner = unique_ptr<Json::CharReader>(json_reader_builder.newCharReader());
127 Json::Value scancodeValue;
134 string fileName = scancodeValue[
"file"].asString();
135 unsigned long fileId = fileIdsMapReverse[fileName];
143 if (unlink(outputFile.c_str()) != 0) {
144 LOG_FATAL(
"Unable to delete file %s \n", outputFile.c_str());
156 void mapFileNameWithId(
unsigned long pFileId,
157 unordered_map<unsigned long, string> &fileIdsMap,
158 unordered_map<string, unsigned long> &fileIdsMapReverse,
162 LOG_FATAL(
"File not found %lu \n", pFileId);
166 char *fileName = NULL;
173 fileIdsMap[file.getId()] = file.getFileName();
174 fileIdsMapReverse[file.getFileName()] = file.getId();
179 LOG_FATAL(
"PFile not found in repo %lu \n", pFileId);
188 void writeFileNameToTextFile(unordered_map<unsigned long, string> &fileIdsMap,
string fileLocation) {
189 std::ofstream outputFile(fileLocation, std::ios::app);
191 if (!outputFile.is_open()) {
192 LOG_FATAL(
"Unable to open file");
195 for (
auto const& x : fileIdsMap)
197 outputFile << x.second <<
"\n";
208 string getScanResult(
const string& line) {
211 size_t startIndex = 0;
212 size_t braceCount = 0;
214 for (
size_t i = 0; i < line.length(); ++i) {
218 if (braceCount == 0) {
222 }
else if (c ==
'}') {
224 if (braceCount == 0) {
225 scanResult = line.substr(startIndex, i - startIndex + 1);
245 string scancodeResult,
string &filename,
unsigned long fileId) {
246 map<string, vector<Match>> scancodeData =
247 extractDataFromScancodeResult(scancodeResult, filename);
248 return saveLicenseMatchesToDatabase(
249 state, scancodeData[
"scancode_license"], fileId,
251 saveOtherMatchesToDatabase(
252 state, scancodeData[
"scancode_statement"], fileId,
254 saveOtherMatchesToDatabase(
255 state, scancodeData[
"scancode_author"], fileId,
257 saveOtherMatchesToDatabase(
258 state, scancodeData[
"scancode_email"], fileId,
260 saveOtherMatchesToDatabase(
261 state, scancodeData[
"scancode_url"], fileId,
278 bool saveLicenseMatchesToDatabase(
const State &state,
279 const vector<Match> &matches,
280 unsigned long pFileId,
283 for (
const auto &
match : matches) {
288 if (!databaseHandler.
begin()) {
291 for (
const auto &
match : matches) {
293 string rfShortname =
match.getMatchName();
294 int percent =
match.getPercentage();
296 unsigned length =
match.getLength();
297 unsigned long licenseId =
300 if (licenseId == 0) {
302 LOG_ERROR(
"cannot get licenseId for shortname '%s' \n",
303 rfShortname.c_str());
306 if (rfShortname ==
"No_license_found") {
309 LOG_ERROR(
"failing save licenseMatch \n");
315 if (licenseFileId > 0) {
320 LOG_ERROR(
"failing save licensehighlight \n");
324 LOG_ERROR(
"failing save licenseMatch \n");
329 return databaseHandler.
commit();
340 bool saveOtherMatchesToDatabase(
const State &state,
341 const vector<Match> &matches,
342 unsigned long pFileId,
345 if (!databaseHandler.
begin())
348 for (
const auto &
match : matches) {
354 LOG_ERROR(
"failing save otherMatches \n");
358 return databaseHandler.
commit();
371 bool parseCommandLine(
int argc,
char **argv,
string &cliOption,
bool &ignoreFilesWithMimeType)
373 po::options_description desc(AGENT_NAME
": available options");
375 (
"help,h",
"show this help")
376 (
"ignoreFilesWithMimeType,I",
"ignoreFilesWithMimeType")
377 (
"license,l",
"scancode license")
378 (
"copyright,r",
"scancode copyright")
379 (
"email,e",
"scancode email")
380 (
"url,u",
"scancode url")
381 (
"config,c", po::value<string>(),
"path to the sysconfigdir")
382 (
"scheduler_start",
"specifies, that the command was called by the scheduler")
383 (
"userID", po::value<int>(),
"the id of the user that created the job (only in combination with --scheduler_start)")
384 (
"groupID", po::value<int>(),
"the id of the group of the user that created the job (only in combination with --scheduler_start)")
385 (
"jobId", po::value<int>(),
"the id of the job (only in combination with --scheduler_start)");
386 po::variables_map vm;
389 po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
390 if (vm.count(
"help") > 0)
392 cout << desc <<
"\n";
396 cliOption += vm.count(
"license") > 0 ?
"l" :
"";
397 cliOption += vm.count(
"copyright") > 0 ?
"c" :
"";
398 cliOption += vm.count(
"email") > 0 ?
"e" :
"";
399 cliOption += vm.count(
"url") > 0 ?
"u" :
"";
400 ignoreFilesWithMimeType =
401 vm.count(
"ignoreFilesWithMimeType") > 0 ? true :
false;
403 catch (boost::bad_any_cast &)
405 LOG_FATAL(
"wrong parameter type\n ");
406 cout << desc <<
"\n";
411 LOG_FATAL(
"wrong command line arguments\n");
412 cout << desc <<
"\n";
bool processUploadId(const CompatibilityState &state, int uploadId, CompatibilityDatabaseHandler &databaseHandler, int groupId)
CompatibilityState getState(DbManager &dbManager, CompatibilityCliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
int writeARS(const CompatibilityState &state, int arsId, int uploadId, int success, DbManager &dbManager)
int queryAgentId(DbManager &dbManager)
Maps agent data to database schema.
bool saveHighlightInfo(long licenseFileId, unsigned start, unsigned length)
save highlight information in the highlight table
unsigned long getCachedLicenseIdForName(std::string const &rfShortName) const
Search for the license with the given short name.
long saveLicenseMatch(int agentId, long pFileId, long licenseId, int percentMatch)
Save license match with license_ref table in license_file table. Insert license if not already present...
bool insertInDatabase(DatabaseEntry &entry) const
insert copyright/author in scancode_copyright/scancode_author table
void insertOrCacheLicenseIdForName(std::string const &rfShortName, std::string const &rfFullname, std::string const &rfTexturl)
calling function for selectOrInsertLicenseIdForName
bool insertNoResultInDatabase(int agentId, long pFileId, long licenseId)
Insert null value of license for uploads having no licenses.
std::vector< unsigned long > queryFileIdsForUpload(int uploadId, bool ignoreFilesWithMimeType)
Function to get pfile ID for uploads.
ScancodeDatabaseHandler spawn() const
Instantiate a new object spawn for ScanCode Database handler. Used to create new objects for threads.
int getAgentId() const
getter function for agent Id
bool commit() const
COMMIT a transaction block in DB.
bool begin() const
BEGIN a transaction block in DB.
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
bool rollback() const
ROLLBACK a transaction block in DB.
Class to handle file related operations.
Abstract class to provide interface to scanners.
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler)
Scan a given file with all available scanners and save findings to database.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items they have processed.
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
fo_dbManager * dbManager
fo_dbManager object
start($application)
Start the application. Assumes the application is restartable via /etc/init.d/<script>....
Store the results of a regex match.