12 #include "copyrightUtils.hpp"
13 #include <boost/program_options.hpp>
27 char* COMMIT_HASH =
fo_sysconfig(AGENT_NAME,
"COMMIT_HASH");
30 if (!asprintf(&agentRevision,
"%s.%s", VERSION, COMMIT_HASH))
36 AGENT_NAME, 0, agentRevision, AGENT_DESC);
55 return fo_WriteARS(
dbManager.getConnection(), arsId, uploadId, agentId, AGENT_ARS, NULL, success);
79 std::vector<std::string>& fileNames, std::string& directoryToScan)
83 boost::program_options::options_description desc(IDENTITY
": recognized options");
85 (
"help,h",
"shows help")
88 boost::program_options::value<unsigned>(&type)
89 ->default_value(ALL_TYPES),
90 "type of regex to try"
93 "verbose,v",
"increase verbosity"
97 boost::program_options::value<vector<string> >(),
98 "user defined Regex to search: [{name=cli}@@][{matchingGroup=0}@@]{regex} e.g. 'linux@@1@@(linus) torvalds'"
102 boost::program_options::value< vector<string> >(),
106 "json,J",
"output JSON"
109 "ignoreFilesWithMimeType,I",
"ignoreFilesWithMimeType"
112 "config,c", boost::program_options::value<string>(),
"path to the sysconfigdir"
115 "scheduler_start",
"specifies, that the command was called by the scheduler"
118 "userID", boost::program_options::value<int>(),
"the id of the user that created the job (only in combination with --scheduler_start)"
121 "groupID", boost::program_options::value<int>(),
"the id of the group of the user that created the job (only in combination with --scheduler_start)"
124 "jobId", boost::program_options::value<int>(),
"the id of the job (only in combination with --scheduler_start)"
127 "directory,d", boost::program_options::value<string>(),
"directory to scan (recursive)"
131 boost::program_options::positional_options_description p;
134 boost::program_options::variables_map vm;
138 boost::program_options::store(
139 boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
141 type = vm[
"type"].as<
unsigned>();
143 if ((vm.count(
"help") > 0) || (type > ALL_TYPES))
145 cout << desc << endl;
149 if (vm.count(
"files"))
151 fileNames = vm[
"files"].as<std::vector<string> >();
154 unsigned long verbosity = vm.count(
"verbose");
155 bool json = vm.count(
"json") > 0 ? true :
false;
156 bool ignoreFilesWithMimeType = vm.count(
"ignoreFilesWithMimeType") > 0 ? true :
false;
158 dest =
CliOptions(verbosity, type, json, ignoreFilesWithMimeType);
160 if (vm.count(
"regex"))
162 const std::vector<std::string>& userRegexesFmts = vm[
"regex"].as<vector<std::string> >();
163 for (
auto it = userRegexesFmts.begin(); it != userRegexesFmts.end(); ++it) {
167 cout <<
"cannot parse regex format : " << *it << endl;
177 if (vm.count(
"directory"))
179 if (vm.count(
"files"))
181 cout <<
"cannot pass files and directory at the same time" << endl;
182 cout << desc << endl;
186 directoryToScan = vm[
"directory"].as<std::string>();
191 catch (boost::bad_any_cast&) {
192 cout <<
"wrong parameter type" << endl;
193 cout << desc << endl;
196 catch (boost::program_options::error&)
198 cout <<
"wrong command line arguments" << endl;
199 cout << desc << endl;
211 #ifdef IDENTITY_COPYRIGHT
249 #define RGX_FMT_SEPARATOR "@@"
250 auto fmtRegex = rx::regex(
251 "(?:([[:alpha:]]+)" RGX_FMT_SEPARATOR
")?(?:([[:digit:]]+)" RGX_FMT_SEPARATOR
")?(.*)",
252 rx::regex_constants::icase
255 rx::match_results<std::string::const_iterator>
match;
256 if (rx::regex_match(regexDesc.begin(), regexDesc.end(),
match, fmtRegex))
258 std::string type(
match.length(1) > 0 ?
match.str(1) : defaultType.c_str());
260 int regId =
match.length(2) > 0 ? std::stoi(std::string(
match.str(2))) : 0;
262 if (
match.length(3) == 0)
265 std::istringstream stream;
266 stream.str(type +
"=" +
match.str(3));
298 if (!copyrightDatabaseHandler.
begin())
304 for (
auto m = matches.begin(); m != matches.end(); ++m)
313 entry.
type = m->type;
315 if (entry.
content.length() != 0)
320 copyrightDatabaseHandler.
rollback();
326 return copyrightDatabaseHandler.
commit();
340 const list<unptr::shared_ptr<scanner>>& scanners = state.
getScanners();
341 for (
auto sc = scanners.begin(); sc != scanners.end(); ++sc)
343 (*sc)->ScanString(sContent, l);
367 cout <<
"File not found " << pFileId << endl;
371 char* fileName = NULL;
373 #pragma omp critical (repo_mk_path)
388 cout <<
"PFile not found in repo " << pFileId << endl;
407 vector<unsigned long> fileIds = databaseHandler.
queryFileIdsForUpload(agentId, uploadId, ignoreFilesWithMimeType);
409 #pragma omp parallel num_threads(THREADS)
413 size_t pFileCount = fileIds.size();
415 for (
size_t it = 0; it < pFileCount; ++it)
417 unsigned long pFileId = fileIds[it];
440 const string fileName)
442 const list<unptr::shared_ptr<scanner>>& scanners = state.
getScanners();
443 list<match> matchList;
454 for (
auto sc = scanners.begin(); sc != scanners.end(); ++sc)
456 (*sc)->ScanString(
s, matchList);
459 return make_pair(
s, matchList);
470 const std::pair<
string, list<match>> resultPair,
bool &printComma)
473 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
475 Json::FastWriter jsonWriter;
479 Json::StreamWriterBuilder jsonWriter;
480 jsonWriter[
"commentStyle"] =
"None";
481 jsonWriter[
"indentation"] =
"";
484 if (resultPair.first.empty())
486 result[
"file"] = fileName;
487 result[
"results"] =
"Unable to read file";
491 list<match> resultList = resultPair.second;
493 for (
auto m : resultList)
496 j[
"start"] = m.start;
499 j[
"content"] = cleanMatch(resultPair.first, m);
502 result[
"file"] = fileName;
503 result[
"results"] = results;
506 #pragma omp critical (jsonPrinter)
517 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16))
520 jsonString = jsonWriter.write(result);
521 jsonString.replace(jsonString.find(
"\n"),
string(
"\n").length(),
"");
524 jsonString = Json::writeString(jsonWriter, result);
526 cout <<
" " << jsonString << flush;
536 const std::pair<
string, list<match>> resultPair)
538 if (resultPair.first.empty())
540 cout << fileName <<
" :: Unable to read file" << endl;
544 ss << fileName <<
" ::" << endl;
546 list<match> resultList = resultPair.second;
547 for (
auto m = resultList.begin(); m != resultList.end(); ++m)
549 ss <<
"\t[" << m->start <<
':' << m->end <<
':' << m->type <<
"] '"
550 << cleanMatch(resultPair.first, *m)
Store the options sent through the CLI.
unsigned int getOptType() const
Get the opt type set by CliOptions.
Manages database related requests for agent.
std::vector< unsigned long > queryFileIdsForUpload(int agentId, int uploadId, bool ignoreFilesWithMimeType)
Get the list of pfile ids on which the given agent has no findings for a given upload.
bool insertInDatabase(DatabaseEntry &entry) const
Insert a finding in database.
CopyrightDatabaseHandler spawn() const
Spawn/fork a new database handler and return it.
Holds information about state of one agent.
const CliOptions & getCliOptions() const
Get the CliOptions set by user.
void addScanner(scanner *scanner)
Add scanner to state.
const std::list< unptr::shared_ptr< scanner > > & getScanners() const
Get available scanners.
Maps agent data to database schema.
std::string type
Type of statement found.
bool commit() const
COMMIT a transaction block in DB.
bool begin() const
BEGIN a transaction block in DB.
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
bool rollback() const
ROLLBACK a transaction block in DB.
Implementation of scanner class for copyright.
Provides a regex scanner using predefined regexs.
Abstract class to provide interface to scanners.
void matchPFileWithLicenses(CopyrightState const &state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler &databaseHandler)
Get the file contents, scan for statements and save findings to database.
scanner * makeRegexScanner(const std::string &regexDesc, const std::string &defaultType)
Make a boost regex scanner object based on regex desc and type.
bool parseCliOptions(int argc, char **argv, CliOptions &dest, std::vector< std::string > &fileNames, std::string &directoryToScan)
Parse the options sent by CLI to CliOptions object.
pair< string, list< match > > processSingleFile(const CopyrightState &state, const string fileName)
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
static void addDefaultScanners(CopyrightState &state)
Add default scanners to the agent state.
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler)
Scan a given file with all available scanners and save findings to database.
int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager &dbManager)
Call C function fo_WriteARS() and translate the arguments.
CopyrightState getState(CliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
bool processUploadId(const CopyrightState &state, int agentId, int uploadId, CopyrightDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType)
Process a given upload id, scan for statements and add to database.
void appendToJson(const std::string fileName, const std::pair< string, list< match >> resultPair, bool &printComma)
void printResultToStdout(const std::string fileName, const std::pair< string, list< match >> resultPair)
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
bool saveToDatabase(const string &s, const list< match > &matches, unsigned long pFileId, int agentId, const CopyrightDatabaseHandler &copyrightDatabaseHandler)
Save findings to the database if agent was called by scheduler.
int s
The socket that the CLI will use to communicate.
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items th...
char * fo_sysconfig(const char *sectionname, const char *variablename)
gets a system configuration variable from the configuration data.
fo_dbManager * dbManager
fo_dbManager object
bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
Store the results of a regex match.