12 #include "copyrightUtils.hpp" 
   13 #include <boost/program_options.hpp> 
   27   char* COMMIT_HASH = 
fo_sysconfig(AGENT_NAME, 
"COMMIT_HASH");
 
   30   if (!asprintf(&agentRevision, 
"%s.%s", VERSION, COMMIT_HASH))
 
   36     AGENT_NAME, 0, agentRevision, AGENT_DESC);
 
   55   return fo_WriteARS(
dbManager.getConnection(), arsId, uploadId, agentId, AGENT_ARS, NULL, success);
 
   79   std::vector<std::string>& fileNames, std::string& directoryToScan)
 
   83   boost::program_options::options_description desc(IDENTITY 
": recognized options");
 
   85         (
"help,h", 
"shows help")
 
   88           boost::program_options::value<unsigned>(&type)
 
   89             ->default_value(ALL_TYPES),
 
   90           "type of regex to try" 
   93           "verbose,v", 
"increase verbosity" 
   97           boost::program_options::value<vector<string> >(),
 
   98           "user defined Regex to search: [{name=cli}@@][{matchingGroup=0}@@]{regex} e.g. 'linux@@1@@(linus) torvalds'" 
  102           boost::program_options::value< vector<string> >(),
 
  106           "json,J", 
"output JSON" 
  109           "ignoreFilesWithMimeType,I", 
"ignoreFilesWithMimeType" 
  112           "config,c", boost::program_options::value<string>(), 
"path to the sysconfigdir" 
  115           "scheduler_start", 
"specifies, that the command was called by the scheduler" 
  118           "userID", boost::program_options::value<int>(), 
"the id of the user that created the job (only in combination with --scheduler_start)" 
  121           "groupID", boost::program_options::value<int>(), 
"the id of the group of the user that created the job (only in combination with --scheduler_start)" 
  124           "jobId", boost::program_options::value<int>(), 
"the id of the job (only in combination with --scheduler_start)" 
  127           "directory,d", boost::program_options::value<string>(), 
"directory to scan (recursive)" 
  131   boost::program_options::positional_options_description p;
 
  134   boost::program_options::variables_map vm;
 
  138     boost::program_options::store(
 
  139       boost::program_options::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
 
  141     type = vm[
"type"].as<
unsigned>();
 
  143     if ((vm.count(
"help") > 0) || (type > ALL_TYPES))
 
  145       cout << desc << endl;
 
  149     if (vm.count(
"files"))
 
  151       fileNames = vm[
"files"].as<std::vector<string> >();
 
  154     unsigned long verbosity = vm.count(
"verbose");
 
  155     bool json = vm.count(
"json") > 0 ? true : 
false;
 
  156     bool ignoreFilesWithMimeType = vm.count(
"ignoreFilesWithMimeType") > 0 ? true : 
false;
 
  158     dest = 
CliOptions(verbosity, type, json, ignoreFilesWithMimeType);
 
  160     if (vm.count(
"regex"))
 
  162       const std::vector<std::string>& userRegexesFmts = vm[
"regex"].as<vector<std::string> >();
 
  163       for (
auto it = userRegexesFmts.begin(); it != userRegexesFmts.end(); ++it) {
 
  167           cout << 
"cannot parse regex format : " << *it << endl;
 
  177     if (vm.count(
"directory"))
 
  179       if (vm.count(
"files"))
 
  181         cout << 
"cannot pass files and directory at the same time" << endl;
 
  182         cout << desc << endl;
 
  186       directoryToScan = vm[
"directory"].as<std::string>();
 
  191   catch (boost::bad_any_cast&) {
 
  192     cout << 
"wrong parameter type" << endl;
 
  193     cout << desc << endl;
 
  196   catch (boost::program_options::error&)
 
  198     cout << 
"wrong command line arguments" << endl;
 
  199     cout << desc << endl;
 
  211 #ifdef IDENTITY_COPYRIGHT 
  249   #define RGX_FMT_SEPARATOR "@@" 
  250   auto fmtRegex = rx::regex(
 
  251     "(?:([[:alpha:]]+)" RGX_FMT_SEPARATOR 
")?(?:([[:digit:]]+)" RGX_FMT_SEPARATOR 
")?(.*)",
 
  252     rx::regex_constants::icase
 
  255   rx::match_results<std::string::const_iterator> 
match;
 
  256   if (rx::regex_match(regexDesc.begin(), regexDesc.end(), 
match, fmtRegex))
 
  258     std::string type(
match.length(1) > 0 ? 
match.str(1) : defaultType.c_str());
 
  260     int regId = 
match.length(2) > 0 ? std::stoi(std::string(
match.str(2))) : 0;
 
  262     if (
match.length(3) == 0)
 
  265     std::istringstream stream;
 
  266     stream.str(type + 
"=" + 
match.str(3));
 
  298   if (!copyrightDatabaseHandler.
begin())
 
  304   for (
auto m = matches.begin(); m != matches.end(); ++m)
 
  313     entry.
type = m->type;
 
  315     if (entry.
content.length() != 0)
 
  320         copyrightDatabaseHandler.
rollback();
 
  326   return copyrightDatabaseHandler.
commit();
 
  340   const list<unptr::shared_ptr<scanner>>& scanners = state.
getScanners();
 
  341   for (
auto sc = scanners.begin(); sc != scanners.end(); ++sc)
 
  343     (*sc)->ScanString(sContent, l);
 
  367     cout << 
"File not found " << pFileId << endl;
 
  371   char* fileName = NULL;
 
  373 #pragma omp critical (repo_mk_path) 
  388     cout << 
"PFile not found in repo " << pFileId << endl;
 
  407   vector<unsigned long> fileIds = databaseHandler.
queryFileIdsForUpload(agentId, uploadId, ignoreFilesWithMimeType);
 
  409 #pragma omp parallel num_threads(THREADS) 
  413     size_t pFileCount = fileIds.size();
 
  415     for (
size_t it = 0; it < pFileCount; ++it)
 
  417       unsigned long pFileId = fileIds[it];
 
  440   const string fileName)
 
  442   const list<unptr::shared_ptr<scanner>>& scanners = state.
getScanners();
 
  443   list<match> matchList;
 
  454     for (
auto sc = scanners.begin(); sc != scanners.end(); ++sc)
 
  456       (*sc)->ScanString(
s, matchList);
 
  459   return make_pair(
s, matchList);
 
  470     const std::pair<
string, list<match>> resultPair, 
bool &printComma)
 
  473 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16)) 
  475   Json::FastWriter jsonWriter;
 
  479   Json::StreamWriterBuilder jsonWriter;
 
  480   jsonWriter[
"commentStyle"] = 
"None";
 
  481   jsonWriter[
"indentation"] = 
"";
 
  484   if (resultPair.first.empty())
 
  486     result[
"file"] = fileName;
 
  487     result[
"results"] = 
"Unable to read file";
 
  491     list<match> resultList = resultPair.second;
 
  493     for (
auto m : resultList)
 
  496       j[
"start"] = m.start;
 
  499       j[
"content"] = cleanMatch(resultPair.first, m);
 
  502     result[
"file"] = fileName;
 
  503     result[
"results"] = results;
 
  506 #pragma omp critical (jsonPrinter) 
  517 #if JSONCPP_VERSION_HEXA < ((1 << 24) | (4 << 16)) 
  520     jsonString = jsonWriter.write(result);
 
  521     jsonString.replace(jsonString.find(
"\n"), 
string(
"\n").length(), 
"");
 
  524     jsonString = Json::writeString(jsonWriter, result);
 
  526     cout << 
"  " << jsonString << flush;
 
  536     const std::pair<
string, list<match>> resultPair)
 
  538   if (resultPair.first.empty())
 
  540     cout << fileName << 
" :: Unable to read file" << endl;
 
  544   ss << fileName << 
" ::" << endl;
 
  546   list<match> resultList = resultPair.second;
 
  547   for (
auto m = resultList.begin();  m != resultList.end(); ++m)
 
  549     ss << 
"\t[" << m->start << 
':' << m->end << 
':' << m->type << 
"] '" 
  550        << cleanMatch(resultPair.first, *m)
 
Store the options sent through the CLI.
 
unsigned int getOptType() const
Get the opt type set by CliOptions.
 
Manages database related requests for agent.
 
std::vector< unsigned long > queryFileIdsForUpload(int agentId, int uploadId, bool ignoreFilesWithMimeType)
Get the list of pfile ids on which the given agent has no findings for a given upload.
 
bool insertInDatabase(DatabaseEntry &entry) const
Insert a finding in database.
 
CopyrightDatabaseHandler spawn() const
Spawn/fork a new database handler and return it.
 
Holds information about state of one agent.
 
const CliOptions & getCliOptions() const
Get the CliOptions set by user.
 
void addScanner(scanner *scanner)
Add scanner to state.
 
const std::list< unptr::shared_ptr< scanner > > & getScanners() const
Get available scanners.
 
Maps agent data to database schema.
 
std::string type
Type of statement found.
 
bool commit() const
COMMIT a transaction block in DB.
 
bool begin() const
BEGIN a transaction block in DB.
 
char * getPFileNameForFileId(unsigned long pfileId) const
Get the file name of a given pfile id.
 
bool rollback() const
ROLLBACK a transaction block in DB.
 
Implementation of scanner class for copyright.
 
Provides a regex scanner using predefined regexs.
 
Abstract class to provide interface to scanners.
 
void matchPFileWithLicenses(CopyrightState const &state, int agentId, unsigned long pFileId, CopyrightDatabaseHandler &databaseHandler)
Get the file contents, scan for statements and save findings to database.
 
scanner * makeRegexScanner(const std::string &regexDesc, const std::string &defaultType)
Make a boost regex scanner object based on regex desc and type.
 
bool parseCliOptions(int argc, char **argv, CliOptions &dest, std::vector< std::string > &fileNames, std::string &directoryToScan)
Parse the options sent by CLI to CliOptions object.
 
pair< string, list< match > > processSingleFile(const CopyrightState &state, const string fileName)
 
int queryAgentId(PGconn *dbConn)
Get agent id, exit if agent id is incorrect.
 
static void addDefaultScanners(CopyrightState &state)
Add default scanners to the agent state.
 
void matchFileWithLicenses(const string &sContent, unsigned long pFileId, CopyrightState const &state, int agentId, CopyrightDatabaseHandler &databaseHandler)
Scan a given file with all available scanners and save findings to database.
 
int writeARS(int agentId, int arsId, int uploadId, int success, const fo::DbManager &dbManager)
Call C function fo_WriteARS() and translate the arguments.
 
CopyrightState getState(CliOptions &&cliOptions)
Create a new state for the current agent based on CliOptions.
 
bool processUploadId(const CopyrightState &state, int agentId, int uploadId, CopyrightDatabaseHandler &databaseHandler, bool ignoreFilesWithMimeType)
Process a given upload id, scan for statements and add to database.
 
void appendToJson(const std::string fileName, const std::pair< string, list< match >> resultPair, bool &printComma)
 
void printResultToStdout(const std::string fileName, const std::pair< string, list< match >> resultPair)
 
void bail(int exitval)
Disconnect with scheduler returning an error code and exit.
 
bool saveToDatabase(const string &s, const list< match > &matches, unsigned long pFileId, int agentId, const CopyrightDatabaseHandler &copyrightDatabaseHandler)
Save findings to the database if agent was called by scheduler.
 
int s
The socket that the CLI will use to communicate.
 
FUNCTION int fo_WriteARS(PGconn *pgConn, int ars_pk, int upload_pk, int agent_pk, const char *tableName, const char *ars_status, int ars_success)
Write ars record.
 
FUNCTION int fo_GetAgentKey(PGconn *pgConn, const char *agent_name, long Upload_pk, const char *rev, const char *agent_desc)
Get the latest enabled agent key (agent_pk) from the database.
 
char * fo_RepMkPath(const char *Type, char *Filename)
Given a filename, construct the full path to the file.
 
void fo_scheduler_disconnect(int retcode)
Disconnect the scheduler connection.
 
void fo_scheduler_heart(int i)
This function must be called by agents to let the scheduler know they are alive and how many items they have processed.
 
char * fo_sysconfig(const char *sectionname, const char *variablename)
Gets a system configuration variable from the configuration data.
 
fo_dbManager * dbManager
fo_dbManager object
 
bool ReadFileToString(const string &fileName, string &out)
Utility: read file to string from scanners.h.
 
Store the results of a regex match.