FOSSology  4.5.1
Open Source License Compliance by Open Source Software
ReportImportAgent.php
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2015-2017,2024 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 namespace Fossology\ReportImport;
8 
19 
20 require_once 'SpdxTwoImportSource.php';
21 require_once 'SpdxThreeImportSource.php';
22 require_once 'XmlImportSource.php';
23 require_once 'ReportImportSink.php';
24 require_once 'ReportImportHelper.php';
25 require_once 'ReportImportConfiguration.php';
26 
27 require_once 'version.php';
28 require_once 'services.php';
29 
/**
 * Agent that imports a report file for an upload.
 *
 * The report is parsed by one of the import sources (SPDX 2.x, SPDX 3 or XML,
 * selected by file name suffix). For every file listed in the report the
 * matching pfiles of the upload are looked up (by file name, verified or
 * replaced by a hash lookup) and the per-file data is handed to a
 * ReportImportSink.
 */
class ReportImportAgent extends Agent
{
  /*
   * Names of the agent-specific long command-line options. Each of them is
   * registered in the constructor with a trailing ':'.
   */
  const REPORT_KEY = "report";
  const ACLA_KEY = "addConcludedAsDecisions";
  const ACLAO_KEY = "addConcludedAsDecisionsOverwrite";
  const ACLATBD_KEY = "addConcludedAsDecisionsTBD";
  const ALIFI_KEY = "addLicenseInfoFromInfoInFile";
  const ALFC_KEY = "addLicenseInfoFromConcluded";
  const ANLA_KEY = "addNewLicensesAs";
  const LMATCH_KEY = "licenseMatch";
  const COPYRIGHTS_KEY = "addCopyrights";

  /** Service registered in the container as 'dao.upload'. */
  private $uploadDao;
  /** Service registered in the container as 'dao.user'. */
  private $userDao;
  /** Service registered in the container as 'db.manager'. */
  protected $dbManager;
  /** Service registered in the container as 'dao.license'. */
  protected $licenseDao;
  /** Service registered in the container as 'dao.clearing'. */
  protected $clearingDao;
  /** Service registered in the container as 'dao.copyright'. */
  private $copyrightDao;

  /** @var int Primary key of the newest agent row for this agent name. */
  protected $agent_pk;

  /**
   * Set up the agent: resolve the required services, register the
   * agent-specific long options and determine the agent_pk.
   *
   * @throws \Exception if the agent_pk can not be determined
   */
  public function __construct()
  {
    parent::__construct(AGENT_REPORTIMPORT_NAME, AGENT_REPORTIMPORT_VERSION, AGENT_REPORTIMPORT_REV);
    $this->uploadDao = $this->container->get('dao.upload');
    $this->dbManager = $this->container->get('db.manager');
    $this->userDao = $this->container->get('dao.user');
    $this->licenseDao = $this->container->get('dao.license');
    $this->clearingDao = $this->container->get('dao.clearing');
    $this->copyrightDao = $this->container->get('dao.copyright');

    // Every option is registered as "<name>:" in the same order as before.
    $optionNames = array(
      self::REPORT_KEY,
      self::ACLA_KEY,
      self::ACLAO_KEY,
      self::ACLATBD_KEY,
      self::ALIFI_KEY,
      self::ALFC_KEY,
      self::ANLA_KEY,
      self::LMATCH_KEY,
      self::COPYRIGHTS_KEY
    );
    foreach ($optionNames as $optionName) {
      $this->agentSpecifLongOptions[] = $optionName . ':';
    }

    $this->setAgent_PK();
  }

  /**
   * Look up the most recent agent row for this agent name and remember its
   * primary key in $this->agent_pk.
   *
   * @throws \Exception if no agent row is found
   */
  private function setAgent_PK()
  {
    // should be already set in $this->agentId?
    $row = $this->dbManager->getSingleRow(
      "SELECT agent_pk FROM agent WHERE agent_name = $1 order by agent_ts desc limit 1",
      array(AGENT_REPORTIMPORT_NAME), __METHOD__."select"
    );

    if ($row === false) {
      throw new \Exception("agent_pk could not be determined");
    }
    $this->agent_pk = intval($row['agent_pk']);
  }

  /**
   * Import the report named by the "report" option into the given upload.
   *
   * The report file is expected below "<FOSSOLOGY path>/ReportImport/". The
   * file path is recorded in the reportgen table before the import starts.
   *
   * @param int $uploadId upload to apply the report to
   * @return bool false if no readable report file is available, true otherwise
   */
  public function processUploadId($uploadId)
  {
    $this->heartbeat(0);

    $reportPre = array_key_exists(self::REPORT_KEY,$this->args) ? $this->args[self::REPORT_KEY] : "";
    // the option value may still be wrapped in quotes
    $reportPre = trim($reportPre, "\"'");
    global $SysConf;
    $fileBase = $SysConf['FOSSOLOGY']['path'] . "/ReportImport/";
    $report = $fileBase . $reportPre;
    if (empty($reportPre) || !is_readable($report)) {
      echo "No report was uploaded\n";
      echo "Maybe the permissions on " . htmlspecialchars($fileBase) . " are not sufficient\n";
      return false;
    }

    $this->dbManager->insertTableRow('reportgen',
      array('upload_fk'=>$uploadId, 'job_fk'=>$this->jobId, 'filepath'=>$report),
      __METHOD__.'addToReportgen');

    $configuration = new ReportImportConfiguration($this->args);

    $this->walkAllFiles($report, $uploadId, $configuration);

    return true;
  }

  /**
   * Get the item tree bounds of the root item (the uploadtree row without
   * parent) of an upload.
   *
   * @param int $upload_pk
   * @return mixed whatever UploadDao::getItemTreeBounds() returns
   * @throws \Exception if the upload has no root uploadtree entry
   */
  private function getItemTreeBounds($upload_pk)
  {
    $uploadtreeTablename = GetUploadtreeTableName($upload_pk);

    $uploadtreeRec = $this->dbManager->getSingleRow(
      'SELECT uploadtree_pk FROM uploadtree WHERE parent IS NULL AND upload_fk=$1',
      array($upload_pk),
      __METHOD__.'.find.uploadtree.to.use.in.browse.link');
    // getSingleRow() signals "no row" with false (see setAgent_PK()); do not
    // index into it in that case.
    if ($uploadtreeRec === false) {
      throw new \Exception("root uploadtree entry could not be determined for upload_pk=[$upload_pk]");
    }
    $uploadtree_pk = $uploadtreeRec['uploadtree_pk'];
    return $this->uploadDao->getItemTreeBounds($uploadtree_pk, $uploadtreeTablename);
  }

  /**
   * Find the pfile entries of the upload that correspond to one file of the
   * report.
   *
   * Lookup order:
   *  1. by file name; if the report provides a hash for $hashAlgo, only the
   *     file name matches with the same hash are returned (possibly none).
   *     If the report provides no hash for $hashAlgo, the file name matches
   *     are returned unverified.
   *  2. if there is no file name match, by hash in $pfilesPerHash.
   *
   * @param mixed  $fileId           id of the file in the report (not used for the lookup)
   * @param string $fileName         file name as given in the report
   * @param array  $pfilePerFileName pfile data indexed by file name
   * @param array|null $hashMap      hashes of the report file, indexed by algorithm name
   * @param array|null $pfilesPerHash lists of pfile data indexed by lower-case hash
   * @param string $hashAlgo         algorithm used for the comparison and the hash lookup
   * @return array list of matching pfile entries, empty if there is none
   */
  private static function getEntries($fileId, $fileName, &$pfilePerFileName, &$hashMap=NULL, &$pfilesPerHash=NULL, $hashAlgo="sha1")
  {
    $hasHashes = $hashMap !== null && sizeof($hashMap) > 0;
    $pfilesByFilename = self::getEntriesForFilename($fileName, $pfilePerFileName);

    if ($pfilesByFilename !== null && sizeof($pfilesByFilename) > 0) {
      // Without a hash for the requested algorithm there is nothing to verify
      // against, so the file name matches are accepted as they are.
      if (!$hasHashes || !isset($hashMap[$hashAlgo])) {
        return $pfilesByFilename;
      }

      $expectedHash = strtolower($hashMap[$hashAlgo]);
      $pfiles = array();
      foreach ($pfilesByFilename as $pfile) {
        if (isset($pfile[$hashAlgo]) && strtolower($pfile[$hashAlgo]) === $expectedHash) {
          $pfiles[] = $pfile;
        } else {
          print "INFO: the file with fileName=[$fileName] does not match the hash of pfile_pk=[" . $pfile['pfile_pk'] . "] and uploadtree_pk=[" . $pfile['uploadtree_pk'] . "]\n";
        }
      }
      return $pfiles;
    }

    if ($hasHashes && $pfilesPerHash !== null && sizeof($pfilesPerHash) > 0) {
      // honour the requested algorithm instead of a fixed 'sha1'
      return self::getEntriesForHash($hashMap, $pfilesPerHash, $hashAlgo);
    }

    return array();
  }

  /**
   * Find the pfile entry for a file name.
   *
   * An exact key match wins. Otherwise the first character of $filename is
   * dropped and the first key ending with the remainder is used; this is only
   * attempted if the remainder is longer than 3 characters.
   *
   * @param string $filename
   * @param array  $pfilesPerFileName pfile data indexed by file name
   * @return array list with the single matching entry, or an empty list
   */
  private static function getEntriesForFilename($filename, &$pfilesPerFileName)
  {
    if (array_key_exists($filename, $pfilesPerFileName)) {
      return array($pfilesPerFileName[$filename]);
    }

    # Allow matching "./README.MD" with "pack.tar.gz/pack.tar/README.MD" by
    # matching "/README.MD" with "/README.MD".
    $length = strlen($filename) - 1;
    if ($length > 3) {
      $fileWithoutDot = substr($filename, -$length);
      foreach (array_keys($pfilesPerFileName) as $key) {
        if (substr($key, -$length) === $fileWithoutDot) {
          return array($pfilesPerFileName[$key]);
        }
      }
    }
    return array();
  }

  /**
   * Find the pfile entries for the hash of a report file.
   *
   * @param array  $hashMap       hashes of the report file, indexed by algorithm name
   * @param array  $pfilesPerHash lists of pfile data indexed by lower-case hash
   * @param string $hashAlgo      algorithm whose hash is looked up
   * @return array the entries stored for the hash, or an empty list
   */
  private static function getEntriesForHash(&$hashMap, &$pfilesPerHash, $hashAlgo)
  {
    // isset() also rejects a null hash value, which must not reach strtolower()
    if (!isset($hashMap[$hashAlgo])) {
      return array();
    }

    $hash = strtolower($hashMap[$hashAlgo]);
    if (!array_key_exists($hash, $pfilesPerHash)) {
      return array();
    }
    return $pfilesPerHash[$hash];
  }

  /**
   * Create and parse the import source matching the report file name.
   *
   * Files ending in ".rdf", ".rdf.xml" or ".ttl" are treated as SPDX: version
   * "2.2" and "2.3" are parsed with SpdxTwoImportSource, any other version
   * with SpdxThreeImportSource. Files ending in ".xml" (including ".rdf.xml"
   * files whose SPDX parse did not succeed) are tried with XmlImportSource.
   *
   * @param string $reportFilename path of the report file
   * @return mixed the successfully parsed import source
   * @throws \Exception if no import source could parse the report
   */
  private function getImportSource($reportFilename)
  {
    $isSpdxCandidate = StringOperation::stringEndsWith($reportFilename, ".rdf") ||
      StringOperation::stringEndsWith($reportFilename, ".rdf.xml") ||
      StringOperation::stringEndsWith($reportFilename, ".ttl");

    if ($isSpdxCandidate) {
      // A throw-away instance is used only to read the version; the actual
      // import works on a fresh instance, as before.
      $parse = new SpdxTwoImportSource($reportFilename);
      $version = $parse->getVersion();
      if ($version == "2.2" || $version == "2.3") {
        $importSource = new SpdxTwoImportSource($reportFilename);
      } else {
        $importSource = new SpdxThreeImportSource($reportFilename);
      }
      if ($importSource->parse()) {
        return $importSource;
      }
    }

    if (StringOperation::stringEndsWith($reportFilename, ".xml")) {
      $importSource = new XmlImportSource($reportFilename);
      if ($importSource->parse()) {
        return $importSource;
      }
    }

    error_log("ERROR: can not handle report");
    throw new \Exception("unsupported report type with filename: $reportFilename");
  }

  /**
   * Walk over all files of the report and pass the data of every file that
   * can be matched to pfiles of the upload to the ReportImportSink.
   *
   * Files without a match are reported with a WARN line and skipped. The
   * number of matched pfiles is sent as heartbeat for every handled file.
   *
   * @param string $reportFilename path of the report file
   * @param int    $upload_pk      upload the report is applied to
   * @param ReportImportConfiguration $configuration import settings passed to the sink
   * @throws \Exception if the report type is not supported
   */
  public function walkAllFiles($reportFilename, $upload_pk, $configuration)
  {
    $source = $this->getImportSource($reportFilename);
    if ($source === NULL) {
      return;
    }

    $sink = new ReportImportSink($this->agent_pk, $this->userDao,
      $this->licenseDao, $this->clearingDao, $this->copyrightDao,
      $this->dbManager, $this->groupId, $this->userId, $this->jobId,
      $configuration);

    // Prepare data from DB
    $itemTreeBounds = $this->uploadDao->getParentItemBounds($upload_pk);
    $pfilePerFileName = $this->uploadDao->getPFileDataPerFileName($itemTreeBounds);
    $pfilesPerHash = $this->uploadDao->getPFilesDataPerHashAlgo($itemTreeBounds, 'sha1');
    $haveHashIndex = $pfilesPerHash !== NULL && sizeof($pfilesPerHash) > 0;

    foreach ($source->getAllFiles() as $fileId => $fileName) {
      // the hashes of the report file are only needed if there is a hash index
      $hashMap = NULL;
      if ($haveHashIndex) {
        $hashMap = $source->getHashesMap($fileId);
      }

      $pfiles = self::getEntries($fileId,
        $fileName, $pfilePerFileName,
        $hashMap, $pfilesPerHash, 'sha1');

      if ($pfiles === null || sizeof($pfiles) === 0) {
        print "WARN: no match for fileId=[".$fileId."] with filename=[".$fileName."]\n";
        continue;
      }

      $this->heartbeat(sizeof($pfiles));

      $data = $source->getDataForFile($fileId)
        ->setPfiles($pfiles);
      $sink->handleData($data);
    }
  }
}
int agent_pk
Definition: agent.h:74
Structure of an Agent with all required parameters.
Definition: Agent.php:41
heartbeat($newProcessed)
Send heartbeat to the scheduler.
Definition: Agent.php:203
Fossology exception.
Definition: Exception.php:15
static stringEndsWith($haystack, $needle)
processUploadId($uploadId)
Given an upload ID, process the items in it.
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:690
FUNCTION char * GetUploadtreeTableName(PGconn *pgConn, int upload_pk)
Get the uploadtree table name for this upload_pk. If upload_pk does not exist, return "uploadtree".
Definition: libfossagent.c:414
int jobId
The id of the job.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16