FOSSology  4.5.1
Open Source License Compliance by Open Source Software
ReportImportAgent.php
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2015-2017,2024 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 namespace Fossology\ReportImport;
8 
19 
20 require_once 'SpdxTwoImportSource.php';
21 require_once 'SpdxThreeImportSource.php';
22 require_once 'XmlImportSource.php';
23 require_once 'ReportImportSink.php';
24 require_once 'ReportImportHelper.php';
25 require_once 'ReportImportConfiguration.php';
26 
27 require_once 'version.php';
28 require_once 'services.php';
29 
/**
 * Agent that imports an uploaded report file into an existing upload.
 *
 * The report is located below "<FOSSOLOGY path>/ReportImport/", parsed by one
 * of the import sources (SPDX RDF/Turtle or XML) and, for every file listed
 * in the report that can be matched to a pfile of the upload, the data from
 * the report is handed to a ReportImportSink.
 */
class ReportImportAgent extends Agent
{
  /** Name of the long option that carries the report file name. */
  const REPORT_KEY = "report";
  /** Name of the "addConcludedAsDecisions" long option (only registered here). */
  const ACLA_KEY = "addConcludedAsDecisions";

  /** Service fetched from the container under 'dao.upload'. */
  private $uploadDao;
  /** Service fetched from the container under 'dao.user'. */
  private $userDao;
  /** Service fetched from the container under 'db.manager'. */
  protected $dbManager;
  /** Service fetched from the container under 'dao.license'. */
  protected $licenseDao;
  /** Service fetched from the container under 'dao.clearing'. */
  protected $clearingDao;
  /** Service fetched from the container under 'dao.copyright'. */
  private $copyrightDao;

  /** @var int agent_pk of the newest agent row with this agent's name */
  protected $agent_pk;

  /**
   * Wire up the services, register the agent specific long options and
   * resolve the agent_pk.
   *
   * @throws \Exception if the agent_pk cannot be determined
   */
  public function __construct()
  {
    parent::__construct(AGENT_REPORTIMPORT_NAME, AGENT_REPORTIMPORT_VERSION, AGENT_REPORTIMPORT_REV);
    $this->uploadDao = $this->container->get('dao.upload');
    $this->dbManager = $this->container->get('db.manager');
    $this->userDao = $this->container->get('dao.user');
    $this->licenseDao = $this->container->get('dao.license');
    $this->clearingDao = $this->container->get('dao.clearing');
    $this->copyrightDao = $this->container->get('dao.copyright');
    // The trailing ':' marks both options as taking a value.
    $this->agentSpecifLongOptions[] = self::REPORT_KEY.':';
    $this->agentSpecifLongOptions[] = self::ACLA_KEY.':';

    $this->setAgent_PK();
  }

  /**
   * Look up the most recent agent row for this agent name and remember its
   * primary key in $this->agent_pk.
   *
   * @throws \Exception if no matching agent row exists
   */
  private function setAgent_PK()
  {
    // should be already set in $this->agentId?
    $row = $this->dbManager->getSingleRow(
      "SELECT agent_pk FROM agent WHERE agent_name = $1 order by agent_ts desc limit 1",
      array(AGENT_REPORTIMPORT_NAME), __METHOD__."select"
    );

    if ($row === false) {
      throw new \Exception("agent_pk could not be determined");
    }
    $this->agent_pk = intval($row['agent_pk']);
  }

  /**
   * Split additional "--key=value" / "--flag" options that were passed glued
   * to the value of one long option back into separate entries of $args.
   *
   * The value of $args[$longArgsKey] is cut at every " --"; the first piece
   * stays the value of $longArgsKey, each following piece becomes its own
   * entry ("key=value" => $args['key'] = 'value', otherwise
   * $args[piece] = true).
   *
   * @param array  $args         parsed arguments, modified in place
   * @param string $longArgsKey  key whose value should be split
   */
  private static function preWorkOnArgsFlp(&$args,$longArgsKey)
  {
    if (!is_array($args) || !array_key_exists($longArgsKey, $args)) {
      return;
    }

    echo "DEBUG: unrefined \$longArgs are: ".$args[$longArgsKey]."\n";
    $chunks = explode(" --", $args[$longArgsKey]);
    if (sizeof($chunks) <= 1) {
      // nothing was glued to the value
      return;
    }

    $args[$longArgsKey] = $chunks[0];
    foreach (array_slice($chunks, 1) as $chunk) {
      if (strpos($chunk, '=') !== false) {
        list($key, $value) = explode('=', $chunk, 2);
        $args[$key] = $value;
      } else {
        $args[$chunk] = true;
      }
    }
  }

  /**
   * Import the report given via the "report" option into the upload.
   *
   * @param int $uploadId upload to import the report into
   * @return bool false if no readable report file was found, true otherwise
   * @throws \Exception if the report type is not supported
   */
  public function processUploadId($uploadId)
  {
    global $SysConf;

    $this->heartbeat(0);

    self::preWorkOnArgsFlp($this->args, self::REPORT_KEY);

    // Guard against $this->args not being an array, like preWorkOnArgsFlp does.
    $reportPre = "";
    if (is_array($this->args) && array_key_exists(self::REPORT_KEY, $this->args)) {
      $reportPre = $this->args[self::REPORT_KEY];
    }

    $fileBase = $SysConf['FOSSOLOGY']['path'] . "/ReportImport/";
    $report = $fileBase . $reportPre;
    if (empty($reportPre) || !is_readable($report)) {
      echo "No report was uploaded\n";
      echo "Maybe the permissions on " . htmlspecialchars($fileBase) . " are not sufficient\n";
      return false;
    }

    $this->dbManager->insertTableRow('reportgen',
      array('upload_fk'=>$uploadId, 'job_fk'=>$this->jobId, 'filepath'=>$report),
      __METHOD__.'addToReportgen');

    $configuration = new ReportImportConfiguration($this->args);

    $this->walkAllFiles($report, $uploadId, $configuration);

    return true;
  }

  /**
   * Get the item tree bounds of the root item (parent IS NULL) of an upload.
   *
   * @param int $upload_pk
   * @return mixed whatever uploadDao->getItemTreeBounds() returns
   * @throws \Exception if the upload has no root uploadtree entry
   */
  private function getItemTreeBounds($upload_pk)
  {
    $uploadtreeTablename = GetUploadtreeTableName($upload_pk);

    $uploadtreeRec = $this->dbManager->getSingleRow(
      'SELECT uploadtree_pk FROM uploadtree WHERE parent IS NULL AND upload_fk=$1',
      array($upload_pk),
      __METHOD__.'.find.uploadtree.to.use.in.browse.link');
    if ($uploadtreeRec === false) {
      // getSingleRow signals "no row" with false (see setAgent_PK); do not index into it.
      throw new \Exception("no root uploadtree entry found for upload_pk=[$upload_pk]");
    }
    $uploadtree_pk = $uploadtreeRec['uploadtree_pk'];
    return $this->uploadDao->getItemTreeBounds($uploadtree_pk, $uploadtreeTablename);
  }

  /**
   * Find the pfile rows of the upload that correspond to one file of the report.
   *
   * Matching is done by file name first. If the report provides a hash for
   * $hashAlgo, name matches whose hash differs are dropped (an INFO line is
   * printed for each). If the report has no hash for $hashAlgo there is
   * nothing to compare, so the name matches are returned unfiltered.
   * Only when no file name matches, the lookup falls back to the hash.
   *
   * @param mixed      $fileId           id of the file in the report (unused)
   * @param string     $fileName         file name as given in the report
   * @param array      $pfilePerFileName pfile rows keyed by file name
   * @param array|null $hashMap          hashes from the report keyed by algorithm
   * @param array|null $pfilesPerHash    lists of pfile rows keyed by lower-case hash
   * @param string     $hashAlgo         hash algorithm to compare with
   * @return array list of matching pfile rows, empty if nothing matched
   */
  private static function getEntries($fileId, $fileName, &$pfilePerFileName, &$hashMap=NULL, &$pfilesPerHash=NULL, $hashAlgo="sha1")
  {
    $reportHasHashes = $hashMap !== null && sizeof($hashMap) > 0;
    $pfilesByFilename = self::getEntriesForFilename($fileName, $pfilePerFileName);

    if ($pfilesByFilename !== null && sizeof($pfilesByFilename) > 0) {
      if (!$reportHasHashes || !isset($hashMap[$hashAlgo])) {
        // No hash of the requested algorithm in the report: nothing to verify against.
        return $pfilesByFilename;
      }

      $expectedHash = strtolower($hashMap[$hashAlgo]);
      $pfiles = array();
      foreach ($pfilesByFilename as $pfile) {
        $actualHash = isset($pfile[$hashAlgo]) ? strtolower($pfile[$hashAlgo]) : '';
        if ($actualHash !== $expectedHash) {
          print "INFO: the file with fileName=[$fileName] does not match the hash of pfile_pk=[" . $pfile['pfile_pk'] . "] and uploadtree_pk=[" . $pfile['uploadtree_pk'] . "]\n";
        } else {
          $pfiles[] = $pfile;
        }
      }
      return $pfiles;
    }

    if ($reportHasHashes && $pfilesPerHash !== null && sizeof($pfilesPerHash) > 0) {
      // Use the requested algorithm instead of a hard-coded 'sha1'.
      return self::getEntriesForHash($hashMap, $pfilesPerHash, $hashAlgo);
    }

    return array();
  }

  /**
   * Find the pfile row for a file name of the report.
   *
   * An exact key match wins. Otherwise the name is matched as a path suffix
   * on a directory boundary, so that "./README.MD" matches
   * "pack.tar.gz/pack.tar/README.MD" via the common "/README.MD". Names that
   * are too short are not suffix matched. The first matching key is used.
   *
   * @param string $filename          file name as given in the report
   * @param array  $pfilesPerFileName pfile rows keyed by file name
   * @return array list with the single matching row, or an empty list
   */
  private static function getEntriesForFilename($filename, &$pfilesPerFileName)
  {
    if (array_key_exists($filename, $pfilesPerFileName)) {
      return array($pfilesPerFileName[$filename]);
    }

    // Keep the historic minimum length: short names are too ambiguous for suffix matching.
    if (strlen($filename) - 1 <= 3) {
      return array();
    }

    // Build a suffix that always starts at a path separator. Blindly cutting
    // off the first character would let "foo.c" match "dir/zoo.c".
    if (substr($filename, 0, 2) === './') {
      $suffix = substr($filename, 1);
    } elseif (substr($filename, 0, 1) === '/') {
      $suffix = $filename;
    } else {
      $suffix = '/' . $filename;
    }
    $suffixLength = strlen($suffix);

    foreach (array_keys($pfilesPerFileName) as $key) {
      // array_keys() yields integers for numeric keys, hence the cast.
      if (substr((string) $key, -$suffixLength) === $suffix) {
        return array($pfilesPerFileName[$key]);
      }
    }
    return array();
  }

  /**
   * Look up the pfile rows that have the hash given in the report.
   *
   * @param array  $hashMap       hashes from the report keyed by algorithm
   * @param array  $pfilesPerHash lists of pfile rows keyed by lower-case hash
   * @param string $hashAlgo      algorithm whose hash should be looked up
   * @return array the entry of $pfilesPerHash for that hash, or an empty list
   */
  private static function getEntriesForHash(&$hashMap, &$pfilesPerHash, $hashAlgo)
  {
    if (!array_key_exists($hashAlgo, $hashMap)) {
      return array();
    }

    $hash = strtolower($hashMap[$hashAlgo]);
    if (!array_key_exists($hash, $pfilesPerHash)) {
      return array();
    }
    return $pfilesPerHash[$hash];
  }

  /**
   * Create and parse the import source that fits the report file.
   *
   * ".rdf", ".rdf.xml" and ".ttl" files are treated as SPDX: versions "2.2"
   * and "2.3" are read with SpdxTwoImportSource, everything else with
   * SpdxThreeImportSource. If that did not yield a parsed source and the
   * name ends in ".xml", XmlImportSource is tried.
   *
   * @param string $reportFilename path of the report file
   * @return object the successfully parsed import source
   * @throws \Exception if no import source could parse the report
   */
  private function getImportSource($reportFilename)
  {
    $looksLikeSpdx = StringOperation::stringEndsWith($reportFilename, ".rdf") ||
      StringOperation::stringEndsWith($reportFilename, ".rdf.xml") ||
      StringOperation::stringEndsWith($reportFilename, ".ttl");

    if ($looksLikeSpdx) {
      // A separate instance is used only to read the version of the report.
      $parse = new SpdxTwoImportSource($reportFilename);
      $version = $parse->getVersion();
      if ($version == "2.2" || $version == "2.3") {
        $importSource = new SpdxTwoImportSource($reportFilename);
      } else {
        $importSource = new SpdxThreeImportSource($reportFilename);
      }
      if ($importSource->parse()) {
        return $importSource;
      }
    }

    if (StringOperation::stringEndsWith($reportFilename, ".xml")) {
      $importSource = new XmlImportSource($reportFilename);
      if ($importSource->parse()) {
        return $importSource;
      }
    }

    error_log("ERROR: can not handle report");
    throw new \Exception("unsupported report type with filename: $reportFilename");
  }

  /**
   * Walk over all files of the report and pass the data of every file that
   * can be matched to pfiles of the upload on to the sink.
   *
   * Files without a match are reported with a WARN line and skipped.
   *
   * @param string $reportFilename path of the report file
   * @param int    $upload_pk      upload to import into
   * @param ReportImportConfiguration $configuration import configuration
   * @throws \Exception if the report type is not supported
   */
  public function walkAllFiles($reportFilename, $upload_pk, $configuration)
  {
    $source = $this->getImportSource($reportFilename);
    if ($source === NULL) {
      return;
    }

    $sink = new ReportImportSink($this->agent_pk, $this->userDao,
      $this->licenseDao, $this->clearingDao, $this->copyrightDao,
      $this->dbManager, $this->groupId, $this->userId, $this->jobId,
      $configuration);

    // Prepare data from DB
    $itemTreeBounds = $this->uploadDao->getParentItemBounds($upload_pk);
    $pfilePerFileName = $this->uploadDao->getPFileDataPerFileName($itemTreeBounds);
    $pfilesPerHash = $this->uploadDao->getPFilesDataPerHashAlgo($itemTreeBounds, 'sha1');

    // Hashes of the report are only fetched if the upload has hashes to compare with.
    $uploadHasHashes = $pfilesPerHash !== NULL && sizeof($pfilesPerHash) > 0;

    foreach ($source->getAllFiles() as $fileId => $fileName) {
      $hashMap = NULL;
      if ($uploadHasHashes) {
        $hashMap = $source->getHashesMap($fileId);
      }

      $pfiles = self::getEntries($fileId,
        $fileName, $pfilePerFileName,
        $hashMap, $pfilesPerHash, 'sha1');

      if ($pfiles === null || sizeof($pfiles) === 0) {
        print "WARN: no match for fileId=[".$fileId."] with filename=[".$fileName."]\n";
        continue;
      }

      $this->heartbeat(sizeof($pfiles));

      $data = $source->getDataForFile($fileId)
        ->setPfiles($pfiles);
      $sink->handleData($data);
    }
  }
}
int agent_pk
Definition: agent.h:74
Structure of an Agent with all required parameters.
Definition: Agent.php:41
heartbeat($newProcessed)
Send heartbeat to the scheduler.
Definition: Agent.php:203
Fossology exception.
Definition: Exception.php:15
static stringEndsWith($haystack, $needle)
static preWorkOnArgsFlp(&$args, $longArgsKey)
processUploadId($uploadId)
Given an upload ID, process the items in it.
FUNCTION char * GetUploadtreeTableName(PGconn *pgConn, int upload_pk)
Get the uploadtree table name for this upload_pk If upload_pk does not exist, return "uploadtree".
Definition: libfossagent.c:414
int jobId
The id of the job.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
list_t type structure used to keep various lists. (e.g. there are multiple lists).
Definition: nomos.h:308