FOSSology  4.4.0
Open Source License Compliance by Open Source Software
ReportImportAgent.php
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2015-2017 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 namespace Fossology\ReportImport;
8 
17 
18 require_once 'SpdxTwoImportSource.php';
19 require_once 'XmlImportSource.php';
20 require_once 'ReportImportSink.php';
21 require_once 'ReportImportHelper.php';
22 require_once 'ReportImportConfiguration.php';
23 
24 require_once 'version.php';
25 require_once 'services.php';
26 
/**
 * Agent that imports the content of a previously uploaded report file
 * (".xml" or ".rdf") into an existing upload.
 *
 * For every file listed in the report the agent looks for matching pfiles of
 * the upload (by file name, optionally verified / found by hash) and hands the
 * report data for that file to a ReportImportSink.
 */
class ReportImportAgent extends Agent
{
  /** Name of the long option carrying the report file name. */
  const REPORT_KEY = "report";
  /** Name of the long option "addConcludedAsDecisions". */
  const ACLA_KEY = "addConcludedAsDecisions";

  /** Service registered in the container as 'dao.upload'. */
  private $uploadDao;
  /** Service registered in the container as 'dao.user'. */
  private $userDao;
  /** Service registered in the container as 'db.manager'. */
  protected $dbManager;
  /** Service registered in the container as 'dao.license'. */
  protected $licenseDao;
  /** Service registered in the container as 'dao.clearing'. */
  protected $clearingDao;
  /** Service registered in the container as 'dao.copyright'. */
  private $copyrightDao;

  /** @var int Primary key of the newest agent row of this agent. */
  protected $agent_pk;

  /**
   * Fetch the required services, register the agent specific long options
   * and determine the agent_pk.
   *
   * @throws \Exception if the agent_pk can not be determined
   */
  function __construct()
  {
    parent::__construct(AGENT_REPORTIMPORT_NAME, AGENT_REPORTIMPORT_VERSION, AGENT_REPORTIMPORT_REV);
    $this->uploadDao = $this->container->get('dao.upload');
    $this->dbManager = $this->container->get('db.manager');
    $this->userDao = $this->container->get('dao.user');
    $this->licenseDao = $this->container->get('dao.license');
    $this->clearingDao = $this->container->get('dao.clearing');
    $this->copyrightDao = $this->container->get('dao.copyright');
    // the trailing ':' marks both options as taking a value
    $this->agentSpecifLongOptions[] = self::REPORT_KEY.':';
    $this->agentSpecifLongOptions[] = self::ACLA_KEY.':';

    $this->setAgent_PK();
  }

  /**
   * Look up the most recent agent row with this agent's name and store its
   * primary key in $this->agent_pk.
   *
   * @throws \Exception if no such row exists
   */
  private function setAgent_PK()
  {
    // should be already set in $this->agentId?
    $row = $this->dbManager->getSingleRow(
      "SELECT agent_pk FROM agent WHERE agent_name = $1 order by agent_ts desc limit 1",
      array(AGENT_REPORTIMPORT_NAME), __METHOD__."select"
    );

    if ($row === false)
    {
      throw new \Exception("agent_pk could not be determined");
    }
    $this->agent_pk = intval($row['agent_pk']);
  }

  /**
   * Split a long option value that still contains further " --key=value" or
   * " --flag" chunks into separate entries of $args.
   *
   * The value stored under $longArgsKey is cut at the first " --"; every
   * following chunk becomes $args[key] = value, or $args[chunk] = true when
   * the chunk has no '='.
   *
   * @param array  $args        argument map, modified in place
   * @param string $longArgsKey key whose value has to be split
   */
  private static function preWorkOnArgsFlp(&$args,$longArgsKey)
  {
    if (!is_array($args) || !array_key_exists($longArgsKey, $args))
    {
      return;
    }

    echo "DEBUG: unrefined \$longArgs are: ".$args[$longArgsKey]."\n";
    $chunks = explode(" --", $args[$longArgsKey]);
    if (sizeof($chunks) <= 1)
    {
      return;
    }

    $args[$longArgsKey] = $chunks[0];
    foreach (array_slice($chunks, 1) as $chunk)
    {
      if (strpos($chunk, '=') !== false)
      {
        list($key, $value) = explode('=', $chunk, 2);
        $args[$key] = $value;
      }
      else
      {
        $args[$chunk] = true;
      }
    }
  }

  /**
   * Import the report named by the "report" option into the given upload.
   *
   * The report is expected below "<FOSSOLOGY path>/ReportImport/". A row is
   * added to the reportgen table before the report is processed.
   *
   * @param int $uploadId id of the upload the report is imported into
   * @return bool false if no readable report was found, true otherwise
   */
  function processUploadId($uploadId)
  {
    $this->heartbeat(0);

    self::preWorkOnArgsFlp($this->args, self::REPORT_KEY);

    $reportPre = array_key_exists(self::REPORT_KEY,$this->args) ? $this->args[self::REPORT_KEY] : "";
    global $SysConf;
    $fileBase = $SysConf['FOSSOLOGY']['path']."/ReportImport/";
    $report = $fileBase.$reportPre;
    if(empty($reportPre) || !is_readable($report))
    {
      echo "No report was uploaded\n";
      echo "Maybe the permissions on ".htmlspecialchars($fileBase)." are not sufficient\n";
      return false;
    }

    $this->dbManager->insertTableRow('reportgen',
      array('upload_fk'=>$uploadId, 'job_fk'=>$this->jobId, 'filepath'=>$report),
      __METHOD__.'addToReportgen');

    $configuration = new ReportImportConfiguration($this->args);

    $this->walkAllFiles($report, $uploadId, $configuration);

    return true;
  }

  /**
   * Get the item tree bounds of the root item (parent IS NULL) of an upload.
   *
   * @param int $upload_pk id of the upload
   * @return mixed whatever the upload DAO's getItemTreeBounds() returns
   * @throws \Exception if the upload has no root uploadtree item
   */
  private function getItemTreeBounds($upload_pk)
  {
    $uploadtreeTablename = GetUploadtreeTableName($upload_pk);

    $uploadtreeRec = $this->dbManager->getSingleRow(
      'SELECT uploadtree_pk FROM uploadtree WHERE parent IS NULL AND upload_fk=$1',
      array($upload_pk),
      __METHOD__.'.find.uploadtree.to.use.in.browse.link');
    // getSingleRow() signals "no row" with false (see setAgent_PK); indexing
    // that value would silently pass null on to the DAO.
    if ($uploadtreeRec === false)
    {
      throw new \Exception("root uploadtree item could not be determined for upload_pk=$upload_pk");
    }
    $uploadtree_pk = $uploadtreeRec['uploadtree_pk'];
    return $this->uploadDao->getItemTreeBounds($uploadtree_pk, $uploadtreeTablename);
  }

  /**
   * Find the pfile entries of the upload which belong to one file of the
   * report.
   *
   * Strategy:
   *  - if the file name matches, the matches are returned; when the report
   *    provides a hash for $hashAlgo only the matches with the same hash
   *    (case-insensitive) are kept,
   *  - if the file name does not match, the entries are looked up by the
   *    hash of $hashAlgo,
   *  - otherwise an empty array is returned.
   *
   * @param mixed      $fileId           id of the file in the report (unused here)
   * @param string     $fileName         file name as given in the report
   * @param array      $pfilePerFileName pfile data indexed by file name
   * @param array|null $hashMap          hashes of the file, indexed by algorithm
   * @param array|null $pfilesPerHash    pfile data indexed by hash
   * @param string     $hashAlgo         algorithm used for comparison / lookup
   * @return array matching pfile entries, possibly empty
   */
  private static function getEntries($fileId, $fileName, &$pfilePerFileName, &$hashMap=NULL, &$pfilesPerHash=NULL, $hashAlgo="sha1")
  {
    $pfilesByFilename = self::getEntriesForFilename($fileName, $pfilePerFileName);

    // A hash can only be used if the report provides one for this algorithm.
    $hashKnown = is_array($hashMap) && array_key_exists($hashAlgo, $hashMap);

    // getEntriesForFilename() always returns an array, so "found by name"
    // means "non-empty". (The former check was always true and made the
    // lookup by hash below unreachable.)
    if (sizeof($pfilesByFilename) > 0)
    {
      if (!$hashKnown)
      {
        return $pfilesByFilename;
      }

      $expectedHash = strtolower($hashMap[$hashAlgo]);
      $pfiles = array();
      foreach ($pfilesByFilename as $pfile)
      {
        if (strtolower($pfile[$hashAlgo]) !== $expectedHash)
        {
          print "INFO: the file with fileName=[$fileName] does not match the hash of pfile_pk=[" . $pfile['pfile_pk'] . "] and uploadtree_pk=[" . $pfile['uploadtree_pk'] . "]\n";
        }
        else
        {
          $pfiles[] = $pfile;
        }
      }
      return $pfiles;
    }

    if ($hashKnown && $pfilesPerHash !== null && sizeof($pfilesPerHash) > 0)
    {
      return self::getEntriesForHash($hashMap, $pfilesPerHash, $hashAlgo);
    }

    return array();
  }

  /**
   * Look up the pfile entry for a file name.
   *
   * An exact key match wins. Otherwise, for names longer than 3 characters,
   * the first key that ends with $filename is used.
   *
   * @param string $filename          file name to look for
   * @param array  $pfilesPerFileName pfile data indexed by file name
   * @return array array with the single matching entry, or an empty array
   */
  private static function getEntriesForFilename($filename, &$pfilesPerFileName)
  {
    if(array_key_exists($filename, $pfilesPerFileName))
    {
      return array($pfilesPerFileName[$filename]);
    }
    $length = strlen($filename);
    if($length > 3)
    {
      foreach(array_keys($pfilesPerFileName) as $key)
      {
        if(substr($key, -$length) === $filename)
        {
          return array($pfilesPerFileName[$key]);
        }
      }
    }
    return array();
  }

  /**
   * Look up the pfile entries for the hash of the given algorithm.
   *
   * The hash is lower-cased before the lookup.
   * NOTE(review): this assumes the keys of $pfilesPerHash are lower-case
   * hashes - confirm against the DAO that builds the map.
   *
   * @param array  $hashMap       hashes of the file, indexed by algorithm
   * @param array  $pfilesPerHash pfile data indexed by hash
   * @param string $hashAlgo      algorithm whose hash is used as key
   * @return array the value stored for the hash, or an empty array
   */
  private static function getEntriesForHash(&$hashMap, &$pfilesPerHash, $hashAlgo)
  {
    if(!array_key_exists($hashAlgo, $hashMap))
    {
      return array();
    }

    $hash = strtolower($hashMap[$hashAlgo]);
    if(!array_key_exists($hash, $pfilesPerHash))
    {
      return array();
    }
    return $pfilesPerHash[$hash];
  }

  /**
   * Create and parse the import source matching the extension of the report.
   *
   * ".xml" is handled by XmlImportSource, ".rdf" by SpdxTwoImportSource.
   *
   * @param string $reportFilename path of the report file
   * @return XmlImportSource|SpdxTwoImportSource the successfully parsed source
   * @throws \Exception if the extension is unknown or parsing failed
   */
  private function getImportSource($reportFilename)
  {
    $extension = substr($reportFilename, -4);

    if($extension === ".xml")
    {
      $importSource = new XmlImportSource($reportFilename);
      if($importSource->parse())
      {
        return $importSource;
      }
    }

    if($extension === ".rdf")
    {
      $importSource = new SpdxTwoImportSource($reportFilename);
      if($importSource->parse())
      {
        return $importSource;
      }
    }

    error_log("ERROR: can not handle report");
    throw new \Exception("unsupported report type with filename: $reportFilename");
  }

  /**
   * Walk over all files of the report and pass the data of every file that
   * can be matched to a pfile of the upload to the import sink.
   *
   * Files without a match are reported with a "WARN:" line and skipped. The
   * heartbeat is increased by the number of matched pfiles per file.
   *
   * @param string                    $reportFilename path of the report file
   * @param int                       $upload_pk      id of the upload
   * @param ReportImportConfiguration $configuration  import configuration
   * @throws \Exception if the report can not be handled
   */
  public function walkAllFiles($reportFilename, $upload_pk, $configuration)
  {
    $source = $this->getImportSource($reportFilename);
    if($source === NULL)
    {
      return;
    }

    $sink = new ReportImportSink($this->agent_pk, $this->userDao, $this->licenseDao, $this->clearingDao, $this->copyrightDao,
      $this->dbManager, $this->groupId, $this->userId, $this->jobId, $configuration);

    // Prepare data from DB
    $itemTreeBounds = $this->getItemTreeBounds($upload_pk);
    $pfilePerFileName = $this->uploadDao->getPFileDataPerFileName($itemTreeBounds);
    $pfilesPerHash = $this->uploadDao->getPFilesDataPerHashAlgo($itemTreeBounds, 'sha1');

    foreach ($source->getAllFiles() as $fileId => $fileName)
    {
      // hashes of the report are only needed if the upload offers hashes
      $hashMap = NULL;
      if ($pfilesPerHash !== NULL && sizeof($pfilesPerHash) > 0)
      {
        $hashMap = $source->getHashesMap($fileId);
      }

      $pfiles = self::getEntries($fileId,
        $fileName, $pfilePerFileName,
        $hashMap, $pfilesPerHash, 'sha1');

      if ($pfiles === null || sizeof($pfiles) === 0)
      {
        print "WARN: no match for fileId=[".$fileId."] with filename=[".$fileName."]\n";
        continue;
      }

      $this->heartbeat(sizeof($pfiles));

      $data = $source->getDataForFile($fileId)
        ->setPfiles($pfiles);
      $sink->handleData($data);
    }
  }
}
int agent_pk
Definition: agent.h:74
Structure of an Agent with all required parameters.
Definition: Agent.php:41
heartbeat($newProcessed)
Send heartbeat to the scheduler.
Definition: Agent.php:203
static preWorkOnArgsFlp(&$args, $longArgsKey)
processUploadId($uploadId)
Given an upload ID, process the items in it.
FUNCTION char * GetUploadtreeTableName(PGconn *pgConn, int upload_pk)
Get the uploadtree table name for this upload_pk. If upload_pk does not exist, return "uploadtree".
Definition: libfossagent.c:414
int jobId
The id of the job.
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
list_t type structure used to keep various lists. (e.g. there are multiple lists).
Definition: nomos.h:308