FOSSology  4.7.0
Open Source License Compliance by Open Source Software
LicenseCsvImport.php
Go to the documentation of this file.
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2014-2015 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
9 
15 
26 {
29  protected $dbManager;
32  protected $userDao;
35  protected $delimiter = ',';
38  protected $enclosure = '"';
41  protected $headrow = null;
44  protected $nkMap = array();
47  protected $mdkMap = array();
50  protected $alias = array(
51  'shortname'=>array('shortname','Short Name'),
52  'licensetype'=>array('licensetype','License Type'),
53  'fullname'=>array('fullname','Long Name'),
54  'spdx_id'=>array('spdx_id', 'SPDX ID'),
55  'text'=>array('text','Full Text'),
56  'parent_shortname'=>array('parent_shortname','Decider Short Name'),
57  'report_shortname'=>array('report_shortname','Regular License Text Short Name'),
58  'url'=>array('url','URL'),
59  'notes'=>array('notes'),
60  'source'=>array('source','Foreign ID'),
61  'risk'=>array('risk','risk_level'),
62  'group'=>array('group','License group'),
63  'obligations'=>array('obligations','License obligations'),
64  'external_id'=>array('id','LicenseDB Id'),
65  );
66 
69  protected $obligationMap;
70 
77  {
78  $this->dbManager = $dbManager;
79  $this->userDao = $userDao;
80  $this->obligationMap = $obligationMap;
81  }
86  public function setDelimiter($delimiter=',')
87  {
88  $this->delimiter = substr($delimiter,0,1);
89  }
90 
95  public function setEnclosure($enclosure='"')
96  {
97  $this->enclosure = substr($enclosure,0,1);
98  }
99 
106  public function handleFile($filename, $fileExtension)
107  {
108  if (!is_file($filename) || ($handle = fopen($filename, 'r')) === false) {
109  return _('Internal error');
110  }
111  $cnt = -1;
112  $msg = '';
113  try {
114  if ($fileExtension == 'csv') {
115  while (($row = fgetcsv($handle,0,$this->delimiter,$this->enclosure)) !== false) {
116  $log = $this->handleCsv($row);
117  if (!empty($log)) {
118  $msg .= "$log\n";
119  }
120  $cnt++;
121  }
122  $msg .= _('Read csv').(": $cnt ")._('licenses');
123  } else {
124  $jsonContent = fread($handle, filesize($filename));
125  $data = json_decode($jsonContent, true);
126  if ($data === null && json_last_error() !== JSON_ERROR_NONE) {
127  $msg .= "Error decoding JSON: " . json_last_error_msg() . "\n";
128  }
129  $msg = $this->importJsonData($data, $msg);
130  $msg .= _('Read json').(":". count($data) ." ")._('licenses');
131  }
132  } catch(\Exception $e) {
133  fclose($handle);
134  return $msg .= _('Error while parsing file').': '.$e->getMessage();
135  }
136  fclose($handle);
137  return $msg;
138  }
139 
146  function handleRowJson($row)
147  {
148  $defaultValues = array(
149  'parent_shortname' => null,
150  'report_shortname' => null,
151  'url' => '',
152  'notes' => '',
153  'source' => '',
154  'risk' => 0,
155  'group' => null,
156  'spdx_id' => null
157  );
158  $newArray = array();
159  foreach ($row as $key => $value) {
160  $newKey = $key;
161  foreach ($this->alias as $aliasKey => $aliasValues) {
162  if (in_array($key, $aliasValues)) {
163  $newKey = $aliasKey;
164  break;
165  }
166  }
167  $newArray[$newKey] = $value;
168  }
169  foreach ($defaultValues as $key => $defaultValue) {
170  if (!array_key_exists($key, $newArray)) {
171  $newArray[$key] = $defaultValue;
172  }
173  }
174  return $newArray;
175  }
176 
183  private function handleCsv($row)
184  {
185  if ($this->headrow === null) {
186  $this->headrow = $this->handleHeadCsv($row);
187  return 'head okay';
188  }
189 
190  $mRow = array();
191  foreach (array('shortname','fullname','text') as $needle) {
192  $mRow[$needle] = $row[$this->headrow[$needle]];
193  }
194  foreach (array('parent_shortname' => null, 'report_shortname' => null,
195  'url' => '', 'notes' => '', 'source' => '', 'risk' => 0,
196  'group' => null, 'spdx_id' => null, 'licensetype' => "Permisssive"
197  ) as $optNeedle=>$defaultValue) {
198  $mRow[$optNeedle] = $defaultValue;
199  if ($this->headrow[$optNeedle]!==false && array_key_exists($this->headrow[$optNeedle], $row)) {
200  $mRow[$optNeedle] = $row[$this->headrow[$optNeedle]];
201  }
202  }
203 
204  return $this->handleCsvLicense($mRow);
205  }
206 
213  private function handleJsonLicense($row)
214  {
215  $mRow = $this->handleRowJson($row);
216  foreach (array('parent_shortname' => null, 'report_shortname' => null,
217  'url' => '', 'notes' => '', 'source' => '', 'risk' => 0,
218  'group' => null, 'spdx_id' => null, 'licensetype' => 'Permisssive',
219  'obligation_ids' => array()
220  ) as $optNeedle => $defaultValue) {
221  if (!array_key_exists($optNeedle, $mRow)) {
222  $mRow[$optNeedle] = $defaultValue;
223  }
224  }
225 
226  return $this->handleLicenseDBLicenseImport($mRow);
227  }
228 
235  private function handleHeadCsv($row)
236  {
237  $headrow = array();
238  $row[0] = trim($row[0], "\xEF\xBB\xBF"); // Remove BOM
239  foreach (array('shortname','fullname','text') as $needle) {
240  $col = ArrayOperation::multiSearch($this->alias[$needle], $row);
241  if (false === $col) {
242  throw new \Exception("Undetermined position of $needle");
243  }
244  $headrow[$needle] = $col;
245  }
246  foreach (array('parent_shortname', 'report_shortname', 'url', 'notes',
247  'source', 'risk', 'group', 'spdx_id', 'licensetype') as $optNeedle) {
248  $headrow[$optNeedle] = ArrayOperation::multiSearch($this->alias[$optNeedle], $row);
249  }
250  return $headrow;
251  }
252 
259  private function updateLicense($row, $rfPk)
260  {
261  $stmt = __METHOD__ . '.getOldLicense';
262  $oldLicense = $this->dbManager->getSingleRow('SELECT ' .
263  'rf_shortname, rf_fullname, rf_spdx_id, rf_text, rf_url, rf_notes, rf_source, rf_risk, rf_licensetype ' .
264  'FROM license_ref WHERE rf_pk = $1', array($rfPk), $stmt);
265 
266  $log = "License '$row[shortname]' already exists in DB (id = $rfPk)";
267  $stmt = __METHOD__ . '.updateLicense';
268  $sql = "UPDATE license_ref SET ";
269  if (! empty($row['group'])) {
270  $sql = "UPDATE license_candidate SET ";
271  }
272  $extraParams = array();
273  $param = array($rfPk);
274  if (isset($row['fullname']) && $row['fullname'] != $oldLicense['rf_fullname']) {
275  $param[] = $row['fullname'];
276  $stmt .= '.fullN';
277  $extraParams[] = "rf_fullname=$" . count($param);
278  $log .= ", updated fullname";
279  }
280  // update shortname only if import from licensedb
281  if (isset($row['external_id']) && isset($row['shortname']) && $row['shortname'] !== $oldLicense['rf_shortname']) {
282  $param[] = $row['shortname'];
283  $stmt .= '.shortN';
284  $extraParams[] = "rf_shortname=$" . count($param);
285  $log .= ", updated shortname";
286  }
287  if (isset($row['spdx_id']) && $row['spdx_id'] != $oldLicense['rf_spdx_id']) {
288  $param[] = $row['spdx_id'];
289  $stmt .= '.spId';
290  $extraParams[] = "rf_spdx_id=$" . count($param);
291  $log .= ", updated SPDX ID";
292  }
293  if (isset($row['text']) && $row['text'] != $oldLicense['rf_text'] && $row['text'] != LicenseMap::TEXT_MAX_CHAR_LIMIT) {
294  $param[] = $row['text'];
295  $stmt .= '.text';
296  $extraParams[] = "rf_text=$" . count($param) . ",rf_md5=md5($" .
297  count($param) . ")";
298  $log .= ", updated text";
299  }
300  if (isset($row['url']) && $row['url'] != $oldLicense['rf_url']) {
301  $param[] = $row['url'];
302  $stmt .= '.url';
303  $extraParams[] = "rf_url=$" . count($param);
304  $log .= ", updated URL";
305  }
306  if (isset($row['notes']) && $row['notes'] != $oldLicense['rf_notes']) {
307  $param[] = $row['notes'];
308  $stmt .= '.notes';
309  $extraParams[] = "rf_notes=$" . count($param);
310  $log .= ", updated notes";
311  }
312  if (isset($row['source']) && $row['source'] != $oldLicense['rf_source']) {
313  $param[] = $row['source'];
314  $stmt .= '.updSource';
315  $extraParams[] = "rf_source=$".count($param);
316  $log .= ', updated the source';
317  }
318  if (isset($row['risk']) && $row['risk'] != $oldLicense['rf_risk']) {
319  $param[] = $row['risk'];
320  $stmt .= '.updRisk';
321  $extraParams[] = "rf_risk=$".count($param);
322  $log .= ', updated the risk level';
323  }
324  if (isset($row['licensetype']) && $row['licensetype'] != $oldLicense['rf_licensetype']) {
325  $param[] = $row['licensetype'];
326  $stmt .= '.types';
327  $extraParams[] = "rf_licensetype=$".count($param);
328  $log .= ', updated the licensetype';
329  }
330  if (count($param) > 1) {
331  $sql .= join(",", $extraParams);
332  $sql .= " WHERE rf_pk=$1;";
333  $this->dbManager->getSingleRow($sql, $param, $stmt);
334  if (!isset($row['external_id'])) {
335  $this->mdkMap[md5($row['text'])] = $rfPk;
336  }
337  }
338 
339  // edit license maps only in case of normal import
340  // In case of import from licensedb, the mapping info must come from licensedb
341  if ($row['external_id'] === null) {
342  $stmt = __METHOD__ . '.getOldMapping';
343  $sql = 'SELECT rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;';
344  $oldParent = null;
345  $oldParentRow = $this->dbManager->getSingleRow($sql, array($rfPk,
346  LicenseMap::CONCLUSION), $stmt);
347  if (!empty($oldParentRow)) {
348  $oldParent = $oldParentRow['rf_parent'];
349  }
350  $oldReport = null;
351  $oldReportRow = $this->dbManager->getSingleRow($sql, array($rfPk,
352  LicenseMap::REPORT), $stmt);
353  if (!empty($oldReportRow)) {
354  $oldReport = $oldReportRow['rf_parent'];
355  }
356 
357  $newParent = null;
358  $newParent = ($row['parent_shortname'] == null) ? null :
359  $this->getKeyFromShortname($row['parent_shortname']);
360 
361  $newReport = null;
362  $newReport = ($row['report_shortname'] == null) ? null :
363  $this->getKeyFromShortname($row['report_shortname']);
364 
365  if (($oldParent != $newParent) && $this->setMap($newParent, $rfPk, LicenseMap::CONCLUSION)) {
366  $log .= " with conclusion '$row[parent_shortname]'";
367  }
368  if (($oldReport != $newReport) && $this->setMap($newReport, $rfPk, LicenseMap::REPORT)) {
369  $log .= " reporting '$row[report_shortname]'";
370  }
371  }
372  return $log;
373  }
374 
383  private function handleCsvLicense($row)
384  {
385  if (empty($row['risk'])) {
386  $row['risk'] = 0;
387  }
388  if (empty($row['external_id'])) {
389  $row['external_id'] = null;
390  }
391  $rfPk = $this->getKeyFromShortname($row['shortname'], $row['group']);
392  $md5Match = $this->getKeyFromMd5($row['text']);
393 
394  // If shortname exists, does not collide with other texts and is not
395  // candidate
396  if ($rfPk !== false) {
397  if (! empty($row['group']) || ($md5Match == $rfPk || $md5Match === false)) {
398  return $this->updateLicense($row, $rfPk);
399  } else {
400  return "Error: MD5 checksum of '" . $row['shortname'] .
401  "' collides with license id=$md5Match";
402  }
403  }
404  if ($md5Match !== false && empty($row['group'])) {
405  return "Error: MD5 checksum of '" . $row['shortname'] .
406  "' collides with license id=$md5Match";
407  }
408 
409  $return = "";
410  if (!empty($row['group'])) {
411  $return = $this->insertNewLicense($row, "license_candidate");
412  } else {
413  $return = $this->insertNewLicense($row, "license_ref");
414  }
415  return $return['log'];
416  }
417 
430  private function handleLicenseDBLicenseImport($row)
431  {
432  if (empty($row["external_id"])) {
433  return "Error: external_id cannot be empty";
434  }
435  $stmt = __METHOD__ . ".getExistingLicense";
436  $rfPk = $this->dbManager->getSingleRow("SELECT rf_pk FROM " .
437  "license_ref WHERE rf_external_id=$1", array($row["external_id"]), $stmt);
438  $log = '';
439  if (!empty($rfPk)) {
440  // update license
441  $log .= $this->updateLicense($row, $rfPk['rf_pk']);
442 
443  $this->dbManager->begin();
444  // fetch new obligation associations
445  $stmt = __METHOD__ . "getNewLicenseObligations";
446  $newObIds = array();
447  foreach ($row['obligation_ids'] as $obExternalId) {
448  $obPk = $this->dbManager->getSingleRow("SELECT ob_pk FROM obligation_ref WHERE ob_external_id = $1;",
449  array($obExternalId), $stmt);
450  if (!empty($obPk)) {
451  $newObIds[] = $obPk['ob_pk'];
452  } else {
453  $log .= \sprintf('obligation with ob_external_id %s not found', $obExternalId);
454  }
455  }
456 
457  // fetch old obligation associations
458  $stmt = __METHOD__ . "getOldObligationAssociations";
459  $oldObIdsDB = $this->dbManager->getRows("SELECT ob_fk FROM obligation_map WHERE rf_fk = $1", array($rfPk['rf_pk']), $stmt);
460  $oldObIds = array();
461  foreach ($oldObIdsDB as $obId) {
462  $oldObIds[] = $obId['ob_fk'];
463  }
464 
465  // create diff of obligations between current associated obligations and licensedb
466  // associated obligations
467  $diff = ArrayOperation::getArrayDiffs($oldObIds, $newObIds);
468 
469  // delete associations
470  foreach ($diff['remove'] as $obid) {
471  $this->obligationMap->unassociateLicenseFromObligation($obid, $rfPk['rf_pk']);
472  }
473 
474  // insert associations
475  foreach ($diff['add'] as $obid) {
476  $this->obligationMap->associateLicenseWithObligation($obid, $rfPk['rf_pk']);
477  }
478  $this->dbManager->commit();
479  } else {
480  $licRetVal = $this->insertNewLicense($row, "license_ref");
481  $log .= $licRetVal['log'];
482 
483  $this->dbManager->begin();
484  $newObIds = array();
485  $stmt = __METHOD__ . "getNewLicenseObligationsForInsert";
486  foreach ($row['obligation_ids'] as $obExternalId) {
487  $obPk = $this->dbManager->getSingleRow("SELECT ob_pk FROM obligation_ref WHERE ob_external_id = $1;",
488  array($obExternalId), $stmt);
489  if (!empty($obPk)) {
490  $newObIds[] = $obPk['ob_pk'];
491  } else {
492  $log .= \sprintf('obligation with ob_external_id %s not found', $obExternalId);
493  }
494  }
495 
496  // insert associations
497  foreach ($newObIds as $obid) {
498  $this->obligationMap->associateLicenseWithObligation($obid, $licRetVal['pkey']);
499  }
500  $this->dbManager->commit();
501  }
502 
503  return $log;
504  }
505 
517  private function insertMapIfNontrivial($fromName,$toName,$usage)
518  {
519  $isNontrivial = ($fromName!==null && $fromName!=$toName && $this->getKeyFromShortname($fromName)!==false);
520  if ($isNontrivial) {
521  $this->dbManager->insertTableRow('license_map',
522  array('rf_fk'=>$this->getKeyFromShortname($toName),
523  'rf_parent'=>$this->getKeyFromShortname($fromName),
524  'usage'=> $usage));
525  }
526  return $isNontrivial;
527  }
528 
534  private function getKeyFromShortname($shortname, $groupFk = null)
535  {
536  $keyName = $shortname;
537  $tableName = "license_ref";
538  $addCondition = "AND rf_external_id IS NULL";
539  $statement = __METHOD__ . ".getId";
540  $params = array($shortname);
541 
542  if ($groupFk != null) {
543  $keyName .= $groupFk;
544  $tableName = "license_candidate";
545  $addCondition = "AND group_fk = $2";
546  $statement .= ".candidate";
547  $params[] = $this->userDao->getGroupIdByName($groupFk);
548  }
549  $sql = "SELECT rf_pk FROM ONLY $tableName WHERE rf_shortname = $1 $addCondition;";
550 
551  if (array_key_exists($keyName, $this->nkMap)) {
552  return $this->nkMap[$keyName];
553  }
554  $row = $this->dbManager->getSingleRow($sql, $params, $statement);
555  $this->nkMap[$keyName] = ($row===false) ? false : $row['rf_pk'];
556  return $this->nkMap[$keyName];
557  }
558 
564  private function getKeyFromMd5($licenseText)
565  {
566  $md5 = md5($licenseText);
567  if (array_key_exists($md5, $this->mdkMap)) {
568  return $this->mdkMap[$md5];
569  }
570  $row = $this->dbManager->getSingleRow("SELECT rf_pk " .
571  "FROM ONLY license_ref WHERE rf_md5=md5($1) AND rf_external_id IS NULL",
572  array($licenseText));
573  $this->mdkMap[$md5] = (empty($row)) ? false : $row['rf_pk'];
574  return $this->mdkMap[$md5];
575  }
576 
587  private function setMap($from, $to, $usage)
588  {
589  $return = false;
590  if (!empty($from)) {
591  $sql = "SELECT license_map_pk, rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;";
592  $statement = __METHOD__ . ".getCurrentMapping";
593  $row = $this->dbManager->getSingleRow($sql, array($to, $usage), $statement);
594  if (!empty($row) && $row['rf_parent'] != $from) {
595  $this->dbManager->updateTableRow("license_map", array(
596  'rf_fk' => $to,
597  'rf_parent' => $from,
598  'usage' => $usage
599  ), 'license_map_pk', $row['license_map_pk']);
600  $return = true;
601  } elseif (empty($row)) {
602  $this->dbManager->insertTableRow('license_map', array(
603  'rf_fk' => $to,
604  'rf_parent' => $from,
605  'usage' => $usage
606  ));
607  $return = true;
608  }
609  }
610  return $return;
611  }
612 
622  private function insertNewLicense($row, $tableName = "license_ref")
623  {
624  $stmtInsert = __METHOD__ . '.insert.' . $tableName;
625  $columns = array(
626  "rf_shortname" => $row['shortname'],
627  "rf_licensetype" => $row['licensetype'],
628  "rf_fullname" => $row['fullname'],
629  "rf_spdx_id" => $row['spdx_id'],
630  "rf_text" => $row['text'],
631  "rf_md5" => md5($row['text']),
632  "rf_detector_type" => 1,
633  "rf_url" => $row['url'],
634  "rf_notes" => $row['notes'],
635  "rf_source" => $row['source'],
636  "rf_risk" => $row['risk'],
637  "rf_external_id" => $row['external_id']
638  );
639 
640  $as = "";
641  if ($tableName == "license_candidate") {
642  $groupId = $this->userDao->getGroupIdByName($row['group']);
643  if (empty($groupId)) {
644  return "Error: Unable to insert candidate license " . $row['shortname'] .
645  " as group " . $row['group'] . " does not exist";
646  }
647  $columns["group_fk"] = $groupId;
648  $columns["marydone"] = $this->dbManager->booleanToDb(true);
649  $as = " as candidate license under group " . $row["group"];
650  }
651 
652  $newPk = $this->dbManager->insertTableRow($tableName, $columns, $stmtInsert, 'rf_pk');
653 
654  // populate license maps and cache only when the import is not from licensedb
655  $log = "Inserted '$row[shortname]' in DB" . $as;
656  if ($row['external_id'] === null) {
657  if ($tableName == "license_candidate") {
658  $this->nkMap[$row['shortname'].$row['group']] = $newPk;
659  } else {
660  $this->nkMap[$row['shortname']] = $newPk;
661  }
662  $this->mdkMap[md5($row['text'])] = $newPk;
663 
664  if ($this->insertMapIfNontrivial($row['parent_shortname'], $row['shortname'], LicenseMap::CONCLUSION)) {
665  $log .= " with conclusion '$row[parent_shortname]'";
666  }
667  if ($this->insertMapIfNontrivial($row['report_shortname'], $row['shortname'], LicenseMap::REPORT)) {
668  $log .= " reporting '$row[report_shortname]'";
669  }
670  }
671 
672  $return = array();
673  $return['log'] = $log;
674  $return['pkey'] = $newPk;
675  return $return;
676  }
677 
683  public function importJsonData($data, string $msg): string
684  {
685  foreach ($data as $row) {
686  $log = $this->handleJsonLicense($row);
687  if (!empty($log)) {
688  $msg .= "$log\n";
689  }
690  }
691  return $msg;
692  }
693 }
insertMapIfNontrivial($fromName, $toName, $usage)
Insert in license_map table if the license conclusion is non-trivial.
handleFile($filename, $fileExtension)
Read the CSV line by line and import it.
handleCsvLicense($row)
Handle a single row from CSV.
updateLicense($row, $rfPk)
Update the license info in the DB.
handleLicenseDBLicenseImport($row)
Handle a single row from csv import form LicenseDB.
setEnclosure($enclosure='"')
Update the enclosure.
getKeyFromShortname($shortname, $groupFk=null)
Get the license id using license shortname from DB or nkMap.
__construct(DbManager $dbManager, UserDao $userDao, $obligationMap=null)
handleHeadCsv($row)
Handle a row as head row.
insertNewLicense($row, $tableName="license_ref")
Insert a new license in DB.
setDelimiter($delimiter=',')
Update the delimiter.
setMap($from, $to, $usage)
Update license mappings.
Wrapper class for license map.
Definition: LicenseMap.php:19
Wrapper class for obligation map.
Fossology exception.
Definition: Exception.php:15
static getArrayDiffs(array $oldArray, array $newArray)
Get list of additions/removals to make $oldArray equal to $newArray.
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:690
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
Utility functions for specific applications.