FOSSology  4.5.0-rc1
Open Source License Compliance by Open Source Software
LicenseCsvImport.php
Go to the documentation of this file.
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2014-2015 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
9 
14 
25 {
28  protected $dbManager;
31  protected $userDao;
34  protected $delimiter = ',';
37  protected $enclosure = '"';
40  protected $headrow = null;
43  protected $nkMap = array();
46  protected $mdkMap = array();
49  protected $alias = array(
50  'shortname'=>array('shortname','Short Name'),
51  'licensetype'=>array('licensetype','License Type'),
52  'fullname'=>array('fullname','Long Name'),
53  'spdx_id'=>array('spdx_id', 'SPDX ID'),
54  'text'=>array('text','Full Text'),
55  'parent_shortname'=>array('parent_shortname','Decider Short Name'),
56  'report_shortname'=>array('report_shortname','Regular License Text Short Name'),
57  'url'=>array('url','URL'),
58  'notes'=>array('notes'),
59  'source'=>array('source','Foreign ID'),
60  'risk'=>array('risk','risk_level'),
61  'group'=>array('group','License group'),
62  'obligations'=>array('obligations','License obligations')
63  );
64 
71  {
72  $this->dbManager = $dbManager;
73  $this->userDao = $userDao;
74  }
75 
80  public function setDelimiter($delimiter=',')
81  {
82  $this->delimiter = substr($delimiter,0,1);
83  }
84 
89  public function setEnclosure($enclosure='"')
90  {
91  $this->enclosure = substr($enclosure,0,1);
92  }
93 
100  public function handleFile($filename, $fileExtension)
101  {
102  if (!is_file($filename) || ($handle = fopen($filename, 'r')) === false) {
103  return _('Internal error');
104  }
105  $cnt = -1;
106  $msg = '';
107  try {
108  if ($fileExtension == 'csv') {
109  while (($row = fgetcsv($handle,0,$this->delimiter,$this->enclosure)) !== false) {
110  $log = $this->handleCsv($row);
111  if (!empty($log)) {
112  $msg .= "$log\n";
113  }
114  $cnt++;
115  }
116  $msg .= _('Read csv').(": $cnt ")._('licenses');
117  } else {
118  $jsonContent = fread($handle, filesize($filename));
119  $data = json_decode($jsonContent, true);
120  if ($data === null && json_last_error() !== JSON_ERROR_NONE) {
121  $msg .= "Error decoding JSON: " . json_last_error_msg() . "\n";
122  }
123  $msg = $this->importJsonData($data, $msg);
124  $msg .= _('Read json').(":". count($data) ." ")._('licenses');
125  }
126  } catch(\Exception $e) {
127  fclose($handle);
128  return $msg .= _('Error while parsing file').': '.$e->getMessage();
129  }
130  fclose($handle);
131  return $msg;
132  }
133 
140  function handleRowJson($row)
141  {
142  $defaultValues = array(
143  'parent_shortname' => null,
144  'report_shortname' => null,
145  'url' => '',
146  'notes' => '',
147  'source' => '',
148  'risk' => 0,
149  'group' => null,
150  'spdx_id' => null
151  );
152  $newArray = array();
153  foreach ($row as $key => $value) {
154  $newKey = $key;
155  foreach ($this->alias as $aliasKey => $aliasValues) {
156  if (in_array($key, $aliasValues)) {
157  $newKey = $aliasKey;
158  break;
159  }
160  }
161  $newArray[$newKey] = $value;
162  }
163  foreach ($defaultValues as $key => $defaultValue) {
164  if (!array_key_exists($key, $newArray)) {
165  $newArray[$key] = $defaultValue;
166  }
167  }
168  return $newArray;
169  }
170 
177  private function handleCsv($row)
178  {
179  if ($this->headrow === null) {
180  $this->headrow = $this->handleHeadCsv($row);
181  return 'head okay';
182  }
183 
184  $mRow = array();
185  foreach (array('shortname','fullname','text') as $needle) {
186  $mRow[$needle] = $row[$this->headrow[$needle]];
187  }
188  foreach (array('parent_shortname' => null, 'report_shortname' => null,
189  'url' => '', 'notes' => '', 'source' => '', 'risk' => 0,
190  'group' => null, 'spdx_id' => null, 'licensetype' => "Permisssive"
191  ) as $optNeedle=>$defaultValue) {
192  $mRow[$optNeedle] = $defaultValue;
193  if ($this->headrow[$optNeedle]!==false && array_key_exists($this->headrow[$optNeedle], $row)) {
194  $mRow[$optNeedle] = $row[$this->headrow[$optNeedle]];
195  }
196  }
197 
198  return $this->handleCsvLicense($mRow);
199  }
200 
207  private function handleHeadCsv($row)
208  {
209  $headrow = array();
210  $row[0] = trim($row[0], "\xEF\xBB\xBF"); // Remove BOM
211  foreach (array('shortname','fullname','text') as $needle) {
212  $col = ArrayOperation::multiSearch($this->alias[$needle], $row);
213  if (false === $col) {
214  throw new \Exception("Undetermined position of $needle");
215  }
216  $headrow[$needle] = $col;
217  }
218  foreach (array('parent_shortname', 'report_shortname', 'url', 'notes',
219  'source', 'risk', 'group', 'spdx_id', 'licensetype') as $optNeedle) {
220  $headrow[$optNeedle] = ArrayOperation::multiSearch($this->alias[$optNeedle], $row);
221  }
222  return $headrow;
223  }
224 
231  private function updateLicense($row, $rfPk)
232  {
233  $stmt = __METHOD__ . '.getOldLicense';
234  $oldLicense = $this->dbManager->getSingleRow('SELECT ' .
235  'rf_shortname, rf_fullname, rf_spdx_id, rf_text, rf_url, rf_notes, rf_source, rf_risk, rf_licensetype ' .
236  'FROM license_ref WHERE rf_pk = $1', array($rfPk), $stmt);
237 
238  $stmt = __METHOD__ . '.getOldMapping';
239  $sql = 'SELECT rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;';
240  $oldParent = null;
241  $oldParentRow = $this->dbManager->getSingleRow($sql, array($rfPk,
242  LicenseMap::CONCLUSION), $stmt);
243  if (!empty($oldParentRow)) {
244  $oldParent = $oldParentRow['rf_parent'];
245  }
246  $oldReport = null;
247  $oldReportRow = $this->dbManager->getSingleRow($sql, array($rfPk,
248  LicenseMap::REPORT), $stmt);
249  if (!empty($oldReportRow)) {
250  $oldReport = $oldReportRow['rf_parent'];
251  }
252 
253  $newParent = null;
254  $newParent = ($row['parent_shortname'] == null) ? null :
255  $this->getKeyFromShortname($row['parent_shortname']);
256 
257  $newReport = null;
258  $newReport = ($row['report_shortname'] == null) ? null :
259  $this->getKeyFromShortname($row['report_shortname']);
260 
261  $log = "License '$row[shortname]' already exists in DB (id = $rfPk)";
262  $stmt = __METHOD__ . '.updateLicense';
263  $sql = "UPDATE license_ref SET ";
264  if (! empty($row['group'])) {
265  $sql = "UPDATE license_candidate SET ";
266  }
267  $extraParams = array();
268  $param = array($rfPk);
269  if (!empty($row['fullname']) && $row['fullname'] != $oldLicense['rf_fullname']) {
270  $param[] = $row['fullname'];
271  $stmt .= '.fullN';
272  $extraParams[] = "rf_fullname=$" . count($param);
273  $log .= ", updated fullname";
274  }
275  if (!empty($row['spdx_id']) && $row['spdx_id'] != $oldLicense['rf_spdx_id']) {
276  $param[] = $row['spdx_id'];
277  $stmt .= '.spId';
278  $extraParams[] = "rf_spdx_id=$" . count($param);
279  $log .= ", updated SPDX ID";
280  }
281  if (!empty($row['text']) && $row['text'] != $oldLicense['rf_text'] && $row['text'] != LicenseMap::TEXT_MAX_CHAR_LIMIT) {
282  $param[] = $row['text'];
283  $stmt .= '.text';
284  $extraParams[] = "rf_text=$" . count($param) . ",rf_md5=md5($" .
285  count($param) . ")";
286  $log .= ", updated text";
287  }
288  if (!empty($row['url']) && $row['url'] != $oldLicense['rf_url']) {
289  $param[] = $row['url'];
290  $stmt .= '.url';
291  $extraParams[] = "rf_url=$" . count($param);
292  $log .= ", updated URL";
293  }
294  if (!empty($row['notes']) && $row['notes'] != $oldLicense['rf_notes']) {
295  $param[] = $row['notes'];
296  $stmt .= '.notes';
297  $extraParams[] = "rf_notes=$" . count($param);
298  $log .= ", updated notes";
299  }
300  if (!empty($row['source']) && $row['source'] != $oldLicense['rf_source']) {
301  $param[] = $row['source'];
302  $stmt .= '.updSource';
303  $extraParams[] = "rf_source=$".count($param);
304  $log .= ', updated the source';
305  }
306  if (!empty($row['risk']) && $row['risk'] != $oldLicense['rf_risk']) {
307  $param[] = $row['risk'];
308  $stmt .= '.updRisk';
309  $extraParams[] = "rf_risk=$".count($param);
310  $log .= ', updated the risk level';
311  }
312  if (!empty($row['licensetype']) && $row['licensetype'] != $oldLicense['rf_licensetype']) {
313  $param[] = $row['licensetype'];
314  $stmt .= '.types';
315  $extraParams[] = "rf_licensetype=$".count($param);
316  $log .= ', updated the licensetype';
317  }
318  if (count($param) > 1) {
319  $sql .= join(",", $extraParams);
320  $sql .= " WHERE rf_pk=$1;";
321  $this->dbManager->getSingleRow($sql, $param, $stmt);
322  $this->mdkMap[md5($row['text'])] = $rfPk;
323  }
324 
325  if (($oldParent != $newParent) && $this->setMap($newParent, $rfPk, LicenseMap::CONCLUSION)) {
326  $log .= " with conclusion '$row[parent_shortname]'";
327  }
328  if (($oldReport != $newReport) && $this->setMap($newReport, $rfPk, LicenseMap::REPORT)) {
329  $log .= " reporting '$row[report_shortname]'";
330  }
331  return $log;
332  }
333 
342  private function handleCsvLicense($row)
343  {
344  if (empty($row['risk'])) {
345  $row['risk'] = 0;
346  }
347  $rfPk = $this->getKeyFromShortname($row['shortname'], $row['group']);
348  $md5Match = $this->getKeyFromMd5($row['text']);
349 
350  // If shortname exists, does not collide with other texts and is not
351  // candidate
352  if ($rfPk !== false) {
353  if (! empty($row['group']) || ($md5Match == $rfPk || $md5Match === false)) {
354  return $this->updateLicense($row, $rfPk);
355  } else {
356  return "Error: MD5 checksum of '" . $row['shortname'] .
357  "' collides with license id=$md5Match";
358  }
359  }
360  if ($md5Match !== false && empty($row['group'])) {
361  return "Error: MD5 checksum of '" . $row['shortname'] .
362  "' collides with license id=$md5Match";
363  }
364 
365  $return = "";
366  if (!empty($row['group'])) {
367  $return = $this->insertNewLicense($row, "license_candidate");
368  } else {
369  $return = $this->insertNewLicense($row, "license_ref");
370  }
371  return $return;
372  }
373 
385  private function insertMapIfNontrivial($fromName,$toName,$usage)
386  {
387  $isNontrivial = ($fromName!==null && $fromName!=$toName && $this->getKeyFromShortname($fromName)!==false);
388  if ($isNontrivial) {
389  $this->dbManager->insertTableRow('license_map',
390  array('rf_fk'=>$this->getKeyFromShortname($toName),
391  'rf_parent'=>$this->getKeyFromShortname($fromName),
392  'usage'=> $usage));
393  }
394  return $isNontrivial;
395  }
396 
402  private function getKeyFromShortname($shortname, $groupFk = null)
403  {
404  $keyName = $shortname;
405  $tableName = "license_ref";
406  $addCondition = "";
407  $statement = __METHOD__ . ".getId";
408  $params = array($shortname);
409 
410  if ($groupFk != null) {
411  $keyName .= $groupFk;
412  $tableName = "license_candidate";
413  $addCondition = "AND group_fk = $2";
414  $statement .= ".candidate";
415  $params[] = $this->userDao->getGroupIdByName($groupFk);
416  }
417  $sql = "SELECT rf_pk FROM ONLY $tableName WHERE rf_shortname = $1 $addCondition;";
418  if (array_key_exists($keyName, $this->nkMap)) {
419  return $this->nkMap[$keyName];
420  }
421  $row = $this->dbManager->getSingleRow($sql, $params, $statement);
422  $this->nkMap[$keyName] = ($row===false) ? false : $row['rf_pk'];
423  return $this->nkMap[$keyName];
424  }
425 
431  private function getKeyFromMd5($licenseText)
432  {
433  $md5 = md5($licenseText);
434  if (array_key_exists($md5, $this->mdkMap)) {
435  return $this->mdkMap[$md5];
436  }
437  $row = $this->dbManager->getSingleRow("SELECT rf_pk " .
438  "FROM ONLY license_ref WHERE rf_md5=md5($1)",
439  array($licenseText));
440  $this->mdkMap[$md5] = (empty($row)) ? false : $row['rf_pk'];
441  return $this->mdkMap[$md5];
442  }
443 
454  private function setMap($from, $to, $usage)
455  {
456  $return = false;
457  if (!empty($from)) {
458  $sql = "SELECT license_map_pk, rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;";
459  $statement = __METHOD__ . ".getCurrentMapping";
460  $row = $this->dbManager->getSingleRow($sql, array($to, $usage), $statement);
461  if (!empty($row) && $row['rf_parent'] != $from) {
462  $this->dbManager->updateTableRow("license_map", array(
463  'rf_fk' => $to,
464  'rf_parent' => $from,
465  'usage' => $usage
466  ), 'license_map_pk', $row['license_map_pk']);
467  $return = true;
468  } elseif (empty($row)) {
469  $this->dbManager->insertTableRow('license_map', array(
470  'rf_fk' => $to,
471  'rf_parent' => $from,
472  'usage' => $usage
473  ));
474  $return = true;
475  }
476  }
477  return $return;
478  }
479 
489  private function insertNewLicense($row, $tableName = "license_ref")
490  {
491  $stmtInsert = __METHOD__ . '.insert.' . $tableName;
492  $columns = array(
493  "rf_shortname" => $row['shortname'],
494  "rf_licensetype" => $row['licensetype'],
495  "rf_fullname" => $row['fullname'],
496  "rf_spdx_id" => $row['spdx_id'],
497  "rf_text" => $row['text'],
498  "rf_md5" => md5($row['text']),
499  "rf_detector_type" => 1,
500  "rf_url" => $row['url'],
501  "rf_notes" => $row['notes'],
502  "rf_source" => $row['source'],
503  "rf_risk" => $row['risk']
504  );
505 
506  $as = "";
507  if ($tableName == "license_candidate") {
508  $groupId = $this->userDao->getGroupIdByName($row['group']);
509  if (empty($groupId)) {
510  return "Error: Unable to insert candidate license " . $row['shortname'] .
511  " as group " . $row['group'] . " does not exist";
512  }
513  $columns["group_fk"] = $groupId;
514  $columns["marydone"] = $this->dbManager->booleanToDb(true);
515  $as = " as candidate license under group " . $row["group"];
516  }
517 
518  $newPk = $this->dbManager->insertTableRow($tableName, $columns, $stmtInsert, 'rf_pk');
519 
520  if ($tableName == "license_candidate") {
521  $this->nkMap[$row['shortname'].$row['group']] = $newPk;
522  } else {
523  $this->nkMap[$row['shortname']] = $newPk;
524  }
525  $this->mdkMap[md5($row['text'])] = $newPk;
526  $return = "Inserted '$row[shortname]' in DB" . $as;
527 
528  if ($this->insertMapIfNontrivial($row['parent_shortname'], $row['shortname'], LicenseMap::CONCLUSION)) {
529  $return .= " with conclusion '$row[parent_shortname]'";
530  }
531  if ($this->insertMapIfNontrivial($row['report_shortname'], $row['shortname'], LicenseMap::REPORT)) {
532  $return .= " reporting '$row[report_shortname]'";
533  }
534  return $return;
535  }
536 
542  public function importJsonData($data, string $msg): string
543  {
544  foreach ($data as $row) {
545  $log = $this->handleCsvLicense($this->handleRowJson($row));
546  if (!empty($log)) {
547  $msg .= "$log\n";
548  }
549  }
550  return $msg;
551  }
552 }
insertMapIfNontrivial($fromName, $toName, $usage)
Insert in license_map table if the license conclusion is non-trivial.
handleFile($filename, $fileExtension)
Read the CSV line by line and import it.
handleCsvLicense($row)
Handle a single row from CSV.
updateLicense($row, $rfPk)
Update the license info in the DB.
__construct(DbManager $dbManager, UserDao $userDao)
setEnclosure($enclosure='"')
Update the enclosure.
getKeyFromShortname($shortname, $groupFk=null)
Get the license id using license shortname from DB or nkMap.
handleHeadCsv($row)
Handle a row as head row.
insertNewLicense($row, $tableName="license_ref")
Insert a new license in DB.
setDelimiter($delimiter=',')
Update the delimiter.
setMap($from, $to, $usage)
Update license mappings.
Wrapper class for license map.
Definition: LicenseMap.php:19
Fossology exception.
Definition: Exception.php:15
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:690
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
Utility functions for specific applications.