FOSSology  4.4.0
Open Source License Compliance by Open Source Software
LicenseCsvImport.php
Go to the documentation of this file.
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2014-2015 Siemens AG
4 
5  SPDX-License-Identifier: GPL-2.0-only
6 */
7 
9 
14 
25 {
28  protected $dbManager;
31  protected $userDao;
34  protected $delimiter = ',';
37  protected $enclosure = '"';
40  protected $headrow = null;
43  protected $nkMap = array();
46  protected $mdkMap = array();
49  protected $alias = array(
50  'shortname'=>array('shortname','Short Name'),
51  'licensetype'=>array('licensetype','License Type'),
52  'fullname'=>array('fullname','Long Name'),
53  'spdx_id'=>array('spdx_id', 'SPDX ID'),
54  'text'=>array('text','Full Text'),
55  'parent_shortname'=>array('parent_shortname','Decider Short Name'),
56  'report_shortname'=>array('report_shortname','Regular License Text Short Name'),
57  'url'=>array('url','URL'),
58  'notes'=>array('notes'),
59  'source'=>array('source','Foreign ID'),
60  'risk'=>array('risk','risk_level'),
61  'group'=>array('group','License group'),
62  'obligations'=>array('obligations','License obligations')
63  );
64 
71  {
72  $this->dbManager = $dbManager;
73  $this->userDao = $userDao;
74  }
75 
80  public function setDelimiter($delimiter=',')
81  {
82  $this->delimiter = substr($delimiter,0,1);
83  }
84 
89  public function setEnclosure($enclosure='"')
90  {
91  $this->enclosure = substr($enclosure,0,1);
92  }
93 
100  public function handleFile($filename, $fileExtension)
101  {
102  if (!is_file($filename) || ($handle = fopen($filename, 'r')) === false) {
103  return _('Internal error');
104  }
105  $cnt = -1;
106  $msg = '';
107  try {
108  if ($fileExtension == 'csv') {
109  while (($row = fgetcsv($handle,0,$this->delimiter,$this->enclosure)) !== false) {
110  $log = $this->handleCsv($row);
111  if (!empty($log)) {
112  $msg .= "$log\n";
113  }
114  $cnt++;
115  }
116  $msg .= _('Read csv').(": $cnt ")._('licenses');
117  } else {
118  $jsonContent = fread($handle, filesize($filename));
119  $data = json_decode($jsonContent, true);
120  if ($data === null && json_last_error() !== JSON_ERROR_NONE) {
121  $msg .= "Error decoding JSON: " . json_last_error_msg()."\n";
122  }
123  foreach ($data as $row) {
124  $log = $this->handleCsvLicense($this->handleRowJson($row));
125  if (!empty($log)) {
126  $msg .= "$log\n";
127  }
128  }
129  $msg .= _('Read json').(":". count($data) ." ")._('licenses');
130  }
131  } catch(\Exception $e) {
132  fclose($handle);
133  return $msg .= _('Error while parsing file').': '.$e->getMessage();
134  }
135  fclose($handle);
136  return $msg;
137  }
138 
145  function handleRowJson($row)
146  {
147  $defaultValues = array(
148  'parent_shortname' => null,
149  'report_shortname' => null,
150  'url' => '',
151  'notes' => '',
152  'source' => '',
153  'risk' => 0,
154  'group' => null,
155  'spdx_id' => null
156  );
157  $newArray = array();
158  foreach ($row as $key => $value) {
159  $newKey = $key;
160  foreach ($this->alias as $aliasKey => $aliasValues) {
161  if (in_array($key, $aliasValues)) {
162  $newKey = $aliasKey;
163  break;
164  }
165  }
166  $newArray[$newKey] = $value;
167  }
168  foreach ($defaultValues as $key => $defaultValue) {
169  if (!array_key_exists($key, $newArray)) {
170  $newArray[$key] = $defaultValue;
171  }
172  }
173  return $newArray;
174  }
175 
182  private function handleCsv($row)
183  {
184  if ($this->headrow === null) {
185  $this->headrow = $this->handleHeadCsv($row);
186  return 'head okay';
187  }
188 
189  $mRow = array();
190  foreach (array('shortname','fullname','text') as $needle) {
191  $mRow[$needle] = $row[$this->headrow[$needle]];
192  }
193  foreach (array('parent_shortname' => null, 'report_shortname' => null,
194  'url' => '', 'notes' => '', 'source' => '', 'risk' => 0,
195  'group' => null, 'spdx_id' => null, 'licensetype' => "Permisssive"
196  ) as $optNeedle=>$defaultValue) {
197  $mRow[$optNeedle] = $defaultValue;
198  if ($this->headrow[$optNeedle]!==false && array_key_exists($this->headrow[$optNeedle], $row)) {
199  $mRow[$optNeedle] = $row[$this->headrow[$optNeedle]];
200  }
201  }
202 
203  return $this->handleCsvLicense($mRow);
204  }
205 
212  private function handleHeadCsv($row)
213  {
214  $headrow = array();
215  $row[0] = trim($row[0], "\xEF\xBB\xBF"); // Remove BOM
216  foreach (array('shortname','fullname','text') as $needle) {
217  $col = ArrayOperation::multiSearch($this->alias[$needle], $row);
218  if (false === $col) {
219  throw new \Exception("Undetermined position of $needle");
220  }
221  $headrow[$needle] = $col;
222  }
223  foreach (array('parent_shortname', 'report_shortname', 'url', 'notes',
224  'source', 'risk', 'group', 'spdx_id', 'licensetype') as $optNeedle) {
225  $headrow[$optNeedle] = ArrayOperation::multiSearch($this->alias[$optNeedle], $row);
226  }
227  return $headrow;
228  }
229 
236  private function updateLicense($row, $rfPk)
237  {
238  $stmt = __METHOD__ . '.getOldLicense';
239  $oldLicense = $this->dbManager->getSingleRow('SELECT ' .
240  'rf_shortname, rf_fullname, rf_spdx_id, rf_text, rf_url, rf_notes, rf_source, rf_risk, rf_licensetype ' .
241  'FROM license_ref WHERE rf_pk = $1', array($rfPk), $stmt);
242 
243  $stmt = __METHOD__ . '.getOldMapping';
244  $sql = 'SELECT rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;';
245  $oldParent = null;
246  $oldParentRow = $this->dbManager->getSingleRow($sql, array($rfPk,
247  LicenseMap::CONCLUSION), $stmt);
248  if (!empty($oldParentRow)) {
249  $oldParent = $oldParentRow['rf_parent'];
250  }
251  $oldReport = null;
252  $oldReportRow = $this->dbManager->getSingleRow($sql, array($rfPk,
253  LicenseMap::REPORT), $stmt);
254  if (!empty($oldReportRow)) {
255  $oldReport = $oldReportRow['rf_parent'];
256  }
257 
258  $newParent = null;
259  $newParent = ($row['parent_shortname'] == null) ? null :
260  $this->getKeyFromShortname($row['parent_shortname']);
261 
262  $newReport = null;
263  $newReport = ($row['report_shortname'] == null) ? null :
264  $this->getKeyFromShortname($row['report_shortname']);
265 
266  $log = "License '$row[shortname]' already exists in DB (id = $rfPk)";
267  $stmt = __METHOD__ . '.updateLicense';
268  $sql = "UPDATE license_ref SET ";
269  if (! empty($row['group'])) {
270  $sql = "UPDATE license_candidate SET ";
271  }
272  $extraParams = array();
273  $param = array($rfPk);
274  if (!empty($row['fullname']) && $row['fullname'] != $oldLicense['rf_fullname']) {
275  $param[] = $row['fullname'];
276  $stmt .= '.fullN';
277  $extraParams[] = "rf_fullname=$" . count($param);
278  $log .= ", updated fullname";
279  }
280  if (!empty($row['spdx_id']) && $row['spdx_id'] != $oldLicense['rf_spdx_id']) {
281  $param[] = $row['spdx_id'];
282  $stmt .= '.spId';
283  $extraParams[] = "rf_spdx_id=$" . count($param);
284  $log .= ", updated SPDX ID";
285  }
286  if (!empty($row['text']) && $row['text'] != $oldLicense['rf_text'] && $row['text'] != LicenseMap::TEXT_MAX_CHAR_LIMIT) {
287  $param[] = $row['text'];
288  $stmt .= '.text';
289  $extraParams[] = "rf_text=$" . count($param) . ",rf_md5=md5($" .
290  count($param) . ")";
291  $log .= ", updated text";
292  }
293  if (!empty($row['url']) && $row['url'] != $oldLicense['rf_url']) {
294  $param[] = $row['url'];
295  $stmt .= '.url';
296  $extraParams[] = "rf_url=$" . count($param);
297  $log .= ", updated URL";
298  }
299  if (!empty($row['notes']) && $row['notes'] != $oldLicense['rf_notes']) {
300  $param[] = $row['notes'];
301  $stmt .= '.notes';
302  $extraParams[] = "rf_notes=$" . count($param);
303  $log .= ", updated notes";
304  }
305  if (!empty($row['source']) && $row['source'] != $oldLicense['rf_source']) {
306  $param[] = $row['source'];
307  $stmt .= '.updSource';
308  $extraParams[] = "rf_source=$".count($param);
309  $log .= ', updated the source';
310  }
311  if (!empty($row['risk']) && $row['risk'] != $oldLicense['rf_risk']) {
312  $param[] = $row['risk'];
313  $stmt .= '.updRisk';
314  $extraParams[] = "rf_risk=$".count($param);
315  $log .= ', updated the risk level';
316  }
317  if (!empty($row['licensetype']) && $row['licensetype'] != $oldLicense['rf_licensetype']) {
318  $param[] = $row['licensetype'];
319  $stmt .= '.types';
320  $extraParams[] = "rf_licensetype=$".count($param);
321  $log .= ', updated the licensetype';
322  }
323  if (count($param) > 1) {
324  $sql .= join(",", $extraParams);
325  $sql .= " WHERE rf_pk=$1;";
326  $this->dbManager->getSingleRow($sql, $param, $stmt);
327  $this->mdkMap[md5($row['text'])] = $rfPk;
328  }
329 
330  if (($oldParent != $newParent) && $this->setMap($newParent, $rfPk, LicenseMap::CONCLUSION)) {
331  $log .= " with conclusion '$row[parent_shortname]'";
332  }
333  if (($oldReport != $newReport) && $this->setMap($newReport, $rfPk, LicenseMap::REPORT)) {
334  $log .= " reporting '$row[report_shortname]'";
335  }
336  return $log;
337  }
338 
347  private function handleCsvLicense($row)
348  {
349  if (empty($row['risk'])) {
350  $row['risk'] = 0;
351  }
352  $rfPk = $this->getKeyFromShortname($row['shortname'], $row['group']);
353  $md5Match = $this->getKeyFromMd5($row['text']);
354 
355  // If shortname exists, does not collide with other texts and is not
356  // candidate
357  if ($rfPk !== false) {
358  if (! empty($row['group']) || ($md5Match == $rfPk || $md5Match === false)) {
359  return $this->updateLicense($row, $rfPk);
360  } else {
361  return "Error: MD5 checksum of '" . $row['shortname'] .
362  "' collides with license id=$md5Match";
363  }
364  }
365  if ($md5Match !== false && empty($row['group'])) {
366  return "Error: MD5 checksum of '" . $row['shortname'] .
367  "' collides with license id=$md5Match";
368  }
369 
370  $return = "";
371  if (!empty($row['group'])) {
372  $return = $this->insertNewLicense($row, "license_candidate");
373  } else {
374  $return = $this->insertNewLicense($row, "license_ref");
375  }
376  return $return;
377  }
378 
390  private function insertMapIfNontrivial($fromName,$toName,$usage)
391  {
392  $isNontrivial = ($fromName!==null && $fromName!=$toName && $this->getKeyFromShortname($fromName)!==false);
393  if ($isNontrivial) {
394  $this->dbManager->insertTableRow('license_map',
395  array('rf_fk'=>$this->getKeyFromShortname($toName),
396  'rf_parent'=>$this->getKeyFromShortname($fromName),
397  'usage'=> $usage));
398  }
399  return $isNontrivial;
400  }
401 
407  private function getKeyFromShortname($shortname, $groupFk = null)
408  {
409  $keyName = $shortname;
410  $tableName = "license_ref";
411  $addCondition = "";
412  $statement = __METHOD__ . ".getId";
413  $params = array($shortname);
414 
415  if ($groupFk != null) {
416  $keyName .= $groupFk;
417  $tableName = "license_candidate";
418  $addCondition = "AND group_fk = $2";
419  $statement .= ".candidate";
420  $params[] = $this->userDao->getGroupIdByName($groupFk);
421  }
422  $sql = "SELECT rf_pk FROM ONLY $tableName WHERE rf_shortname = $1 $addCondition;";
423  if (array_key_exists($keyName, $this->nkMap)) {
424  return $this->nkMap[$keyName];
425  }
426  $row = $this->dbManager->getSingleRow($sql, $params, $statement);
427  $this->nkMap[$keyName] = ($row===false) ? false : $row['rf_pk'];
428  return $this->nkMap[$keyName];
429  }
430 
436  private function getKeyFromMd5($licenseText)
437  {
438  $md5 = md5($licenseText);
439  if (array_key_exists($md5, $this->mdkMap)) {
440  return $this->mdkMap[$md5];
441  }
442  $row = $this->dbManager->getSingleRow("SELECT rf_pk " .
443  "FROM ONLY license_ref WHERE rf_md5=md5($1)",
444  array($licenseText));
445  $this->mdkMap[$md5] = (empty($row)) ? false : $row['rf_pk'];
446  return $this->mdkMap[$md5];
447  }
448 
459  private function setMap($from, $to, $usage)
460  {
461  $return = false;
462  if (!empty($from)) {
463  $sql = "SELECT license_map_pk, rf_parent FROM license_map WHERE rf_fk = $1 AND usage = $2;";
464  $statement = __METHOD__ . ".getCurrentMapping";
465  $row = $this->dbManager->getSingleRow($sql, array($to, $usage), $statement);
466  if (!empty($row) && $row['rf_parent'] != $from) {
467  $this->dbManager->updateTableRow("license_map", array(
468  'rf_fk' => $to,
469  'rf_parent' => $from,
470  'usage' => $usage
471  ), 'license_map_pk', $row['license_map_pk']);
472  $return = true;
473  } elseif (empty($row)) {
474  $this->dbManager->insertTableRow('license_map', array(
475  'rf_fk' => $to,
476  'rf_parent' => $from,
477  'usage' => $usage
478  ));
479  $return = true;
480  }
481  }
482  return $return;
483  }
484 
494  private function insertNewLicense($row, $tableName = "license_ref")
495  {
496  $stmtInsert = __METHOD__ . '.insert.' . $tableName;
497  $columns = array(
498  "rf_shortname" => $row['shortname'],
499  "rf_licensetype" => $row['licensetype'],
500  "rf_fullname" => $row['fullname'],
501  "rf_spdx_id" => $row['spdx_id'],
502  "rf_text" => $row['text'],
503  "rf_md5" => md5($row['text']),
504  "rf_detector_type" => 1,
505  "rf_url" => $row['url'],
506  "rf_notes" => $row['notes'],
507  "rf_source" => $row['source'],
508  "rf_risk" => $row['risk']
509  );
510 
511  $as = "";
512  if ($tableName == "license_candidate") {
513  $groupId = $this->userDao->getGroupIdByName($row['group']);
514  if (empty($groupId)) {
515  return "Error: Unable to insert candidate license " . $row['shortname'] .
516  " as group " . $row['group'] . " does not exist";
517  }
518  $columns["group_fk"] = $groupId;
519  $columns["marydone"] = $this->dbManager->booleanToDb(true);
520  $as = " as candidate license under group " . $row["group"];
521  }
522 
523  $newPk = $this->dbManager->insertTableRow($tableName, $columns, $stmtInsert, 'rf_pk');
524 
525  if ($tableName == "license_candidate") {
526  $this->nkMap[$row['shortname'].$row['group']] = $newPk;
527  } else {
528  $this->nkMap[$row['shortname']] = $newPk;
529  }
530  $this->mdkMap[md5($row['text'])] = $newPk;
531  $return = "Inserted '$row[shortname]' in DB" . $as;
532 
533  if ($this->insertMapIfNontrivial($row['parent_shortname'], $row['shortname'], LicenseMap::CONCLUSION)) {
534  $return .= " with conclusion '$row[parent_shortname]'";
535  }
536  if ($this->insertMapIfNontrivial($row['report_shortname'], $row['shortname'], LicenseMap::REPORT)) {
537  $return .= " reporting '$row[report_shortname]'";
538  }
539  return $return;
540  }
541 }
insertMapIfNontrivial($fromName, $toName, $usage)
Insert in license_map table if the license conclusion is non-trivial.
handleFile($filename, $fileExtension)
Read the CSV line by line and import it.
handleCsvLicense($row)
Handle a single row from CSV.
updateLicense($row, $rfPk)
Update the license info in the DB.
__construct(DbManager $dbManager, UserDao $userDao)
setEnclosure($enclosure='"')
Update the enclosure.
getKeyFromShortname($shortname, $groupFk=null)
Get the license id using license shortname from DB or nkMap.
handleHeadCsv($row)
Handle a row as head row.
insertNewLicense($row, $tableName="license_ref")
Insert a new license in DB.
setDelimiter($delimiter=',')
Update the delimiter.
setMap($from, $to, $usage)
Update license mappings.
Wrapper class for license map.
Definition: LicenseMap.php:19
Fossology exception.
Definition: Exception.php:15
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:690
fo_dbManager * dbManager
fo_dbManager object
Definition: process.c:16
Utility functions for specific applications.