FOSSology  4.4.0
Open Source License Compliance by Open Source Software
fo_antelink.php
1 #!/usr/bin/php
2 <?php
3 /*
4  SPDX-FileCopyrightText: © 2012 Hewlett-Packard Development Company, L.P.
5 
6  SPDX-License-Identifier: GPL-2.0-only
7 */
8 
16 // $DATAROOTDIR and $PROJECT come from Makefile
17 //require_once "$DATAROOTDIR/$PROJECT/lib/php/bootstrap.php";
18 require_once "/usr/local/share/fossology/lib/php/bootstrap.php";
19 
// NOTE: THIS IS A PRIVATE KEY - it is read from the file acme.key, which is
// resolved relative to the current working directory.
$acmekey = file_get_contents("acme.key");
if ($acmekey === false) {
  echo "Unable to read antelink acme key file acme.key.\n";
  exit(1);
}
// A key file normally ends with a newline; strip surrounding whitespace so it
// does not end up embedded in the request URLs built below.
$acmekey = trim($acmekey);

// Antepedia Computing Machinery Engine (acme) url
$acmebaseurl = 'https://api.antepedia.com/acme/v3';
// The full query endpoint ($acmebaseurl . "/fquery/$acmekey") is not used: it
// is slow, and Antelink support recommends only sending one sha1 at a time.
$acmeBinaryqueryurl = $acmebaseurl . "/bquery/$acmekey";
$acmequeryurl = $acmebaseurl . "/squery/$acmekey";
$acmekeycheckurl = $acmebaseurl . "/checkey/$acmekey";

$SysConf = array();  // fo system configuration variables
$PG_CONN = 0;        // Database connection

/* Set SYSCONFDIR and set global (for backward compatibility) */
$SysConf = bootstrap();

/* Initialize global system configuration variables $SysConfig[] */
ConfigInit($SYSCONFDIR, $SysConf);

/* Check Antelink acme key */
$ch = curl_init($acmekeycheckurl);
SetCurlArgs($ch);
$contents = curl_exec($ch);
if ($contents === false) {
  // Transport failure: report it instead of dereferencing an empty response.
  echo "Error " . curl_errno($ch) . ": " . curl_error($ch) . "\n";
  curl_close($ch);
  exit(1);
}
curl_close($ch);

$response = json_decode($contents);
// json_decode() yields null for an undecodable body, so check the type before
// reading the authorized flag.
if (! is_object($response) || empty($response->authorized)) {
  echo "Invalid antelink acme key.\n";
  exit(1);
}
48 
// Maximum number of sha1's to send to antelink in a single batch
$MaxBinarySend = 500;  // batch size used for the binary (bquery) precheck
$MaxSend = 10;         // batch size used for the tagging (squery) requests

/* Command line: -v -p -u {upload_pk} -t {tag_pk}
 * -u and -t are mandatory.
 */
$Options = getopt("vpt:u:");
if ($Options === false) {
  // getopt() reports failure with false; treat it as "no options given".
  $Options = array();
}
if (! array_key_exists('t', $Options) || ! array_key_exists('u', $Options)) {
  echo "Fatal: Missing parameter\n";
  Usage($argc, $argv);
  // "exit -1" would be parsed as "(exit) - 1" and terminate with status 0,
  // so use the call form with an explicit failure status.
  exit(1);
}
$tag_pk = $Options['t'];
$upload_pk = $Options['u'];

// Both values are database keys that later end up in SQL statements; reject
// anything that is not a plain non-negative integer (a repeated option
// arrives from getopt() as an array and is rejected as well).
foreach (array('-t' => $tag_pk, '-u' => $upload_pk) as $OptionFlag => $OptionValue) {
  if (! is_string($OptionValue) || ! preg_match('/^[0-9]+$/', $OptionValue)) {
    echo "Fatal: $OptionFlag requires a single numeric key\n";
    Usage($argc, $argv);
    exit(1);
  }
}

$PrintOnly = array_key_exists('p', $Options);
$Verbose = array_key_exists('v', $Options);
70 
/* Select every distinct pfile of the upload that has no acme_pfile record
 * yet. uploadtree rows whose ufile_mode has bit 0x10000000 set are skipped.
 * The upload key is passed as a bound parameter ($1) rather than being
 * interpolated into the statement text.
 */
$sql = <<<'SQL'
SELECT pfile_pk, pfile_sha1, ufile_name, acme_pfile_pk FROM (SELECT distinct(pfile_fk) AS PF, ufile_name FROM uploadtree
WHERE upload_fk=$1 and (ufile_mode&x'10000000'::int)=0) as SS
inner join pfile on (PF=pfile_pk)
left join acme_pfile on (PF=acme_pfile.pfile_fk) where acme_pfile_pk is null
SQL;
$result = pg_query_params($PG_CONN, $sql, array($upload_pk));
DBCheckResult($result, $sql, __FILE__, __LINE__);
if (pg_num_rows($result) == 0) {
  echo "Empty upload_pk $upload_pk\n";
  exit;
}
82 
83 
/* Loop through each row identifying each as foss or not.
 * Put the FOSS SHA1 into an array to send to the squery server.
 * This two step process is needed because bquery can handle requests of 500 hashes
 * but squery can only handle requests of 10 hashes. */
$MasterFOSSarray = array();
$ToAntelink = array();
$FoundFOSSfiles = 0;
$PrecheckFileCount = 0;
while ($row = pg_fetch_assoc($result)) {
  $PrecheckFileCount++;
  $ToAntelink[] = $row;
  if (count($ToAntelink) < $MaxBinarySend) {
    continue;  // keep accumulating until the batch is full
  }

  $FoundFOSSfiles += QueryBinaryServer($ToAntelink, $MasterFOSSarray);
  $ToAntelink = array();
  // Report progress after the batch has been queried so the "found" figure
  // includes this batch, matching what is done for the final partial batch.
  if ($Verbose) {
    echo "Precheck $PrecheckFileCount, found $FoundFOSSfiles\n";
  }
}
pg_free_result($result);

/* send whatever is left over as a final, partial batch */
if (count($ToAntelink)) {
  $FoundFOSSfiles += QueryBinaryServer($ToAntelink, $MasterFOSSarray);
  if ($Verbose) {
    echo "Precheck $PrecheckFileCount, found $FoundFOSSfiles\n";
  }
}
110 
/* Hand the FOSS candidates to antelink in groups of at most $MaxSend files
 * (sha1's), adding up how many tags each group produced. */
$ToAntelink = array();
$TotalFileCount = count($MasterFOSSarray);
$TaggedFileCount = 0;
foreach (array_chunk($MasterFOSSarray, $MaxSend) as $ToAntelink) {
  $TaggedFileCount += QueryTag($ToAntelink, $tag_pk, $PrintOnly, $Verbose);
}

echo "$TaggedFileCount files tagged out of $TotalFileCount files.\n";

return (0);
131 
132 
/**
 * Ask the antelink binary query (bquery) server which of the given sha1's it
 * knows, and append the matching rows to $MasterFOSSarray.
 *
 * @param array $ToAntelink       Rows to check; each row must carry a
 *                                'pfile_sha1' element.
 * @param array $MasterFOSSarray  (in/out) Receives every row of $ToAntelink
 *                                whose sha1 appears in the server results.
 *
 * @return int Number of rows appended to $MasterFOSSarray (0 when the request
 *             fails or the response cannot be decoded).
 */
function QueryBinaryServer($ToAntelink, &$MasterFOSSarray)
{
  global $acmeBinaryqueryurl;

  $NumFound = 0;

  /* construct array of just sha1's */
  $sha1array = array();
  foreach ($ToAntelink as $row) {
    $sha1array[] = $row['pfile_sha1'];
  }
  $PostData = json_encode($sha1array);

  $curlch = curl_init($acmeBinaryqueryurl);
  SetCurlArgs($curlch);

  curl_setopt($curlch, CURLOPT_POST, true);
  curl_setopt($curlch, CURLOPT_POSTFIELDS, $PostData);
  curl_setopt($curlch, CURLOPT_RETURNTRANSFER, true);

  // Run the request, then capture the error state and release the handle
  // before any early return so it is closed on the failure path too.
  $curlresponse = curl_exec($curlch);
  $curlerrno = curl_errno($curlch);
  $curlerror = curl_error($curlch);
  curl_close($curlch);

  if ($curlerrno) {
    echo "Error " . $curlerrno . ": " . $curlerror . "\n";
    return $NumFound;
  }

  $response = json_decode($curlresponse);
  if (! is_object($response)) {
    // json_decode() returns null for an undecodable body.
    echo "Error: unexpected response from antelink server\n";
    return $NumFound;
  }

  // print any errors
  if (! empty($response->error)) {
    echo $response->error . "\n";
  }

  /* collect the rows the server recognised */
  if (isset($response->results) && is_array($response->results)) {
    foreach ($response->results as $result) {
      if (! isset($result->sha1)) {
        continue;
      }
      $row = GetRawRow($result->sha1, $ToAntelink);
      if (empty($row)) {
        // GetRawRow() returns '' for a sha1 that was not part of this
        // request; such a placeholder must not reach the tagging step.
        continue;
      }
      $MasterFOSSarray[] = $row;
      $NumFound++;
    }
  }

  return $NumFound;
}
189 
190 
/**
 * Send a batch of files to the antelink squery server and, for every project
 * reported for a file, record the project, link it to the pfile and tag the
 * pfile.
 *
 * @param array $ToAntelink  Rows to send; each row carries 'pfile_pk',
 *                           'pfile_sha1' and 'ufile_name'.
 * @param int   $tag_pk      Key of the tag written to tag_file.
 * @param bool  $PrintOnly   When true, print the matching row and the raw
 *                           server result instead of updating the database.
 * @param bool  $Verbose     Passed on to writeacme_project().
 *
 * @return int Number of tag_file records inserted (0 when the request fails).
 */
function QueryTag($ToAntelink, $tag_pk, $PrintOnly, $Verbose)
{
  global $PG_CONN;
  global $acmequeryurl;

  $numTagged = 0;

  /* construct array of arrays of name and sha1's */
  $files = array();
  foreach ($ToAntelink as $row) {
    if (! is_array($row)) {
      continue;  // skip placeholders that are not real database rows
    }
    $files[] = array('hash' => $row['pfile_sha1'], 'name' => $row['ufile_name']);
  }
  $PostData = json_encode(array('files' => $files));

  $curlch = curl_init($acmequeryurl);
  SetCurlArgs($curlch);

  curl_setopt($curlch, CURLOPT_POST, true);
  curl_setopt($curlch, CURLOPT_POSTFIELDS, $PostData);
  curl_setopt($curlch, CURLOPT_RETURNTRANSFER, true);

  // Run the request, then capture the error state and release the handle
  // before any early return so it is closed on the failure path too.
  $curlresponse = curl_exec($curlch);
  $curlerrno = curl_errno($curlch);
  $curlerror = curl_error($curlch);
  curl_close($curlch);

  if ($curlerrno) {
    echo "Error " . $curlerrno . ": " . $curlerror . "\n";
    return 0;
  }

  $response = json_decode($curlresponse);
  if (! is_object($response)) {
    // json_decode() returns null for an undecodable body.
    echo "Error: unexpected response from antelink server\n";
    return 0;
  }

  // print any errors
  if (! empty($response->error)) {
    echo $response->error . "\n";
  }

  if (! isset($response->results) || ! is_array($response->results)) {
    return $numTagged;
  }

  /* Add tag or print */
  foreach ($response->results as $result) {
    $sha1 = isset($result->sha1) ? $result->sha1 : '';
    $row = GetRawRow($sha1, $ToAntelink);

    if ($PrintOnly) {
      if (! empty($row)) {
        print_r($row);
      }
      print_r($result);
      continue;
    }

    // Without a matching row there is no pfile_pk to link or tag, and
    // without projects there is nothing to record.
    if (! is_array($row) || empty($result->projects)) {
      continue;
    }

    foreach ($result->projects as $project) {
      /* check if acme_project already exists (url and name identify it) */
      $url = isset($project->url) ? (string) $project->url : '';
      $name = isset($project->name) ? (string) $project->name : '';
      $acme_project_pk = '';
      $sql = 'SELECT acme_project_pk from acme_project where url=$1 and project_name=$2';
      $sqlresult = pg_query_params($PG_CONN, $sql, array($url, $name));
      DBCheckResult($sqlresult, $sql, __FILE__, __LINE__);
      if (pg_num_rows($sqlresult) > 0) {
        $projrow = pg_fetch_assoc($sqlresult);
        $acme_project_pk = $projrow['acme_project_pk'];
      }
      pg_free_result($sqlresult);

      if (empty($acme_project_pk)) {
        /* this is a new acme_project, so write the acme_project record */
        $acme_project_pk = writeacme_project($project, $Verbose);
      }

      /* write the acme_pfile record */
      writeacme_pfile($acme_project_pk, $row['pfile_pk']);

      /* Tag the pfile (update tag_file table) */
      /* There is no constraint preventing duplicate tags so do a precheck */
      $sql = 'SELECT * from tag_file where pfile_fk=$1 and tag_fk=$2';
      $sqlresult = pg_query_params($PG_CONN, $sql, array($row['pfile_pk'], $tag_pk));
      DBCheckResult($sqlresult, $sql, __FILE__, __LINE__);
      if (pg_num_rows($sqlresult) == 0) {
        $sql = 'insert into tag_file (tag_fk, pfile_fk, tag_file_date, tag_file_text) values ($1, $2, now(), NULL)';
        $insresult = pg_query_params($PG_CONN, $sql, array($tag_pk, $row['pfile_pk']));
        DBCheckResult($insresult, $sql, __FILE__, __LINE__);
        pg_free_result($insresult);
        $numTagged++;
      }
      pg_free_result($sqlresult);
    }
  }

  return $numTagged;
}
301 
/**
 * Look up the row of $ToAntelink that belongs to a sha1.
 *
 * The comparison of 'pfile_sha1' against $sha1 ignores letter case, and the
 * first matching row wins.
 *
 * @param string $sha1        sha1 to search for
 * @param array  $ToAntelink  rows, each carrying a 'pfile_sha1' element
 *
 * @return array|string The matching row, or '' when no row matches.
 */
function GetRawRow($sha1, $ToAntelink)
{
  $match = '';
  foreach ($ToAntelink as $candidate) {
    $isSameHash = (strcasecmp($candidate['pfile_sha1'], $sha1) == 0);
    if ($isSameHash) {
      $match = $candidate;
      break;
    }
  }
  return $match;
}
317 
318 
/**
 * Apply the curl options shared by every antelink request: user agent,
 * JSON request headers, TLS settings and the optional http proxy taken from
 * $SysConf['FOSSOLOGY']['http_proxy'].
 *
 * @param resource $ch  curl handle to configure
 */
function SetCurlArgs($ch)
{
  global $SysConf;
  curl_setopt($ch, CURLOPT_USERAGENT, 'Curl-php');
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  // NOTE(review): peer certificate verification is switched off, so the
  // server certificate of the HTTPS connection carrying the private acme key
  // is not validated. Enable it once a usable CA bundle is confirmed.
  curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
  curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
  // Each header must be a single line; a line break inside the string would
  // be sent as part of the header.
  curl_setopt($ch, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/json; charset=utf-8",
    "Accept: application/json, text/javascript, */*; q=0.01",
  ));

  /* parse http_proxy server and port */
  $http_proxy = '';
  if (isset($SysConf['FOSSOLOGY']['http_proxy'])) {
    $http_proxy = rtrim(trim((string) $SysConf['FOSSOLOGY']['http_proxy']), '/');
  }

  // Split off a trailing ":<digits>" port if there is one. Everything before
  // it (optionally including a scheme such as "http://") is the server, so a
  // proxy configured without a port is still honoured.
  $ProxyServer = $http_proxy;
  $ProxyPort = '';
  if (preg_match('/^(.*?)(?::([0-9]+))?$/', $http_proxy, $ProxyParts)) {
    $ProxyServer = $ProxyParts[1];
    $ProxyPort = isset($ProxyParts[2]) ? $ProxyParts[2] : '';
  }

  if (! empty($ProxyServer)) {
    curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
    curl_setopt($ch, CURLOPT_PROXY, $ProxyServer);
    if (! empty($ProxyPort)) {
      curl_setopt($ch, CURLOPT_PROXYPORT, (int) $ProxyPort);
    }
    curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
  }
}
348 
/**
 * Print the command line synopsis and a short description of the -p and -v
 * switches to standard output.
 *
 * @param int   $argc  argument count (not used)
 * @param array $argv  argument vector; element 0 is shown as the program name
 */
function Usage($argc, $argv)
{
  $helpText = array(
    "$argv[0] -v -p -u {upload_pk} -t {tag_pk}\n",
    " -p prints out raw antepedia info, but do not update the db.\n",
    " -v prints project found after inserting into db.\n",
  );
  echo implode('', $helpText);
}
360 
/**
 * Insert a project reported by antelink into the acme_project table.
 *
 * The release date and version are taken from the first artefact when the
 * project has artefacts, otherwise from the first content entry.
 *
 * @param object $project  Decoded project record; name, url, description,
 *                         licenses and artefacts/contents are read from it.
 * @param bool   $Verbose  When true, print the name of the project written.
 *
 * @return string acme_project_pk of the inserted record ('' if the insert
 *                returned no row).
 */
function writeacme_project($project, $Verbose)
{
  global $PG_CONN;

  $project_name = isset($project->name) ? (string) $project->name : '';
  $url = isset($project->url) ? (string) $project->url : '';
  $description = isset($project->description) ? (string) $project->description : '';

  /* convert licenses array to pipe delimited list */
  $licenses = '';
  if (isset($project->licenses) && is_array($project->licenses)) {
    $licenses = implode('|', $project->licenses);
  }

  /* figure out if we have artefact or content data and pull release date and version out of their respective structs */
  $projectDate = null;
  $version = '';
  if (! empty($project->artefacts)) {
    $artefact = $project->artefacts[0];
    $projectDate = isset($artefact->releaseDate) ? $artefact->releaseDate : null;
    $version = isset($artefact->version) ? (string) $artefact->version : '';
  } elseif (! empty($project->contents)) {
    $content = $project->contents[0];
    $projectDate = isset($content->releaseDate) ? $content->releaseDate : null;
    $version = isset($content->revision) ? (string) $content->revision : '';
  }

  /* convert unix time to a yyyymmdd date string.
   * Watch out for time stamps in milliseconds.
   */
  if ($projectDate === null) {
    // No release date supplied: date() would substitute the current time
    // for a missing timestamp, so make that fallback explicit.
    $projectDate = time();
  } elseif ($projectDate > 20000000000) {
    $projectDate = $projectDate / 1000;  // convert to seconds if necessary
  }
  // date() expects an integer timestamp; the division above yields a float.
  $releasedate = date("Ymd", (int) $projectDate);

  if ($Verbose) {
    echo "Found project: $project_name\n";
  }

  /* Insert the data with bound parameters and let the database hand back the
   * new primary key, instead of re-selecting the row by all of its columns.
   */
  $sql = 'insert into acme_project (project_name, url, description, licenses, releasedate, version)
    values ($1, $2, $3, $4, $5, $6) returning acme_project_pk';
  $InsResult = pg_query_params($PG_CONN, $sql,
    array($project_name, $url, $description, $licenses, $releasedate, $version));
  DBCheckResult($InsResult, $sql, __FILE__, __LINE__);
  $row = pg_fetch_assoc($InsResult);
  pg_free_result($InsResult);

  return $row ? $row['acme_project_pk'] : '';
}
422 
/**
 * Link a pfile to an acme project by inserting an acme_pfile record.
 *
 * The insert is best effort: a failure is not reported and does not stop the
 * run.
 *
 * @param int $acme_project_pk  key of the acme_project record
 * @param int $pfile_pk         key of the pfile record
 */
function writeacme_pfile($acme_project_pk, $pfile_pk)
{
  global $PG_CONN;

  // Without both keys there is nothing valid to insert.
  if (empty($acme_project_pk) || empty($pfile_pk)) {
    return;
  }

  /* insert the data */
  $sql = 'insert into acme_pfile (pfile_fk, acme_project_fk) values ($1, $2)';
  // ignore errors (this is a prototype). Errors are almost certainly from a duplicate insertion
  $InsResult = @pg_query_params($PG_CONN, $sql, array($pfile_pk, $acme_project_pk));
  if ($InsResult !== false) {
    pg_free_result($InsResult);
  }
}
437 
DBCheckResult($result, $sql, $filenm, $lineno)
Check the postgres result for unexpected errors. If found, treat them as fatal.
Definition: common-db.php:187
ConfigInit($sysconfdir, &$SysConf, $exitOnDbFail=true)
Initialize the fossology system after bootstrap().
Usage()
Print Usage statement.
Definition: fo_dbcheck.php:63
foreach($Options as $Option=> $OptVal) if(0==$reference_flag &&0==$nomos_flag) $PG_CONN
bootstrap($sysconfdir="")
Bootstrap the fossology php library.
Definition: migratetest.php:82