FOSSology  4.5.1
Open Source License Compliance by Open Source Software
OsselotLookupHelper.php
1 <?php
2 /*
3  SPDX-FileCopyrightText: © 2025 Vaibhav Sahu <sahusv4527@gmail.com>
4  SPDX-License-Identifier: GPL-2.0-only
5 */
6 
7 namespace Fossology\Lib\Util;
8 
9 use GuzzleHttp\Client;
10 use GuzzleHttp\Exception\GuzzleException;
11 use GuzzleHttp\RequestOptions;
12 
14 {
/** Guzzle HTTP client used for the GET requests this helper sends. */
15  private Client $client;
/** Directory in which downloaded SPDX files are cached (set by the constructor). */
16  private string $cacheDir;
/** Maximum age, in seconds, for a cached file to be reused (86400 s = 24 h). */
17  private int $cacheTtl = 86400;
18 
/**
 * Sets up the HTTP client and the on-disk cache directory.
 *
 * The cache lives in "util/osselot" below the configured FOSSology cache
 * directory ($SysConf['DIRECTORIES']['cache']); when that setting is absent
 * the system temporary directory is used as the base instead.
 */
public function __construct()
{
  $clientDefaults = [
    'timeout' => 300,
    'connect_timeout' => 5,
  ];
  $this->client = new Client($clientDefaults);

  $cacheRoot = $GLOBALS['SysConf']['DIRECTORIES']['cache'] ?? sys_get_temp_dir();
  $this->cacheDir = rtrim($cacheRoot, '/\\') . '/util/osselot';

  if (is_dir($this->cacheDir)) {
    return;
  }

  // Best effort: a failure here is tolerated, fetchSpdxFile() retries the mkdir.
  @mkdir($this->cacheDir, 0755, true);
}
33 
/**
 * Lists the versions available for a package in the OSSelot curated index.
 *
 * Sends a GET request to the URL configured in
 * $SysConf['SYSCONFIG']['OsselotCuratedUrl'], using the URL-encoded package
 * name as the query string, and extracts the version from every response
 * line of the form "<pkgName>/version-<version>".
 *
 * @param string $pkgName Package name to look up.
 * @return string[] Unique versions in natural sort order. Empty when the name
 *                  is empty, the curated URL is not configured, the request
 *                  fails or returns a non-200 status, or no line matches.
 */
public function getVersions(string $pkgName): array
{
  // Compare against '' rather than empty(): empty('0') is true in PHP.
  if ($pkgName === '') {
    return [];
  }

  global $SysConf;
  $curatedUrl = $SysConf['SYSCONFIG']['OsselotCuratedUrl'] ?? '';
  if (!is_string($curatedUrl) || $curatedUrl === '') {
    // Without a configured endpoint there is nothing to query.
    return [];
  }

  $apiUrl = $curatedUrl . "?" . rawurlencode($pkgName);

  try {
    $response = $this->client->get($apiUrl, [
      RequestOptions::HEADERS => [
        'Accept' => 'text/plain, text/html, */*',
        'User-Agent' => 'Fossology-OsselotHelper',
      ],
      RequestOptions::TIMEOUT => 30,
      RequestOptions::CONNECT_TIMEOUT => 10,
    ]);

    if ($response->getStatusCode() !== 200) {
      return [];
    }

    $responseBody = (string) $response->getBody();
    if ($responseBody === '') {
      return [];
    }

    // The pattern does not change per line, so build it once.
    $pattern = '/^' . preg_quote($pkgName, '/') . '\/version-(.+)$/';
    $versions = [];

    foreach (explode("\n", trim($responseBody)) as $line) {
      if (preg_match($pattern, trim($line), $matches) !== 1) {
        continue;
      }

      $version = trim($matches[1]);
      // A version named "0" is valid; only a truly empty capture is skipped.
      if ($version !== '') {
        $versions[] = $version;
      }
    }

    $versions = array_unique($versions);
    sort($versions, SORT_NATURAL);

    return $versions;
  } catch (\Exception $e) {
    // Lookup is best effort: any transport or parsing failure yields no versions.
    return [];
  }
}
97 
/**
 * Downloads the SPDX RDF/XML report for a package version and caches it.
 *
 * A cached copy younger than $cacheTtl seconds is returned without any
 * network access. Otherwise the candidate URLs are tried in order: the plain
 * file below $SysConf['SYSCONFIG']['OsselotPackageAnalysisUrl'], its ".gz"
 * variant, and the plain file with 'OsselotPrimaryDomain' replaced by
 * 'OsselotFallbackDomain'. The first response that has status 200, a
 * non-empty body and parses as XML is written to the cache.
 *
 * @param string $pkgName Package name.
 * @param string $version Package version.
 * @return string|null Path of the cached file, or null when the analysis URL
 *                     is not configured or no candidate produced a usable file.
 * @throws \InvalidArgumentException When the package name or version is an empty string.
 */
public function fetchSpdxFile(string $pkgName, string $version): ?string
{
  // Compare against '' rather than empty(): "0" is a legitimate version.
  if ($pkgName === '' || $version === '') {
    throw new \InvalidArgumentException('Package name and version cannot be empty');
  }

  $safeName = preg_replace('/[^a-zA-Z0-9_.\-]/', '_', $pkgName);
  $safeVer = preg_replace('/[^a-zA-Z0-9_.\-]/', '_', $version);
  $cacheFile = "{$this->cacheDir}/{$safeName}_{$safeVer}.rdf";

  // Serve a fresh cache hit before looking at any remote configuration.
  if (is_file($cacheFile) && (time() - filemtime($cacheFile) < $this->cacheTtl)) {
    return $cacheFile;
  }

  global $SysConf;
  $sysConfig = $SysConf['SYSCONFIG'] ?? [];
  $githubRoot = $sysConfig['OsselotPackageAnalysisUrl'] ?? '';
  if (!is_string($githubRoot) || $githubRoot === '') {
    // No download root configured, so no candidate URL can be built.
    return null;
  }
  $primaryDomain = $sysConfig['OsselotPrimaryDomain'] ?? '';
  $fallbackDomain = $sysConfig['OsselotFallbackDomain'] ?? '';

  $relPath = sprintf(
    '%s/version-%s/%s-%s.spdx.rdf.xml',
    rawurlencode($pkgName),
    rawurlencode($version),
    rawurlencode($pkgName),
    rawurlencode($version)
  );

  $primaryUrl = "{$githubRoot}/{$relPath}";
  $candidates = [
    $primaryUrl,
    "{$primaryUrl}.gz",
  ];

  // Add the fallback only when it really differs, to avoid repeating the primary request.
  if (is_string($primaryDomain) && $primaryDomain !== '' && is_string($fallbackDomain)) {
    $fallbackUrl = str_replace($primaryDomain, $fallbackDomain, $primaryUrl);
    if ($fallbackUrl !== $primaryUrl) {
      $candidates[] = $fallbackUrl;
    }
  }

  $options = [
    RequestOptions::HEADERS => [
      'Accept' => 'application/rdf+xml, application/xml, text/xml',
      'User-Agent' => 'Fossology-OsselotHelper',
    ],
    // Status codes are inspected manually so a 404 just moves on to the next candidate.
    RequestOptions::HTTP_ERRORS => false,
    RequestOptions::CONNECT_TIMEOUT => 10,
    RequestOptions::TIMEOUT => 30,
  ];

  foreach ($candidates as $url) {
    try {
      $response = $this->client->get($url, $options);

      if ($response->getStatusCode() !== 200) {
        continue;
      }

      $body = (string) $response->getBody();
      if ($body === '') {
        continue;
      }

      if (str_ends_with($url, '.gz')) {
        $decompressed = @gzdecode($body);
        if ($decompressed === false) {
          continue;
        }
        $body = $decompressed;
      }

      if (!$this->isValidXml($body)) {
        continue;
      }

      // The directory may have been removed since construction; recreate it if needed.
      if (!is_dir($this->cacheDir)) {
        @mkdir($this->cacheDir, 0755, true);
      }

      if (file_put_contents($cacheFile, $body) !== false) {
        return $cacheFile;
      }
      // Writing failed: fall through and try the next candidate.
    } catch (\Exception $e) {
      continue;
    }
  }

  return null;
}
/**
 * Checks whether a string parses as XML without any libxml error.
 *
 * libxml errors are collected internally while parsing; the previous
 * error-handling setting is restored before returning.
 *
 * @param string $content Text to check.
 * @return bool True only when parsing succeeded and libxml recorded no errors.
 */
private function isValidXml(string $content): bool
{
  $priorSetting = libxml_use_internal_errors(true);
  libxml_clear_errors();

  $parsed = simplexml_load_string($content);
  // Read the error list before it is cleared below.
  $problems = libxml_get_errors();

  libxml_use_internal_errors($priorSetting);
  libxml_clear_errors();

  if ($parsed === false) {
    return false;
  }

  return !$problems;
}
209 
/**
 * Deletes every cached "*.rdf" file from the cache directory.
 *
 * Only regular files directly inside the cache directory are removed;
 * other files and the directory itself are left in place.
 *
 * @return bool True when the directory does not exist or every matching file
 *              was removed; false when at least one file could not be deleted.
 */
public function clearCache(): bool
{
  if (!is_dir($this->cacheDir)) {
    return true;
  }

  // glob() may return false; on some systems that is indistinguishable from
  // "no matches", so treat it as an empty list instead of iterating over false.
  $cachedFiles = glob($this->cacheDir . '/*.rdf') ?: [];

  $allRemoved = true;
  foreach ($cachedFiles as $file) {
    if (is_file($file) && !unlink($file)) {
      $allRemoved = false;
    }
  }

  return $allRemoved;
}
229 }
Fossology exception.
Definition: Exception.php:15
fetchSpdxFile(string $pkgName, string $version)
char * trim(char *ptext)
Trimming whitespace.
Definition: fossconfig.c:690