12 from typing
import Any
15 from packageurl
import PackageURL
16 from packageurl.contrib
import purl2url
# Key under which the language-specific parsers store the resolved download
# URL on a component dict (written as component[DOWNLOAD_URL_KEY]).
18 DOWNLOAD_URL_KEY =
'fossology_download_url'
# Key under which the classifier stores the package type extracted from the
# component's purl; the *_components accessors filter on this value.
19 COMPONENT_TYPE_KEY =
'fossology_component_type'
24 Parser to classify each component based on its type.
25 Ex: If purl is pkg:pypi/django@1.11.1,
26 it is a pypi package and should belong to python_components.
31 Initialize components list and load the sbom_data.
33 sbom_file: str | Path to sbom file
36 with open(sbom_file,
'r', encoding=
'utf-8')
as file:
38 except FileNotFoundError
as e:
39 logging.error(f
"SBOM file not found: {sbom_file}")
41 except json.JSONDecodeError
as e:
42 logging.error(f
"Invalid JSON in SBOM file: {sbom_file}")
44 except Exception
as e:
46 f
"An unexpected error occurred while reading SBOM file {sbom_file}: {e}"
51 self.parsed_components: dict[str, dict[str, Any]] = {}
55 Classify components based on their type
57 :param root_download_dir: Download dir prefix. Will be used to create
62 .
get(
'component', {}).
get(
'name',
None))
63 for component
in self.
sbom_datasbom_data.
get(
'components', []):
64 purl = component.get(
'purl',
'')
70 comp_name = component.get(
'name',
'unknown_name')
71 comp_version = component.get(
'version',
'unknown_version')
73 component[
'download_dir'] = os.path.join(
74 root_download_dir, comp_type
or 'unclassified', comp_name, comp_version
76 component[COMPONENT_TYPE_KEY] = comp_type
77 self.parsed_components[purl] = component
81 Extracts the package type from the purl.
82 Example purl: pkg:pypi/django@1.11.1
83 The type here is 'pypi'.
85 purl: str | Purl of the package to scan
87 purl_type: str | Type of component or None
91 if purl.startswith(
"pkg:"):
92 parsed_purl = PackageURL.from_string(purl)
93 return parsed_purl.type
95 except ValueError
as e:
96 logging.warning(f
"Could not parse PURL '{purl}': {e}")
98 except Exception
as e:
100 "An unexpected error occurred while extracting PURL type for '"
def python_components(self) -> list[dict[str, Any]]:
    """
    Collect the classified components whose package type is 'pypi'.

    Return:
        list of component dicts from ``self.parsed_components`` whose
        ``COMPONENT_TYPE_KEY`` entry equals 'pypi', in stored order.
    """
    pypi_entries: list[dict[str, Any]] = []
    for entry in self.parsed_components.values():
        if entry.get(COMPONENT_TYPE_KEY) == 'pypi':
            pypi_entries.append(entry)
    return pypi_entries
def npm_components(self) -> list[dict[str, Any]]:
    """
    Collect the classified components whose package type is 'npm'.

    Return:
        list of component dicts from ``self.parsed_components`` whose
        ``COMPONENT_TYPE_KEY`` entry equals 'npm', in stored order.
    """
    def _is_npm(entry: dict[str, Any]) -> bool:
        # A missing type key yields None, which never equals 'npm'.
        return entry.get(COMPONENT_TYPE_KEY) == 'npm'

    return list(filter(_is_npm, self.parsed_components.values()))
def php_components(self) -> list[dict[str, Any]]:
    """
    Collect the classified components whose package type is 'composer'.

    Return:
        list of component dicts from ``self.parsed_components`` whose
        ``COMPONENT_TYPE_KEY`` entry equals 'composer', in stored order.
    """
    composer_entries: list[dict[str, Any]] = []
    for entry in self.parsed_components.values():
        entry_type = entry.get(COMPONENT_TYPE_KEY)
        if entry_type != 'composer':
            continue
        composer_entries.append(entry)
    return composer_entries
def unsupported_components(self) -> list[dict[str, Any]]:
    """
    Collect the classified components that no dedicated accessor covers.

    A component is returned when its ``COMPONENT_TYPE_KEY`` entry is none
    of 'pypi', 'npm' or 'composer'; this includes components whose type
    is missing or None.

    Return:
        list of such component dicts from ``self.parsed_components``,
        in stored order.
    """
    handled_types = ('pypi', 'npm', 'composer')
    leftovers: list[dict[str, Any]] = []
    for entry in self.parsed_components.values():
        if entry.get(COMPONENT_TYPE_KEY) in handled_types:
            continue
        leftovers.append(entry)
    return leftovers
128 Python Parser to parse the python sboms to generate download urls from
129 cyclonedx format sbom files.
132 PYPI_BINARY_DIST_WHEEL =
'bdist_wheel'
133 PYPI_SOURCE_DIST =
'sdist'
137 Generate JSON REST API Endpoint to fetch download url.
139 package_name: str Name of package
140 version: str Version of package
142 JSON REST API endpoint to fetch metadata of package
144 return f
"https://pypi.org/pypi/{package_name}/{version}/json"
148 Parse SBOM file for package name and download url of package.
152 for comp
in parser.python_components:
153 purl = comp.get(
'purl')
155 logging.warning(f
"Python component missing PURL: {comp}. Skipping.")
158 component = parser.parsed_components.get(purl)
161 f
"Component with PURL {purl} not found in parsed_components. "
166 package_name = component.get(
'name')
167 version = component.get(
'version')
168 if not package_name
or not version:
170 f
"Python component {purl} missing name or version. Skipping."
175 logging.info(f
"API endpoint for {package_name} : {api_endpoint}")
178 response = requests.get(
179 api_endpoint, timeout=10
181 response.raise_for_status()
184 data = response.json()
188 for url_info
in data.get(
'urls', []):
190 sdist_url = url_info.get(
'url')
192 wheel_url = url_info.get(
'url')
195 download_url = sdist_url
if sdist_url
else wheel_url
197 component[DOWNLOAD_URL_KEY] = download_url
200 f
"No suitable download URL found for {package_name} {version}"
204 project_urls = data.get(
'info', {}).
get(
'project_urls', {})
205 for key, value
in project_urls.items():
206 if "source" in key.lower():
207 component[
'vcs_url'] = value
208 if "homepage" in key.lower():
209 component[
'homepage_url'] = value
211 except requests.exceptions.RequestException
as e:
213 f
"Failed to retrieve data for {package_name} {version} from "
214 f
"{api_endpoint}: {e}"
216 except json.JSONDecodeError:
218 f
"Failed to decode JSON response from {api_endpoint} for "
219 f
"{package_name} {version}."
221 except Exception
as e:
223 f
"An unexpected error occurred while parsing Python component "
230 NPM Parser to parse the NPM sboms to generate download urls from
231 cyclonedx format sbom files.
236 Get download url from purl for NPM Packages
242 return purl2url.get_download_url(purl)
246 Parse the components to extract the tuple of (<package_name>,
251 for comp
in parser.npm_components:
252 purl = comp.get(
'purl')
254 logging.warning(f
"NPM component missing PURL: {comp}. Skipping.")
257 component = parser.parsed_components.get(purl)
260 f
"Component with PURL {purl} not found in parsed_components. "
265 name = component.get(
'name',
'unknown_name')
268 component[DOWNLOAD_URL_KEY] = download_url
269 except Exception
as e:
271 f
"Invalid Download URL for NPM package: {name} ({purl}) :: {e}"
None parse_components(self, Parser parser)
str _get_download_url(self, str purl)
def __init__(self, str sbom_file)
str|None _extract_type(self, str purl)
def classify_components(self, str root_download_dir)
None parse_components(self, Parser parser)
str _generate_api_endpoint(self, str package_name, str version)
string PYPI_BINARY_DIST_WHEEL