FOSSology  4.5.1
Open Source License Compliance by Open Source Software
Parsers.py
1 #!/usr/bin/env python3
2 
3 # SPDX-FileContributor: © Rajul Jha <rajuljha49@gmail.com>
4 
5 # SPDX-License-Identifier: GPL-2.0-only
6 
7 import requests
8 import json
9 from typing import Dict, Union
10 from packageurl.contrib import purl2url
11 
12 
class Parser:
    """
    Parser to classify each SBOM component based on its type.
    Ex: If purl is pkg:pypi/django@1.11.1,
    it is a pypi package and should belong to python_components.
    """

    def __init__(self, sbom_file: str):
        """
        Initialize the component lists and load the sbom data.
        Args:
          sbom_file: str | Path to sbom file (JSON)
        Raises:
          OSError: if the file cannot be opened
          json.JSONDecodeError: if the file is not valid JSON
        """
        # JSON is UTF-8 by specification; do not rely on the platform default.
        with open(sbom_file, 'r', encoding='utf-8') as file:
            self.sbom_data = json.load(file)
        # Components grouped by purl type; filled by classify_components().
        self.python_components = []
        self.npm_components = []
        # Declared for PHP support, but nothing in this class fills it yet.
        self.php_components = []
        self.unsupported_components = []

    def classify_components(self):
        """
        Classify components based on their type.

        Every entry of the top-level 'components' list that carries a purl is
        appended to the list matching its purl type. Components without a
        purl are skipped entirely.
        """
        for component in self.sbom_data.get('components', []):
            purl = component.get('purl')
            if not purl:
                continue
            purl_type = self._extract_type(purl)

            if purl_type == 'pypi':
                self.python_components.append(component)
            elif purl_type == 'npm':
                self.npm_components.append(component)
            else:
                # NOTE: 'composer' (PHP) packages are not handled yet, so they
                # end up here together with every other unknown type.
                self.unsupported_components.append(component)

    def _extract_type(self, purl: str) -> Union[str, None]:
        """
        Extracts the package type from the purl.
        Example purl: pkg:pypi/django@1.11.1
        The type here is 'pypi'.
        Args:
          purl: str | Purl of the package to scan
        Return:
          purl_type: str | Lower-cased type of component, or None when the
                           value is not a string, does not start with 'pkg:'
                           or has an empty type
        """
        # purl format: pkg:type/namespace/name@version?qualifiers#subpath
        if not isinstance(purl, str) or not purl.startswith("pkg:"):
            return None
        # The purl spec asks parsers to ignore slashes directly after the
        # scheme, so 'pkg://npm/foo' is treated like 'pkg:npm/foo'.
        remainder = purl[len("pkg:"):].lstrip('/')
        purl_type = remainder.split('/', 1)[0]
        # The type is case-insensitive; its canonical form is lowercase.
        return purl_type.lower() or None
69 
70 
72  """
73  Python Parser to parse the python sboms to generate download urls from
74  cyclonedx format sbom files.
75  """
76 
77  def _process_components(self, components : list[Dict]) -> list[str,str]:
78  """
79  Returns list of package name and version from SBOM component.
80  Args:
81  components: list[Dict]
82  Return:
83  list[str, str]: Name and versions of packages from sbom file
84  """
85  return [(comp['name'], comp['version']) for comp in components]
86 
87  def _generate_api_endpoint(self, package_name: str, version: str) -> str:
88  """
89  Generate JSON REST API Endpoint to fetch download url.
90  Args:
91  package_name: str Name of package
92  version: str Version of paclage
93  Return:
94  JSON REST API endpoint tp fetch metadata of package
95  """
96  return f"https://pypi.org/pypi/{package_name}/{version}/json"
97 
def parse_components(self, components: list[Dict], timeout: float = 30.0) -> Union[list[tuple[str, str]], None]:
    """
    Look up a download url for every given component.

    Each package is queried through the JSON endpoint built by
    _generate_api_endpoint. A package whose lookup fails (network error,
    non-200 answer, invalid JSON, no usable file) is reported on stdout and
    skipped, so one bad package does not abort the others.
    Args:
      components: list[Dict] Components carrying 'name' and 'version'
      timeout: float Seconds to wait for each HTTP request
    Return:
      list of tuples with package_name and download_url of that package,
      or None when no download url could be determined
    """
    if not components:
        return None

    download_urls = []
    packages = self._process_components(components)

    for package_name, version in packages:
        api_endpoint = self._generate_api_endpoint(package_name, version)
        print(f"API endpoint for {package_name} : {api_endpoint}")
        try:
            # Without a timeout requests may wait forever on a stalled server.
            response = requests.get(api_endpoint, timeout=timeout)
        except requests.RequestException as e:
            print(f"Failed to retrieve data for {package_name} {version} :: {e}")
            continue

        if response.status_code != 200:
            print(f"Failed to retrieve data for {package_name} {version}")
            continue

        try:
            data = response.json()
        except ValueError as e:
            # The body was not valid JSON.
            print(f"Failed to retrieve data for {package_name} {version} :: {e}")
            continue

        sdist_url = None
        wheel_url = None
        # When several files share a package type, the last one listed wins.
        for url_info in data.get('urls', []):
            if url_info.get('packagetype') == 'sdist':
                sdist_url = url_info.get('url')
            elif url_info.get('packagetype') == 'bdist_wheel':
                wheel_url = url_info.get('url')

        # Prefer sdist, fallback to wheel if sdist is not available
        download_url = sdist_url if sdist_url else wheel_url
        if download_url:
            download_urls.append((package_name, download_url))
        else:
            print(f"No suitable download URL found for {package_name} {version}")

    return download_urls if download_urls else None
135 
136 
class NPMParser:
    """
    NPM Parser to generate download urls for the npm components of
    cyclonedx format sbom files.
    """

    def _get_download_url(self, purl: str) -> Union[str, None]:
        """
        Get download url from purl for NPM Packages
        Args:
          purl: str
        Return:
          download_url: str | whatever purl2url.get_download_url returns,
                              which may be None when no url can be inferred
        """
        return purl2url.get_download_url(purl)

    def parse_components(self, components: list[Dict]) -> Union[list[tuple[str, str]], None]:
        """
        Parse the components to extract the tuple of (<package_name>, <download_url>)

        Components without a name or purl, and components for which no
        download url can be determined, are reported on stdout and skipped.
        Args:
          components: list[Dict]
        Return:
          List[tuple(str,str)] (<package_name>, <download_url>), or None when
          no download url could be determined for any component
        """
        download_urls = []
        for comp in components:
            name = comp.get('name')
            purl = comp.get('purl')
            if not name or not purl:
                print(f"Invalid Download URL for NPM package: {name} :: missing name or purl")
                continue
            try:
                download_url = self._get_download_url(purl)
            except Exception as e:
                # Best effort: any failure while resolving one purl must not
                # stop the remaining packages from being processed.
                print(f"Invalid Download URL for NPM package: {name} :: {e}")
                continue
            if not download_url:
                # A None result must not be stored as if it were a url.
                print(f"No suitable download URL found for {name}")
                continue
            download_urls.append((name, download_url))

        return download_urls if download_urls else None
Union[list[tuple[str, str]], None] parse_components(self, list[Dict] components)
Definition: Parsers.py:153
def _get_download_url(self, str purl)
Definition: Parsers.py:143
def classify_components(self)
Definition: Parsers.py:32
Union[str, None] _extract_type(self, str purl)
Definition: Parsers.py:51
def __init__(self, str sbom_file)
Definition: Parsers.py:19
Union[list[tuple[str, str]], None] parse_components(self, list[Dict] components)
Definition: Parsers.py:98
str _generate_api_endpoint(self, str package_name, str version)
Definition: Parsers.py:87
list[str, str] _process_components(self, list[Dict] components)
Definition: Parsers.py:77