FOSSology  4.5.1
Open Source License Compliance by Open Source Software
Parsers.py
1 #!/usr/bin/env python3
2 
3 # SPDX-FileContributor: © Rajul Jha <rajuljha49@gmail.com>
4 
5 # SPDX-License-Identifier: GPL-2.0-only
6 
7 import requests
8 import json
9 from typing import Dict, Union
10 
11 
class Parser:
    """
    Parser to classify each SBOM component based on its package type.

    Ex: If purl is pkg:pypi/django@1.11.1,
    it is a pypi package and should belong to python_components.
    """

    def __init__(self, sbom_file: str):
        """
        Initialize the component lists and load the sbom_data.

        Args:
          sbom_file: str | Path to sbom file (JSON)
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(sbom_file, 'r', encoding='utf-8') as file:
            self.sbom_data = json.load(file)
        self.python_components = []
        self.npm_components = []
        self.php_components = []
        self.unsupported_components = []

    def classify_components(self):
        """
        Sort every component of the loaded SBOM into a per-ecosystem list.

        Components without a purl are ignored. Only 'pypi' components are
        classified for now; everything else goes to unsupported_components.
        """
        for component in self.sbom_data.get('components', []):
            purl = component.get('purl')
            if not purl:
                continue
            # Named purl_type so the builtin `type` is not shadowed.
            purl_type = self._extract_type(purl)

            if purl_type == 'pypi':
                self.python_components.append(component)
            # elif purl_type == 'npm':
            #     self.npm_components.append(component)
            # elif purl_type == 'composer':
            #     self.php_components.append(component)
            else:
                self.unsupported_components.append(component)

    def _extract_type(self, purl: str) -> Union[str, None]:
        """
        Extract the package type from the purl.

        Example purl: pkg:pypi/django@1.11.1
        The type here is 'pypi'.

        Args:
          purl: str | Purl of the package to scan
        Return:
          purl_type: str | Type of component, or None when purl is not a
                           string starting with 'pkg:'
        """
        # purl format: pkg:type/namespace/name@version?qualifiers#subpath
        # Always return str-or-None: the previous error path returned a
        # (None, message) tuple, which is truthy and broke the contract.
        if not isinstance(purl, str) or not purl.startswith("pkg:"):
            return None
        return purl.split(':')[1].split('/')[0]
68 
69 
71  """
72  Python Parser to parse the python sboms to generate download urls from
73  cyclonedx format sbom files.
74  """
75 
76  def __process_components(self, components : list[Dict]) -> list[str,str]:
77  """
78  Returns list of package name and version from SBOM component.
79  Args:
80  components: list[Dict]
81  Return:
82  list[str, str]: Name and versions of packages from sbom file
83  """
84  return [(comp['name'], comp['version']) for comp in components]
85 
86  def __generate_api_endpoint(self, package_name: str, version: str) -> str:
87  """
88  Generate JSON REST API Endpoint to fetch download url.
89  Args:
90  package_name: str Name of package
91  version: str Version of paclage
92  Return:
93  JSON REST API endpoint tp fetch metadata of package
94  """
95  return f"https://pypi.org/pypi/{package_name}/{version}/json"
96 
97  def parse_components(self, components: list[Dict]) -> Union[list[tuple[str,str]],None]:
98  """
99  Parse SBOM file for package name and download url of package.
100  Args:
101  sbom_file: str Path to sbom_file
102  Return:
103  list of tuples with package_name and download_url of that package
104  """
105  download_urls = []
106  packages = self.__process_components__process_components(components)
107 
108  for package_name, version in packages:
109  api_endpoint = self.__generate_api_endpoint__generate_api_endpoint(package_name, version)
110  print(f"API endpoint for {package_name} : {api_endpoint}")
111  response = requests.get(api_endpoint)
112 
113  if response.status_code == 200:
114  data = response.json()
115  sdist_url = None
116  wheel_url = None
117 
118  for url_info in data.get('urls', []):
119  if url_info.get('packagetype') == 'sdist':
120  sdist_url = url_info.get('url')
121  elif url_info.get('packagetype') == 'bdist_wheel':
122  wheel_url = url_info.get('url')
123 
124  # Prefer sdist, fallback to wheel if sdist is not available
125  download_url = sdist_url if sdist_url else wheel_url
126  if download_url:
127  download_urls.append((package_name, download_url))
128  else:
129  print(f"No suitable download URL found for {package_name} {version}")
130  else:
131  print(f"Failed to retrieve data for {package_name} {version}")
132 
133  return download_urls if download_urls else None
def classify_components(self)
Definition: Parsers.py:31
Union[str, None] _extract_type(self, str purl)
Definition: Parsers.py:50
def __init__(self, str sbom_file)
Definition: Parsers.py:18
str __generate_api_endpoint(self, str package_name, str version)
Definition: Parsers.py:86
Union[list[tuple[str, str]], None] parse_components(self, list[Dict] components)
Definition: Parsers.py:97
list[str, str] __process_components(self, list[Dict] components)
Definition: Parsers.py:76