FOSSology  4.4.0
Open Source License Compliance by Open Source Software
SpdxReport.py
#!/usr/bin/env python3

# SPDX-FileCopyrightText: © 2023 Siemens AG
# SPDX-FileContributor: Gaurav Mishra <mishra.gaurav@siemens.com>
#
# SPDX-License-Identifier: GPL-2.0-only

import hashlib
import logging
from datetime import datetime, timezone
from typing import List, Set, Dict, Tuple

from license_expression import get_spdx_licensing
from spdx_tools.spdx.model import (
    Actor,
    ActorType,
    Checksum,
    ChecksumAlgorithm,
    CreationInfo,
    Document,
    File,
    FileType,
    Package,
    PackageVerificationCode,
    Relationship,
    RelationshipType,
    SpdxNoAssertion
)
from spdx_tools.spdx.validation.document_validator import \
    validate_full_spdx_document
from spdx_tools.spdx.validation.validation_message import ValidationMessage
from spdx_tools.spdx.writer.write_anything import write_file

from .ApiConfig import ApiConfig
from .CliOptions import CliOptions
from .Scanners import ScanResult, Scanners


class SpdxReport:
    """
    Handle SPDX reports.

    Collects per-file results from the license and copyright scanners,
    attaches them to a single SPDX package and, once finalized, validates
    and writes the resulting SPDX document.

    :ivar cli_options: CliOptions object
    :ivar report_files: Dictionary of SPDX files with SPDX ID as key
    :ivar license_package_set: Set of licenses found in package
    :ivar creation_info: Report creation info
    :ivar document: Report document
    :ivar package: Report package
    """

    def __init__(self, cli_options: CliOptions, api_config: ApiConfig):
        """
        Create the document skeleton: creation info, one package and the
        DESCRIBES relationship between the document and that package.

        :param cli_options: CliOptions to use
        :param api_config: ApiConfig to use
        """
        self.cli_options = cli_options
        self.report_files: Dict[str, File] = {}
        self.license_package_set: Set[str] = set()

        # SPDX timestamps are UTC with second precision
        # (YYYY-MM-DDThh:mm:ssZ). Use one naive-UTC value for both fields
        # instead of two separate local-time readings.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None,
                                                       microsecond=0)

        self.creation_info: CreationInfo = CreationInfo(
            spdx_version="SPDX-2.3",
            spdx_id="SPDXRef-DOCUMENT",
            name="FOSSology CI Report",
            data_license="CC0-1.0",
            document_namespace="https://fossology.org",
            creators=[Actor(ActorType.ORGANIZATION, "FOSSology",
                            "fossology@fossology.org")],
            created=timestamp,
        )
        self.document: Document = Document(self.creation_info)

        # A missing or empty project URL is reported as NOASSERTION.
        self.package: Package = Package(
            name=api_config.project_name,
            spdx_id="SPDXRef-Package",
            files_analyzed=True,
            download_location=api_config.project_url or SpdxNoAssertion(),
            release_date=timestamp,
        )
        if api_config.project_desc is not None:
            self.package.description = api_config.project_desc
        # A missing or empty originator is reported as NOASSERTION.
        if api_config.project_orig:
            self.package.originator = Actor(ActorType.ORGANIZATION,
                                            api_config.project_orig)
        else:
            self.package.originator = SpdxNoAssertion()

        self.document.packages = [self.package]

        describes_relationship = Relationship("SPDXRef-DOCUMENT",
                                              RelationshipType.DESCRIBES,
                                              "SPDXRef-Package")
        self.document.relationships = [describes_relationship]

    def add_license_file(self, scan_result: ScanResult):
        """
        Add scan result from license scanner to report.

        The concluded license is the AND-combination of all findings when
        every finding is in the allowlist, otherwise NOASSERTION. The
        findings are also added to the package-wide license set.

        :param scan_result: Scan result from license scanner.
        """
        allowed_licenses = self.cli_options.allowlist['licenses']
        # Sorted so the generated expressions do not depend on set order.
        found_licenses = sorted(scan_result.result)
        spdx_id = self.__get_file_spdx_id(scan_result)

        if spdx_id in self.report_files:
            file = self.report_files[spdx_id]
        else:
            file = self.__get_new_spdx_file(scan_result, spdx_id)

        licensing = get_spdx_licensing()
        file.file_types = [FileType.SOURCE]
        if all(lic in allowed_licenses for lic in found_licenses):
            file.license_concluded = licensing.parse(
                " AND ".join(found_licenses))
        else:
            file.license_concluded = SpdxNoAssertion()
        file.license_info_in_file = [
            licensing.parse(lic) for lic in found_licenses
        ]
        self.report_files[spdx_id] = file
        self.license_package_set.update(found_licenses)

    def __get_new_spdx_file(self, scan_result: ScanResult,
                            spdx_id: str) -> File:
        """
        Create a new SPDX File for given scan result and populate common
        fields (name, checksums, file type, NOASSERTION concluded license).

        :param scan_result: Scan result from scanner.
        :param spdx_id: SPDX ID to use for file.
        :return: New SPDX File
        """
        md5_hash, sha1_hash, sha256_hash = self.__get_file_info(scan_result)
        return File(
            name=scan_result.file,
            spdx_id=spdx_id,
            checksums=[
                Checksum(ChecksumAlgorithm.MD5, md5_hash.hexdigest()),
                Checksum(ChecksumAlgorithm.SHA1, sha1_hash.hexdigest()),
                Checksum(ChecksumAlgorithm.SHA256, sha256_hash.hexdigest()),
            ],
            file_types=[FileType.SOURCE],
            license_concluded=SpdxNoAssertion()
        )

    def add_copyright_file(self, copyright_result: ScanResult):
        """
        Add scan result from copyright agent. If the file does not exist,
        creates a new one and registers it in the report.

        :param copyright_result: Scan result from copyright scanner.
        """
        spdx_id = self.__get_file_spdx_id(copyright_result)
        if spdx_id in self.report_files:
            file = self.report_files[spdx_id]
        else:
            file = self.__get_new_spdx_file(copyright_result, spdx_id)
            # Register the new file; otherwise files that only have
            # copyright findings never reach the document.
            self.report_files[spdx_id] = file
        file.copyright_text = "\n".join(copyright_result.result)

    @staticmethod
    def __get_file_info(scan_result: ScanResult) -> Tuple:
        """
        Get different hash for the file in scan result.

        The file at ``scan_result.path`` is read in 4096-byte blocks so
        large files are never loaded into memory at once.

        :param scan_result: Scan result from scanners.
        :return: Tuple of md5, sha1 and sha256 hash objects (not hex
                 strings; callers use ``hexdigest()``).
        """
        md5_hash = hashlib.md5()
        sha1_hash = hashlib.sha1()
        sha256_hash = hashlib.sha256()
        with open(scan_result.path, "rb") as f:
            while byte_block := f.read(4096):
                md5_hash.update(byte_block)
                sha1_hash.update(byte_block)
                sha256_hash.update(byte_block)
        return md5_hash, sha1_hash, sha256_hash

    @staticmethod
    def __get_file_spdx_id(scan_result: ScanResult) -> str:
        """
        Generate SPDX ID for file in scan result.

        The ID is "SPDXRef-File" followed by the MD5 hex digest of the
        file name, so the same file name always maps to the same ID.

        :param scan_result: Scan result from scanner.
        :return: SPDX ID for the file.
        """
        name_digest = hashlib.md5(scan_result.file.encode()).hexdigest()
        return "SPDXRef-File" + name_digest

    def write_report(self, file_name: str):
        """
        Validate the document and write the SPDX file.

        Every validation message is logged as a warning before failing.

        :param file_name: Location to store the report.
        :raises AssertionError: If the document does not validate.
        """
        validation_messages: List[ValidationMessage] = \
            validate_full_spdx_document(self.document)
        for message in validation_messages:
            logging.warning(message.validation_message)
            logging.warning(message.context)
        if validation_messages:
            # Raised explicitly (not via ``assert``) so the check still
            # runs under ``python -O``; AssertionError is kept so callers
            # see the same exception type as before.
            raise AssertionError(
                "SPDX document failed validation with "
                f"{len(validation_messages)} message(s)")
        write_file(self.document, file_name)

    def finalize_document(self):
        """
        Finalize the document by setting relations between packages and
        files. At the same time, add all the licenses from files to the
        package and calculate the verification code, without the excluded
        files.

        Files already attached to the document are skipped, so calling
        this method more than once does not duplicate entries.
        """
        attached_ids = {f.spdx_id for f in self.document.files}
        for spdx_id, file in self.report_files.items():
            if spdx_id in attached_ids:
                continue
            contains_relationship = Relationship("SPDXRef-Package",
                                                 RelationshipType.CONTAINS,
                                                 spdx_id)
            self.document.relationships += [contains_relationship]
            self.document.files += [file]

        licensing = get_spdx_licensing()
        # Sorted so the package expression does not depend on set order.
        package_licenses = sorted(self.license_package_set)
        self.package.license_info_from_files = [
            licensing.parse(lic) for lic in package_licenses
        ]

        allowed_licenses = self.cli_options.allowlist['licenses']
        if all(lic in allowed_licenses for lic in package_licenses):
            self.package.license_concluded = licensing.parse(
                " AND ".join(package_licenses))
        else:
            self.package.license_concluded = SpdxNoAssertion()

        # Verification code: SHA1 over the sorted, concatenated SHA1
        # checksums of every file that is not excluded.
        scanner_obj = Scanners(self.cli_options)
        sha1_values: List[str] = []
        excluded_files: List[str] = []
        for f in self.document.files:
            if scanner_obj.is_excluded_path(f.name):
                excluded_files.append(f.name)
                continue
            for checksum in f.checksums:
                if checksum.algorithm == ChecksumAlgorithm.SHA1:
                    sha1_values.append(checksum.value)
                    break
        sha1_values.sort()
        verificationcode = hashlib.sha1(
            "".join(sha1_values).encode()).hexdigest()

        self.package.verification_code = PackageVerificationCode(
            value=verificationcode, excluded_files=excluded_files
        )

    def add_license_results(self, scan_results: List[ScanResult]):
        """
        Helper function to add scan results to the report from license
        scanners.

        :param scan_results: List of scan results from the license scanners.
        """
        for result in scan_results:
            self.add_license_file(result)

    def add_copyright_results(self, copyright_results: List[ScanResult]):
        """
        Helper function to add scan results to the report from copyright
        scanner.

        :param copyright_results: List of scan results from the copyright
                                  scanner.
        """
        for result in copyright_results:
            self.add_copyright_file(result)
static int update(int *pid_ptr, agent_t *agent, gpointer unused)
Definition: agent.c:152
Tuple __get_file_info(ScanResult scan_result)
Definition: SpdxReport.py:164
def add_copyright_results(self, List[ScanResult] copyright_results)
Definition: SpdxReport.py:258
def add_copyright_file(self, ScanResult copyright_result)
Definition: SpdxReport.py:147
def add_license_results(self, List[ScanResult] scan_results)
Definition: SpdxReport.py:249
File __get_new_spdx_file(self, ScanResult scan_result, str spdx_id)
Definition: SpdxReport.py:125
str __get_file_spdx_id(ScanResult scan_result)
Definition: SpdxReport.py:182
def add_license_file(self, ScanResult scan_result)
Definition: SpdxReport.py:97
def __init__(self, CliOptions cli_options, ApiConfig api_config)
Definition: SpdxReport.py:51
def write_report(self, str file_name)
Definition: SpdxReport.py:193