11 from datetime
import datetime
13 from license_expression
import (
14 get_spdx_licensing, LicenseExpression, combine_expressions
16 from spdx_tools.spdx.model
import (
17 Actor, ActorType, Checksum, ChecksumAlgorithm, CreationInfo, Document, File,
18 FileType, Package, PackageVerificationCode, Relationship, RelationshipType,
19 SpdxNoAssertion, ExternalPackageRef, ExternalPackageRefCategory, SpdxNone,
20 ExtractedLicensingInfo
22 from spdx_tools.spdx.validation.document_validator
import \
23 validate_full_spdx_document
24 from spdx_tools.spdx.validation.validation_message
import ValidationMessage
25 from spdx_tools.spdx.writer.write_anything
import write_file
27 from .CliOptions
import CliOptions
28 from .Scanners
import Scanners, ScanResultList
35 :ivar cli_options: CliOptions object
36 :ivar report_files: Dictionary of SPDX files with SPDX ID as key
37 :ivar license_package_set: Set of licenses found in package
38 :ivar creation_info: Report creation info
39 :ivar document: Report document
40 :ivar package: Report package
41 :ivar scanner: Scanners object
44 def __init__(self, cli_options: CliOptions, scanner: Scanners):
46 :param cli_options: CliOptions to use
47 :param scanner: Scanners to use
51 self.
cli_optionscli_options.allowlist.get(
'licenses', [])
53 self._license_cache: dict[str, LicenseExpression] = {}
56 self.report_files: dict[str, File] = {}
57 self.license_package_set: set[str] = set()
58 self.package_verification_set: dict[str, dict[str, list[str]]] = {}
59 self.creation_info: CreationInfo = CreationInfo(
60 spdx_version=
"SPDX-2.3",
61 spdx_id=
"SPDXRef-DOCUMENT",
62 name=
"FOSSology CI Report",
63 data_license=
"CC0-1.0",
64 document_namespace=
"https://fossology.org",
67 ActorType.ORGANIZATION,
"FOSSology",
68 "fossology@fossology.org"
70 ], created=datetime.now(),
72 self.document: Document = Document(self.creation_info)
74 parent_package = self.
scannerscanner.get_scan_packages().parent_package
75 project_name = parent_package.get(
'name',
'').strip()
77 project_name = self.
cli_optionscli_options.parser.root_component_name
81 self.package: Package = Package(
83 spdx_id=
"SPDXRef-Package",
85 download_location=SpdxNoAssertion(),
86 release_date=datetime.now(),
88 if parent_package.get(
'description')
is not None:
89 self.package.description = parent_package[
'description']
91 author = parent_package.get(
'author')
92 if author
and author !=
"":
93 self.package.originator = Actor(
94 ActorType.ORGANIZATION,
98 self.package.originator = SpdxNoAssertion()
100 url = parent_package.get(
'url')
101 if url
and url !=
"":
102 self.package.download_location = url
104 self.package.download_location = SpdxNoAssertion()
106 self.document.packages = [self.package]
107 self.dependent_packages: dict[str, Package] = {}
108 self.extracted_licenses: dict[str, ExtractedLicensingInfo] = {}
110 def __get_license_or_ref(self, lic: str) -> LicenseExpression:
111 if lic
in self._license_cache:
112 return self._license_cache[lic]
115 license_spdx = re.sub(
116 r'[^\da-zA-Z.\-_]',
'-',
117 f
"LicenseRef-fossology-{lic}"
119 if license_spdx
not in self.extracted_licenses:
120 self.extracted_licenses[license_spdx] = ExtractedLicensingInfo(
121 license_id=license_spdx,
123 extracted_text=f
"The license text for {license_spdx} has to be "
126 lic_expression = self.
_spdx_lic_cache_spdx_lic_cache.parse(license_spdx)
127 self._license_cache[lic] = lic_expression
128 return lic_expression
132 Add scan result from license scanner to report.
134 :param package: Package to which the file belongs.
135 :param scan_result: Scan result from license scanner.
137 raw_licenses_strings = [lic[
'license']
for lic
in scan_result.result]
139 raw_licenses_strings}
140 parsed_expressions_list =
list(parsed_expressions_set)
142 all_allowed_licenses = all(
148 if all_allowed_licenses:
149 file.license_concluded = combine_expressions(
150 expressions=parsed_expressions_list, relation=
'AND', unique=
False
153 file.license_concluded = SpdxNoAssertion()
155 file.license_info_in_file = parsed_expressions_list
158 package.license_info_from_files =
list(
159 set(package.license_info_from_files) | parsed_expressions_set
163 if file.license_concluded != SpdxNoAssertion():
164 if package.license_concluded
in (SpdxNoAssertion(), SpdxNone()):
165 package.license_concluded = file.license_concluded
167 package.license_concluded = (
168 package.license_concluded & file.license_concluded).simplify()
171 self, scan_result: ScanResultList, package: Package
174 Create a new SPDX File for given scan result and populate common fields.
176 :param scan_result: Scan result from scanner.
177 :param package: Package to which the file belongs.
178 :return: New SPDX File
180 md5_hash, sha1_hash, sha256_hash = self.
__get_file_info__get_file_info(scan_result)
182 if file_spdx_id
not in self.report_files:
184 name=scan_result.file,
185 spdx_id=file_spdx_id,
187 Checksum(ChecksumAlgorithm.MD5, md5_hash),
188 Checksum(ChecksumAlgorithm.SHA1, sha1_hash),
189 Checksum(ChecksumAlgorithm.SHA256, sha256_hash),
191 file_types=[FileType.SOURCE],
192 license_concluded=SpdxNoAssertion()
194 self.report_files[file_spdx_id] = spdx_file
195 contains_relationship = Relationship(
197 RelationshipType.CONTAINS,
200 self.document.relationships.append(contains_relationship)
202 pkg_verification_data = self.package_verification_set.setdefault(
204 'checksums': [],
'excluded_files': []
208 if self.
scannerscanner.is_excluded_path(spdx_file.name):
209 pkg_verification_data[
'excluded_files'].append(spdx_file.name)
211 pkg_verification_data[
'checksums'].append(sha1_hash)
213 return self.report_files[file_spdx_id]
216 self, package: Package, copyright_result: ScanResultList
219 Add scan result from copyright agent. If the file does not exist, creates a
222 :param copyright_result: Scan result from copyright scanner.
225 file.copyright_text =
"\n".join(
227 cpy.get(
'content',
'')
for cpy
in copyright_result.result
234 Get different hash for the file in scan result.
236 :param scan_result: Scan result from scanners.
237 :return: Tuple of md5, sha1 and sha256 checksums.
239 md5_hash = hashlib.md5()
240 sha1_hash = hashlib.sha1()
241 sha256_hash = hashlib.sha256()
242 with open(scan_result.path,
"rb")
as f:
243 for byte_block
in iter(
lambda: f.read(4096), b
""):
244 md5_hash.update(byte_block)
245 sha1_hash.update(byte_block)
246 sha256_hash.update(byte_block)
247 return md5_hash.hexdigest(), sha1_hash.hexdigest(), sha256_hash.hexdigest()
252 Generate SPDX ID for file in scan result.
254 :param sha256_hash: SHA 256 checksum of the file
255 :param pkg_name: Package to which the file belongs.
256 :return: SPDX ID for the file.
258 return f
"SPDXRef-File-{pkg_name}-{sha256_hash}"
263 Generate SPDX ID for a package/component.
265 :param component: Package/component to get SPDX ID for.
266 :return: SPDX ID for the package.
268 pkg_name = component.get(
'name',
'')
269 pkg_version = component.get(
'version',
'')
270 return "SPDXRef-Package-" + hashlib.md5(
271 f
"{pkg_name}_{pkg_version}".encode(
'utf-8', errors=
'ignore')
276 Validate the document and write the SPDX file.
278 :param file_name: Location to store the report.
280 validation_messages: list[ValidationMessage] = validate_full_spdx_document(
284 if validation_messages:
285 for message
in validation_messages:
287 f
"SPDX Validation Warning: {message.validation_message}\n"
288 f
"Context: {message.context}"
291 "SPDX document validation failed. See logs for details."
294 logging.info(
"SPDX document validated successfully.")
298 self.document, file_name, validate=
False
303 Finalize the document by setting relations between packages and files.
304 At the same time, add all the licenses from files to the package and
305 calculate the verification code, without the excluded files.
314 def __create_packages(self) -> None:
315 parent_name = self.
scannerscanner.get_scan_packages().parent_package.get(
319 self.package.spdx_id = re.sub(
320 r'[^A-Za-z0-9\-_.]',
'-',
321 f
"SPDXRef-Package-{parent_name}"
323 describes_relationship = Relationship(
325 RelationshipType.DESCRIBES,
328 self.document.relationships.append(describes_relationship)
330 for purl, component
in (
331 self.
scannerscanner.get_scan_packages().dependencies.items()
334 self.document.packages.append(package)
335 depends_on_relationship = Relationship(
336 self.package.spdx_id,
337 RelationshipType.DEPENDS_ON,
340 self.document.relationships.append(depends_on_relationship)
344 For a given component, create a package and add it to the list.
346 :param component: Component to create package for.
347 :return: Create or get existing package.
350 if pkg_spdx_id
not in self.dependent_packages:
351 self.dependent_packages[pkg_spdx_id] = Package(
353 name=component.get(
'name',
'UNKNOWN'),
354 version=component.get(
'version',
'UNKNOWN'),
355 download_location=component.get(
356 'fossology_download_url', SpdxNoAssertion()
358 license_info_from_files=[],
359 license_concluded=SpdxNone(),
362 purl_ref = ExternalPackageRef(
363 category=ExternalPackageRefCategory.PACKAGE_MANAGER,
364 reference_type=
'purl',
365 locator=component.get(
'purl')
367 self.dependent_packages[pkg_spdx_id].external_references.append(purl_ref)
369 vcs_url = component.get(
'vcs_url')
371 vcs_ref = ExternalPackageRef(
372 category=ExternalPackageRefCategory.OTHER, reference_type=
'vcs',
375 self.dependent_packages[pkg_spdx_id].external_references.append(vcs_ref)
377 homepage_url = component.get(
'homepage_url')
379 homepage_ref = ExternalPackageRef(
380 category=ExternalPackageRefCategory.OTHER, reference_type=
'homepage',
383 self.dependent_packages[pkg_spdx_id].external_references.append(
386 return self.dependent_packages[pkg_spdx_id]
388 def __create_license_files(self) -> None:
390 self.
scannerscanner.get_scan_packages().parent_package, self.package
392 for component
in self.
scannerscanner.get_scan_packages().dependencies.values():
399 def __create_copyright_files(self) -> None:
401 self.
scannerscanner.get_scan_packages().parent_package, self.package
403 for component
in self.
scannerscanner.get_scan_packages().dependencies.values():
410 def __create_license_file_from_component(
411 self, component: dict, package: Package
413 for result
in component.get(
'SCANNER_RESULTS', []):
416 def __create_copyright_file_from_component(
417 self, component: dict, package: Package
419 for result
in component.get(
'COPYRIGHT_RESULT', []):
422 def __add_files_to_document(self) -> None:
423 self.document.files =
list(self.report_files.values())
425 def __add_extracted_licenses(self) -> None:
426 self.document.extracted_licensing_info =
list(
427 self.extracted_licenses.values()
430 def __update_package_verification_code(self) -> None:
431 for package
in self.document.packages:
434 package.verification_code = code
437 self, package_spdx_id: str
438 ) -> PackageVerificationCode |
None:
440 Calculate package verification code for the list of checksums and return it.
442 :param package_spdx_id: Package SPDX ID to calculate the verification
444 :return: Package Verification Code based on SPDX specification.
446 pkg_data = self.package_verification_set.
get(package_spdx_id)
450 checksums = pkg_data.get(
'checksums', [])
451 excluded_files = pkg_data.get(
'excluded_files', [])
455 verification_code = hashlib.sha1(
456 "".join(checksums).encode(
'utf-8', errors=
'ignore')
459 return PackageVerificationCode(
460 value=verification_code,
461 excluded_files=excluded_files
def finalize_document(self)
None __update_package_verification_code(self)
def __init__(self, CliOptions cli_options, Scanners scanner)
None __add_files_to_document(self)
None __add_extracted_licenses(self)
File __get_spdx_file(self, ScanResultList scan_result, Package package)
None __create_copyright_file_from_component(self, dict component, Package package)
def __add_copyright_file(self, Package package, ScanResultList copyright_result)
None __create_copyright_files(self)
Package __get_package_for_component(self, dict component)
LicenseExpression __get_license_or_ref(self, str lic)
None __create_license_files(self)
None __create_license_file_from_component(self, dict component, Package package)
def __add_license_file(self, Package package, ScanResultList scan_result)
None __create_packages(self)
tuple[str, str, str] __get_file_info(ScanResultList scan_result)
str __get_package_spdx_id(dict component)
PackageVerificationCode|None __calculate_verification_code(self, str package_spdx_id)
str __get_file_spdx_id(str sha256_hash, str pkg_name)
def write_report(self, str file_name)
list_t type structure used to keep various lists. (e.g. there are multiple lists).