FOSSology  4.5.1
Open Source License Compliance by Open Source Software
fossologyscanner.py
1 #!/usr/bin/env python3
2 
3 # SPDX-FileCopyrightText: © 2020,2023 Siemens AG
4 # SPDX-FileCopyrightText: © anupam.ghosh@siemens.com
5 # SPDX-FileCopyrightText: © mishra.gaurav@siemens.com
6 
7 # SPDX-License-Identifier: GPL-2.0-only
8 
9 import argparse
10 import json
11 import os
12 import sys
13 import textwrap
14 import logging
15 from typing import List, Union, IO
16 
17 from FoScanner.ApiConfig import (ApiConfig, Runner)
18 from FoScanner.CliOptions import (CliOptions, ReportFormat)
19 from FoScanner.RepoSetup import RepoSetup
20 from FoScanner.Scanners import (Scanners, ScanResult)
21 from FoScanner.SpdxReport import SpdxReport
22 from FoScanner.FormatResults import FormatResult
23 from FoScanner.Utils import (validate_keyword_conf_file, copy_keyword_file_to_destination)
24 from ScanDeps.Parsers import Parser, PythonParser
25 from ScanDeps.Downloader import Downloader
26 
def get_api_config() -> ApiConfig:
    """
    Set the API configuration based on the CI the job is running on.

    The CI system is detected from the environment, in this order:
    ``GITLAB_CI`` being set (GitLab), ``TRAVIS`` equal to ``'true'``
    (Travis CI), ``GITHUB_ACTIONS`` equal to ``'true'`` (GitHub Actions).
    When none of them matches, the untouched default ``ApiConfig()`` is
    returned.

    :return: ApiConfig object
    :raises KeyError: if a variable read without a fallback is missing for
                      the detected CI system
    """
    env = os.environ
    api_config = ApiConfig()
    if 'GITLAB_CI' in env:
        api_config.running_on = Runner.GITLAB
        api_config.api_url = env.get('CI_API_V4_URL', '')
        api_config.project_id = env.get('CI_PROJECT_ID', '')
        api_config.mr_iid = env.get('CI_MERGE_REQUEST_IID', '')
        api_config.api_token = env.get('API_TOKEN', '')
        api_config.project_name = env.get('CI_PROJECT_NAME', '')
        # A blank description was already mapped to None; treat an unset
        # variable the same way instead of failing with KeyError.
        description = env.get('CI_PROJECT_DESCRIPTION', '').strip()
        api_config.project_desc = description or None
        api_config.project_orig = env['CI_PROJECT_NAMESPACE']
        api_config.project_url = env['CI_PROJECT_URL']
    elif env.get('TRAVIS') == 'true':
        api_config.running_on = Runner.TRAVIS
        slug = env['TRAVIS_REPO_SLUG']
        api_config.travis_repo_slug = slug
        api_config.travis_pull_request = env['TRAVIS_PULL_REQUEST']
        # Split on the last "/": everything before it is the origin, the
        # remainder is the project name. The previous "[:-2]" slice dropped
        # the owner as well, leaving the origin empty for "owner/repo".
        origin, _, project = slug.rpartition("/")
        api_config.project_name = project
        api_config.project_orig = origin
        api_config.project_url = "https://github.com/" + slug
    elif env.get('GITHUB_ACTIONS') == 'true':
        api_config.running_on = Runner.GITHUB
        api_config.api_url = env.get('GITHUB_API', 'https://api.github.com')
        api_config.api_token = env['GITHUB_TOKEN']
        repository = env['GITHUB_REPOSITORY']
        api_config.github_repo_slug = repository
        api_config.github_pull_request = env['GITHUB_PULL_REQUEST']
        api_config.project_name = repository.split("/")[-1]
        api_config.project_orig = env['GITHUB_REPO_OWNER']
        api_config.project_url = env['GITHUB_REPO_URL']
    return api_config
72 
73 
def get_allow_list(path: str = '') -> dict:
    """
    Decode JSON from the allowlist file.

    When ``path`` is empty the file is looked up in the current working
    directory: the deprecated ``whitelist.json`` is used if it exists,
    otherwise ``allowlist.json``.

    :param path: path to allowlist file. Default=''
    :return: allowlist dictionary
    :raises FileNotFoundError: if the selected file does not exist
    :raises json.JSONDecodeError: if the file does not contain valid JSON
    """
    if path == '':
        if os.path.exists('whitelist.json'):
            file_name = 'whitelist.json'
            print("Reading whitelist.json file...")
            logging.warning("Name 'whitelist.json' is deprecated. "
                            "Please use 'allowlist.json' instead")
        else:
            file_name = 'allowlist.json'
            print("Reading allowlist.json file...")
    else:
        file_name = path
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(file_name, encoding='utf-8') as f:
        return json.load(f)
94 
95 
def print_results(name: str, failed_results: List[ScanResult],
                  scan_results_with_line_number: List[dict],
                  result_file: IO):
    """
    Print the formatted scanner results to stdout and to ``result_file``.

    For every failed file a ``File:`` header and a heading built from
    ``name`` (with an ``s`` appended when the file has more than one
    finding) are emitted, followed by one tab-indented line per finding.
    A finding equal to a key of one of the ``scan_results_with_line_number``
    dictionaries is replaced by ``<word> at line(s) <numbers>``.

    :param name: Name of the scanner
    :param failed_results: formatted scanner results to be printed
    :param scan_results_with_line_number: list of dicts mapping a word to
                                          the list of its line numbers
    :param result_file: File to write results to
    """
    for failed in failed_results:
        file_header = f"File: {failed.file}"
        print(file_header)
        result_file.write(file_header + "\n")

        heading = f"{name}s:" if len(failed.result) > 1 else f"{name}:"
        print(heading)
        result_file.write(heading + "\n")

        for finding in failed.result:
            text = finding
            for word_map in scan_results_with_line_number:
                for scanned_word, lines in word_map.items():
                    if text != scanned_word:
                        continue
                    suffix = "s" if len(lines) > 1 else ""
                    joined = ", ".join(lines)
                    # The rewritten text no longer equals the bare word, so
                    # later entries are not expected to match again.
                    text = f"{scanned_word} at line{suffix} {joined}\n"
            print("\t" + text)
            result_file.write("\t" + text + "\n")
127 
128 
def print_log_message(filename: str,
                      failed_list: Union[bool, List[ScanResult]],
                      check_value: bool, failure_text: str,
                      acceptance_text: str, scan_type: str,
                      return_val: int,
                      scan_results_with_line_number: List[dict]) -> int:
    """
    Common helper function to print scan results.

    The outcome is a failure when ``failed_list`` is a bool different from
    ``check_value`` or a non-empty list. On failure the bit for
    ``scan_type`` is OR-ed into the return value: 2 for "License", 4 for
    "Copyright", 8 for "Keyword"; any other scan type leaves it unchanged.

    :param filename: File where results are to be stored.
    :param failed_list: Failed scan results.
    :param check_value: Boolean value which failed_list should have.
    :param failure_text: Message to print in case of failures.
    :param acceptance_text: Message to print in case of no failures.
    :param scan_type: Type of scan to print.
    :param return_val: Return value for program
    :param scan_results_with_line_number: list of dicts mapping a word to
                                          the list of its line numbers
    :return: New return value
    """
    failure_bits = {"License": 2, "Copyright": 4, "Keyword": 8}
    has_result_list = isinstance(failed_list, list)
    failed = (isinstance(failed_list, bool) and failed_list is not check_value) \
        or (has_result_list and len(failed_list) != 0)

    # The context manager closes the report even if printing raises.
    with open(filename, 'w') as report_file:
        if failed:
            print(f"\u2718 {failure_text}:")
            report_file.write(f"{failure_text}:\n")
            # A bare bool carries no per-file details to iterate over.
            if has_result_list:
                print_results(scan_type, failed_list,
                              scan_results_with_line_number, report_file)
            return_val |= failure_bits.get(scan_type, 0)
        else:
            print(f"\u2714 {acceptance_text}")
            report_file.write(f"{acceptance_text}\n")
        print()
    return return_val
165 
def format_keyword_results_with_line_numbers(scanner: Scanners,
                                             format_results: FormatResult) \
        -> List[dict]:
    """
    Format the keyword results with line numbers.

    Each entry of ``scanner.get_keyword_list(whole=True)`` is passed to
    ``format_results.find_word_line_numbers`` with ``key='content'``.

    :param scanner: Scanners object
    :param format_results: FormatResult object
    :return: list with one ``find_word_line_numbers`` result per entry
             (described upstream as word -> list of line numbers); empty
             when the scanner reports ``False``
    """
    whole_results = scanner.get_keyword_list(whole=True)
    if whole_results is False:
        return []
    return [
        format_results.find_word_line_numbers(
            entry.path, list(entry.result), key='content')
        for entry in whole_results
    ]
185 
def format_copyright_results_with_line_numbers(scanner: Scanners,
                                               format_results: FormatResult) \
        -> List[dict]:
    """
    Format the copyright results with line numbers.

    Each entry of ``scanner.get_copyright_list(whole=True)`` is passed to
    ``format_results.find_word_line_numbers`` with ``key='content'``.

    :param scanner: Scanners object
    :param format_results: FormatResult object
    :return: list with one ``find_word_line_numbers`` result per entry
             (described upstream as word -> list of line numbers); empty
             when the scanner reports ``False``
    """
    found = scanner.get_copyright_list(whole=True)
    entries = [] if found is False else found
    return [
        format_results.find_word_line_numbers(
            entry.path, list(entry.result), key='content')
        for entry in entries
    ]
205 
def format_license_results_with_line_numbers(scanner: Scanners,
                                             format_results: FormatResult) \
        -> List[dict]:
    """
    Format the license results with line numbers.

    Each entry of ``scanner.results_are_allow_listed(whole=True)`` is passed
    to ``format_results.find_word_line_numbers`` with ``key='license'``.

    :param scanner: Scanners object
    :param format_results: FormatResult object
    :return: list with one ``find_word_line_numbers`` result per entry
             (described upstream as word -> list of line numbers); empty
             when the scanner reports ``True`` or ``None``
    """
    found = scanner.results_are_allow_listed(whole=True)
    nothing_failed = found is True or found is None
    entries = [] if nothing_failed else found
    return [
        format_results.find_word_line_numbers(
            entry.path, list(entry.result), key='license')
        for entry in entries
    ]
225 
def text_report(cli_options: CliOptions, result_dir: str, return_val: int,
                scanner: Scanners, format_results: FormatResult) -> int:
    """
    Run scanners and print results in text format.

    One report file is written into ``result_dir`` for each enabled scan:
    ``licenses.txt`` (nomos or ojo), ``copyrights.txt`` and
    ``keywords.txt``.

    :param cli_options: CLI options
    :param result_dir: Result directory location
    :param return_val: Return value of program
    :param scanner: Scanner object
    :param format_results: FormatResult object
    :return: Program's return value, including the failure bits reported by
             ``print_log_message`` for every enabled scan
    """
    if cli_options.nomos or cli_options.ojo:
        failed_licenses = scanner.results_are_allow_listed()
        scan_results_with_line_number = format_license_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        # Assign the result back: ints are immutable, so the failure bit set
        # inside print_log_message is lost when the return value is dropped.
        return_val = print_log_message(
            f"{result_dir}/licenses.txt", failed_licenses, True,
            "Following licenses found which are not allow listed",
            "No license violation found", "License", return_val,
            scan_results_with_line_number)
    if cli_options.copyright:
        copyright_results = scanner.get_copyright_list()
        scan_results_with_line_number = format_copyright_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/copyrights.txt", copyright_results, False,
            "Following copyrights found",
            "No copyright violation found", "Copyright", return_val,
            scan_results_with_line_number)
    if cli_options.keyword:
        keyword_results = scanner.get_keyword_list()
        scan_results_with_line_number = format_keyword_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/keywords.txt", keyword_results, False,
            "Following keywords found",
            "No keyword violation found", "Keyword", return_val,
            scan_results_with_line_number)
    return return_val
263 
264 
def bom_report(cli_options: CliOptions, result_dir: str, return_val: int,
               scanner: Scanners, api_config: ApiConfig,
               format_results: FormatResult) -> int:
    """
    Run scanners and print results as an SBOM.

    Besides the per-scan text logs (``licenses.txt``, ``copyrights.txt``,
    ``keywords.txt``) an SPDX document named ``sbom_<suffix>`` is written
    into ``result_dir``; the suffix depends on ``cli_options.report_format``.

    :param cli_options: CLI options
    :param result_dir: Result directory location
    :param return_val: Return value
    :param scanner: Scanner object
    :param api_config: API config options
    :param format_results: FormatResult object
    :return: Program's return value
    """
    sbom = SpdxReport(cli_options, api_config)

    if cli_options.nomos or cli_options.ojo:
        all_licenses = scanner.get_scanner_results()
        sbom.add_license_results(all_licenses)
        license_lines = format_license_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        rejected_licenses = scanner.get_non_allow_listed_results(all_licenses)
        return_val = print_log_message(
            f"{result_dir}/licenses.txt", rejected_licenses, True,
            "Following licenses found which are not allow listed",
            "No license violation found", "License", return_val,
            license_lines)

    if cli_options.copyright:
        all_copyrights = scanner.get_copyright_list(all_results=True)
        if all_copyrights is False:
            all_copyrights = []
        sbom.add_copyright_results(all_copyrights)
        rejected_copyrights = scanner.get_non_allow_listed_copyrights(
            all_copyrights)
        copyright_lines = format_copyright_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/copyrights.txt", rejected_copyrights, False,
            "Following copyrights found", "No copyright violation found",
            "Copyright", return_val, copyright_lines)

    if cli_options.keyword:
        found_keywords = scanner.get_keyword_list()
        keyword_lines = format_keyword_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/keywords.txt", found_keywords, False,
            "Following keywords found", "No keyword violation found",
            "Keyword", return_val, keyword_lines)

    sbom.finalize_document()

    # Ordered (format, suffix) pairs; the first equal format wins and an
    # unknown format leaves the bare "sbom_" prefix.
    suffix_by_format = (
        (ReportFormat.SPDX_JSON, "spdx.json"),
        (ReportFormat.SPDX_RDF, "spdx.rdf"),
        (ReportFormat.SPDX_TAG_VALUE, "spdx.spdx"),
        (ReportFormat.SPDX_YAML, "spdx.yaml"),
    )
    report_name = f"{result_dir}/sbom_"
    for known_format, suffix in suffix_by_format:
        if cli_options.report_format == known_format:
            report_name += suffix
            break

    sbom.write_report(report_name)
    print(f"\u2714 Saved SBOM as {report_name}")
    return return_val
321 
322 
def main(parsed_args):
    """
    Run the requested scans and write the reports into ``results``.

    Steps: build the CI/API configuration, load the allowlist (optional),
    install a custom keyword configuration when one is supplied and valid,
    optionally download the dependencies listed in an SBOM, choose the
    directory to scan, then produce either the text report or the SBOM.

    :param parsed_args: Parsed command line arguments, handed to
                        ``CliOptions.update_args``
    :return: 0 for success, error code on failure.
    """
    api_config = get_api_config()
    cli_options = CliOptions()
    cli_options.update_args(parsed_args)
    try:
        if cli_options.allowlist_path:
            allowlist_path = cli_options.allowlist_path
            print(f"Reading allowlist.json file from the path: '{allowlist_path}'")
            cli_options.allowlist = get_allow_list(path=allowlist_path)
        else:
            cli_options.allowlist = get_allow_list()
    except FileNotFoundError:
        # The allowlist is optional; scanning continues without it.
        print("Unable to find allowlist.json in current dir\n"
              "Continuing without it.", file=sys.stderr)

    if cli_options.keyword and cli_options.keyword_conf_file_path:
        keyword_conf_file_path = cli_options.keyword_conf_file_path
        destination_path = '/usr/local/share/fossology/keyword/agent/keyword.conf'
        is_valid, message = validate_keyword_conf_file(keyword_conf_file_path)
        if is_valid:
            print(f"Validation of keyword file successful: {message}")
            copy_keyword_file_to_destination(keyword_conf_file_path,
                                             destination_path)
        else:
            print(f"Could not validate keyword file: {message}")

    valid_comps_exist = False
    if (cli_options.scan_only_deps or cli_options.repo) and cli_options.sbom_path != '':
        download_list = []
        save_dir = 'pkg_downloads'
        sbom_file_path = cli_options.sbom_path
        parser = Parser(sbom_file_path)
        parser.classify_components()
        valid_comps_exist = (parser.python_components != [] or
                             parser.php_components != [] or
                             parser.npm_components != [])

        python_comps = parser.python_components
        unsupported_comps = parser.unsupported_components

        if len(python_comps) != 0:
            python_parser = PythonParser()
            python_list = python_parser.parse_components(python_comps)
            download_list += python_list

        if len(unsupported_comps) != 0:
            for comp in unsupported_comps:
                print(f'The purl {comp["purl"]} is not supported. Package will not be downloaded.')

        try:
            downloader = Downloader()
            downloader.download_concurrently(download_list, save_dir)
        except Exception as e:
            # Best effort: a failed download must not abort the scan, but the
            # cause is reported so the failure can be diagnosed.
            print("Something went wrong while downloading the dependencies: "
                  f"{e}", file=sys.stderr)

    repo_setup = RepoSetup(cli_options, api_config)
    if cli_options.repo is False:
        cli_options.diff_dir = repo_setup.get_diff_dir()

    # save_dir is only bound when the SBOM branch above ran, which is also
    # the only way valid_comps_exist can be True.
    if cli_options.scan_only_deps and valid_comps_exist:
        cli_options.diff_dir = save_dir
    if cli_options.scan_dir:
        cli_options.diff_dir = cli_options.dir_path

    scanner = Scanners(cli_options)
    return_val = 0

    # Populate tmp dir in unified diff format
    format_results = FormatResult(cli_options)
    format_results.process_files(scanner.cli_options.diff_dir)

    # Create result dir
    result_dir = "results"
    os.makedirs(name=result_dir, exist_ok=True)

    print("Preparing scan reports...")
    if cli_options.report_format == ReportFormat.TEXT:
        return_val = text_report(cli_options, result_dir, return_val, scanner,
                                 format_results)
    else:
        return_val = bom_report(cli_options, result_dir, return_val, scanner,
                                api_config, format_results)
    return return_val
411 
412 
# Command line entry point: build the argument parser, parse the arguments
# and exit with the code returned by main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=textwrap.dedent("""fossology scanner designed for CI""")
    )
    parser.add_argument(
        "operation", type=str, help="Operations to run.", nargs='*',
        choices=["nomos", "copyright", "keyword", "ojo", "repo",
                 "differential", "scan-only-deps", "scan-dir"]
    )
    # Adjacent string literals are concatenated verbatim, so the separating
    # space has to be part of the first literal.
    parser.add_argument(
        "--tags", type=str, nargs=2,
        help="Tags for differential scan. Required if 'differential' "
             "is specified."
    )
    parser.add_argument(
        "--report", type=str, help="Type of report to generate. Default 'TEXT'.",
        choices=[member.name for member in ReportFormat],
        default=ReportFormat.TEXT.name
    )
    parser.add_argument(
        '--keyword-conf', type=str,
        help='Path to the keyword configuration file. '
             'Use only when keyword argument is true'
    )
    parser.add_argument(
        '--dir-path', type=str, help='Path to directory for scanning.'
    )
    parser.add_argument(
        "--allowlist-path", type=str,
        help="Pass allowlist.json to allowlist dependencies."
    )
    parser.add_argument(
        "--sbom-path", type=str,
        help="Path to SBOM file for downloading dependencies."
    )

    args = parser.parse_args()
    sys.exit(main(args))
443 
Store the options sent through the CLI.
list_t type structure used to keep various lists. (e.g. there are multiple lists).
Definition: nomos.h:308