15 from typing
import List, Union, IO
23 from FoScanner.Utils import (validate_keyword_conf_file, copy_keyword_file_to_destination)
def get_api_config() -> ApiConfig:
    """
    Set the API configuration based on the CI the job is running on.

    The CI system is detected from the environment: ``GITLAB_CI`` (GitLab),
    ``TRAVIS == 'true'`` (Travis CI) or ``GITHUB_ACTIONS == 'true'`` (GitHub
    Actions). When none of them is present the freshly constructed
    ``ApiConfig`` is returned unchanged.

    :return: ApiConfig object
    :raises KeyError: if a variable that is read without a fallback
        (e.g. ``CI_PROJECT_URL``, ``TRAVIS_REPO_SLUG``, ``GITHUB_TOKEN``)
        is missing from the environment of the detected CI
    """
    env = os.environ
    api_config = ApiConfig()
    if 'GITLAB_CI' in env:
        api_config.running_on = Runner.GITLAB
        # Optional values fall back to an empty string.
        api_config.api_url = env.get('CI_API_V4_URL', '')
        api_config.project_id = env.get('CI_PROJECT_ID', '')
        api_config.mr_iid = env.get('CI_MERGE_REQUEST_IID', '')
        api_config.api_token = env.get('API_TOKEN', '')
        api_config.project_name = env.get('CI_PROJECT_NAME', '')
        # A blank (or absent) description is stored as None, not ''.
        api_config.project_desc = \
            env.get('CI_PROJECT_DESCRIPTION', '').strip() or None
        api_config.project_orig = env['CI_PROJECT_NAMESPACE']
        api_config.project_url = env['CI_PROJECT_URL']
    elif env.get('TRAVIS') == 'true':
        api_config.running_on = Runner.TRAVIS
        repo_slug = env['TRAVIS_REPO_SLUG']
        api_config.travis_repo_slug = repo_slug
        api_config.travis_pull_request = env['TRAVIS_PULL_REQUEST']
        slug_parts = repo_slug.split("/")
        api_config.project_name = slug_parts[-1]
        # NOTE(review): the tail of this expression was not visible in the
        # reviewed listing; assuming the origin is every slug component
        # before the project name - confirm against the original.
        api_config.project_orig = "/".join(slug_parts[:-1])
        api_config.project_url = "https://github.com/" + repo_slug
    elif env.get('GITHUB_ACTIONS') == 'true':
        api_config.running_on = Runner.GITHUB
        api_config.api_url = env.get('GITHUB_API', 'https://api.github.com')
        api_config.api_token = env['GITHUB_TOKEN']
        api_config.github_repo_slug = env['GITHUB_REPOSITORY']
        api_config.github_pull_request = env['GITHUB_PULL_REQUEST']
        api_config.project_name = env['GITHUB_REPOSITORY'].split("/")[-1]
        api_config.project_orig = env['GITHUB_REPO_OWNER']
        api_config.project_url = env['GITHUB_REPO_URL']
    return api_config
def get_allow_list(path: str = '') -> dict:
    """
    Decode json from `allowlist.json`

    When no ``path`` is given the file is looked up in the current working
    directory: the deprecated name ``whitelist.json`` is used if it exists
    (a deprecation warning is logged), otherwise ``allowlist.json``.

    :param: path: path to allowlist file. Default=''
    :return: allowlist dictionary
    :raises FileNotFoundError: if the selected file does not exist
    """
    if path:
        file_name = path
    elif os.path.exists('whitelist.json'):
        file_name = 'whitelist.json'
        print("Reading whitelist.json file...")
        logging.warning(
            "Name 'whitelist.json' is deprecated. Please use 'allowlist.json instead'")
    else:
        file_name = 'allowlist.json'
        print("Reading allowlist.json file...")

    # Decode as UTF-8 explicitly rather than relying on the runner's locale.
    with open(file_name, encoding='utf-8') as f:
        return json.load(f)
def print_results(name: str, failed_results: List[ScanResult],
                  scan_results_with_line_number: List[dict],
                  result_file: IO):
    """
    Print the formatted scanner results

    Every entry is echoed to stdout and written to ``result_file``. A finding
    that appears as a key in ``scan_results_with_line_number`` is extended
    with ``at line(s) ...``; other findings are printed as they are.

    :param name: Name of the scanner
    :param failed_results: formatted scanner results to be printed
    :param: scan_results_with_line_number : List[dict] List of words mapped to
        their line numbers
    :param result_file: File to write results to
    """
    # Build the word -> lines lookup once instead of rescanning every mapping
    # for every finding. The first mapping mentioning a word wins, which is
    # the match the nested scan would have found first.
    lines_by_word = {}
    for mapping in scan_results_with_line_number:
        for scanned_word, lines in mapping.items():
            lines_by_word.setdefault(scanned_word, lines)

    for files in failed_results:
        print(f"File: {files.file}")
        result_file.write(f"File: {files.file}\n")

        plural = "s" if len(files.result) > 1 else ""
        print(f"{name}{plural}:")
        result_file.write(f"{name}{plural}:\n")

        for result in files.result:
            lines = lines_by_word.get(result)
            if lines is not None:
                line_plural = "s" if len(lines) > 1 else ""
                # str() lets both textual and integer line numbers be joined.
                lines_str = ", ".join(str(line) for line in lines)
                # Trailing newline kept so the report layout is unchanged.
                result = f"{result} at line{line_plural} {lines_str}\n"
            print("\t" + result)
            result_file.write("\t" + result + "\n")
def print_log_message(filename: str,
                      failed_list: Union[bool, List[ScanResult]],
                      check_value: bool, failure_text: str,
                      acceptance_text: str, scan_type: str,
                      return_val: int,
                      scan_results_with_line_number: List[dict]) -> int:
    """
    Common helper function to print scan results.

    A scan counts as failed when ``failed_list`` is a bool different from
    ``check_value`` or a non-empty list. On failure the matching bit is OR-ed
    into the return value: 2 for "License", 4 for "Copyright", 8 for
    "Keyword"; any other scan type leaves the value unchanged.

    :param filename: File where results are to be stored.
    :param failed_list: Failed scan results.
    :param check_value: Boolean value which failed_list should have.
    :param failure_text: Message to print in case of failures.
    :param acceptance_text: Message to print in case of no failures.
    :param scan_type: Type of scan to print.
    :param return_val: Return value for program
    :param: scan_results_with_line_number : List[dict] List of words mapped to
        their line numbers
    :return: New return value
    """
    failure_flags = {"License": 2, "Copyright": 4, "Keyword": 8}
    has_failures = (
        (isinstance(failed_list, bool) and failed_list is not check_value)
        or (isinstance(failed_list, list) and len(failed_list) != 0)
    )

    # The context manager closes the report even if printing results raises.
    with open(filename, 'w', encoding='utf-8') as report_file:
        if has_failures:
            print(f"\u2718 {failure_text}:")
            report_file.write(f"{failure_text}:\n")
            # A bare boolean carries no per-file details to print.
            if isinstance(failed_list, list):
                print_results(scan_type, failed_list,
                              scan_results_with_line_number, report_file)
            return_val |= failure_flags.get(scan_type, 0)
        else:
            print(f"\u2714 {acceptance_text}")
            report_file.write(f"{acceptance_text}\n")
    return return_val
def _collect_line_numbers(scan_results, format_results: FormatResult,
                          key: str) -> List[dict]:
    """
    Look up line numbers for the findings of every scanned file.

    :param scan_results: iterable of per-file results exposing ``path`` and
        ``result``
    :param format_results: FormatResult object used for the lookup
    :param key: value forwarded as ``key`` to ``find_word_line_numbers``
    :return: one ``find_word_line_numbers`` result per scanned file
    """
    return [
        format_results.find_word_line_numbers(
            file_result.path, list(file_result.result), key=key)
        for file_result in scan_results
    ]


def format_keyword_results_with_line_numbers(
        scanner: Scanners, format_results: FormatResult) -> List[dict]:
    """
    Format the keyword results with line numbers

    :param: scanner : Scanner Scanner object
    :param: format_results : FormatResult FormatResult object
    :return: list of dicts with key as word and value as list of line numbers
        of the words
    """
    keyword_results = scanner.get_keyword_list(whole=True)
    if keyword_results is False:
        # False is the scanner's sentinel here; treat it as no findings.
        keyword_results = []
    return _collect_line_numbers(keyword_results, format_results,
                                 key='content')


def format_copyright_results_with_line_numbers(
        scanner: Scanners, format_results: FormatResult) -> List[dict]:
    """
    Format the copyright results with line numbers

    :param: scanner : Scanner Scanner object
    :param: format_results : FormatResult FormatResult object
    :return: list of dicts with key as word and value as list of line numbers
        of the words
    """
    copyright_results = scanner.get_copyright_list(whole=True)
    if copyright_results is False:
        # False is the scanner's sentinel here; treat it as no findings.
        copyright_results = []
    return _collect_line_numbers(copyright_results, format_results,
                                 key='content')


def format_license_results_with_line_numbers(
        scanner: Scanners, format_results: FormatResult) -> List[dict]:
    """
    Format the licenses results with line numbers

    :param: scanner : Scanner Scanner object
    :param: format_results : FormatResult FormatResult object
    :return: list of dicts with key as word and value as list of line numbers
        of the words
    """
    license_results = scanner.results_are_allow_listed(whole=True)
    if license_results is True or license_results is None:
        # True / None carry no per-file entries to iterate over.
        license_results = []
    return _collect_line_numbers(license_results, format_results,
                                 key='license')
def text_report(cli_options: CliOptions, result_dir: str, return_val: int,
                scanner: Scanners, format_results: FormatResult) -> int:
    """
    Run scanners and print results in text format.

    One report file per enabled scan is written into ``result_dir``
    (``licenses.txt``, ``copyrights.txt``, ``keywords.txt``). The value
    returned by ``print_log_message`` is carried forward so that violations
    found by any scan end up in the returned value.

    :param cli_options: CLI options
    :param result_dir: Result directory location
    :param return_val: Return value of program
    :param scanner: Scanner object
    :param: format_results : FormatResult FormatResult object
    :return: Program's return value
    """
    if cli_options.nomos or cli_options.ojo:
        failed_licenses = scanner.results_are_allow_listed()
        scan_results_with_line_number = \
            format_license_results_with_line_numbers(
                scanner=scanner, format_results=format_results)
        # Keep the updated value; discarding it hides license violations.
        return_val = print_log_message(
            f"{result_dir}/licenses.txt", failed_licenses, True,
            "Following licenses found which are not allow listed",
            "No license violation found", "License", return_val,
            scan_results_with_line_number)
    if cli_options.copyright:
        copyright_results = scanner.get_copyright_list()
        scan_results_with_line_number = \
            format_copyright_results_with_line_numbers(
                scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/copyrights.txt", copyright_results, False,
            "Following copyrights found",
            "No copyright violation found", "Copyright", return_val,
            scan_results_with_line_number)
    if cli_options.keyword:
        keyword_results = scanner.get_keyword_list()
        scan_results_with_line_number = \
            format_keyword_results_with_line_numbers(
                scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/keywords.txt", keyword_results, False,
            "Following keywords found",
            "No keyword violation found", "Keyword", return_val,
            scan_results_with_line_number)
    return return_val
def bom_report(cli_options: CliOptions, result_dir: str, return_val: int,
               scanner: Scanners, api_config: ApiConfig,
               format_results: FormatResult) -> int:
    """
    Run scanners and print results as an SBOM.

    Besides the SBOM itself, the per-scan text reports (``licenses.txt``,
    ``copyrights.txt``, ``keywords.txt``) are written into ``result_dir``
    for every enabled scan.

    :param cli_options: CLI options
    :param result_dir: Result directory location
    :param return_val: Return value
    :param scanner: Scanner object
    :param api_config: API config options
    :param: format_results : FormatResult FormatResult object
    :return: Program's return value
    """
    report_obj = SpdxReport(cli_options, api_config)

    if cli_options.nomos or cli_options.ojo:
        scan_results = scanner.get_scanner_results()
        report_obj.add_license_results(scan_results)
        line_numbers = format_license_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        failed_licenses = scanner.get_non_allow_listed_results(scan_results)
        return_val = print_log_message(
            f"{result_dir}/licenses.txt", failed_licenses, True,
            "Following licenses found which are not allow listed",
            "No license violation found", "License", return_val,
            line_numbers)

    if cli_options.copyright:
        copyright_results = scanner.get_copyright_list(all_results=True)
        if copyright_results is False:
            copyright_results = []
        report_obj.add_copyright_results(copyright_results)
        # NOTE(review): the argument on the continuation line was not visible
        # in the reviewed listing; presumably the copyright results - confirm.
        failed_copyrights = scanner.get_non_allow_listed_copyrights(
            copyright_results)
        line_numbers = format_copyright_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/copyrights.txt", failed_copyrights, False,
            "Following copyrights found", "No copyright violation found",
            "Copyright", return_val, line_numbers)

    if cli_options.keyword:
        keyword_results = scanner.get_keyword_list()
        line_numbers = format_keyword_results_with_line_numbers(
            scanner=scanner, format_results=format_results)
        return_val = print_log_message(
            f"{result_dir}/keywords.txt", keyword_results, False,
            "Following keywords found", "No keyword violation found",
            "Keyword", return_val, line_numbers)

    report_obj.finalize_document()

    # File name suffix per SPDX serialisation; any other format gets none.
    suffix_by_format = {
        ReportFormat.SPDX_JSON: "spdx.json",
        ReportFormat.SPDX_RDF: "spdx.rdf",
        ReportFormat.SPDX_TAG_VALUE: "spdx.spdx",
        ReportFormat.SPDX_YAML: "spdx.yaml",
    }
    report_name = f"{result_dir}/sbom_" + \
        suffix_by_format.get(cli_options.report_format, "")
    report_obj.write_report(report_name)
    print(f"\u2714 Saved SBOM as {report_name}")
    return return_val
323 def main(parsed_args):
328 :return: 0 for success, error code on failure.
330 api_config = get_api_config()
332 cli_options.update_args(parsed_args)
334 if cli_options.allowlist_path:
335 allowlist_path = cli_options.allowlist_path
336 print(f
"Reading allowlist.json file from the path: '{allowlist_path}'")
337 cli_options.allowlist = get_allow_list(path=allowlist_path)
339 cli_options.allowlist = get_allow_list()
340 except FileNotFoundError:
341 print(
"Unable to find allowlist.json in current dir\n"
342 "Continuing without it.", file=sys.stderr)
344 if cli_options.keyword
and cli_options.keyword_conf_file_path:
345 keyword_conf_file_path = cli_options.keyword_conf_file_path
346 destination_path =
'/usr/local/share/fossology/keyword/agent/keyword.conf'
347 is_valid,message = validate_keyword_conf_file(keyword_conf_file_path)
349 print(f
"Validation of keyword file successful: {message}")
350 copy_keyword_file_to_destination(keyword_conf_file_path,destination_path)
352 print(f
"Could not validate keyword file: {message}")
354 valid_comps_exist =
False
355 if (cli_options.scan_only_deps
or cli_options.repo)
and cli_options.sbom_path !=
'':
357 save_dir =
'pkg_downloads'
358 sbom_file_path = cli_options.sbom_path
359 parser = Parser(sbom_file_path)
360 parser.classify_components()
361 valid_comps_exist = ( parser.python_components != []
or
362 parser.php_components != []
or
363 parser.npm_components != [] )
365 python_comps = parser.python_components
366 unsupported_comps = parser.unsupported_components
368 if len(python_comps) != 0:
369 python_parser = PythonParser()
370 python_list = python_parser.parse_components(python_comps)
371 download_list += python_list
373 if len(unsupported_comps) != 0:
374 for comp
in unsupported_comps:
375 print(f
'The purl {comp["purl"]} is not supported. Package will not be downloaded.')
378 downloader = Downloader()
379 downloader.download_concurrently(download_list, save_dir)
380 except Exception
as e:
381 print(
"Something went wrong while downloading the dependencies..")
383 repo_setup = RepoSetup(cli_options, api_config)
384 if cli_options.repo
is False:
385 cli_options.diff_dir = repo_setup.get_diff_dir()
387 if cli_options.scan_only_deps
and valid_comps_exist:
388 cli_options.diff_dir = save_dir
389 if cli_options.scan_dir:
390 cli_options.diff_dir = cli_options.dir_path
392 scanner = Scanners(cli_options)
396 format_results = FormatResult(cli_options)
397 format_results.process_files(scanner.cli_options.diff_dir)
400 result_dir =
"results"
401 os.makedirs(name=result_dir, exist_ok=
True)
403 print(
"Preparing scan reports...")
404 if cli_options.report_format == ReportFormat.TEXT:
405 return_val = text_report(cli_options, result_dir, return_val, scanner,
408 return_val = bom_report(cli_options, result_dir, return_val, scanner,
409 api_config, format_results)
413 if __name__ ==
"__main__":
414 parser = argparse.ArgumentParser(
415 description=textwrap.dedent(
"""fossology scanner designed for CI""")
418 "operation", type=str, help=
"Operations to run.", nargs=
'*',
419 choices=[
"nomos",
"copyright",
"keyword",
"ojo",
"repo",
"differential",
"scan-only-deps",
"scan-dir"]
422 "--tags", type=str, nargs=2, help=
"Tags for differential scan. Required if 'differential'" \
426 "--report", type=str, help=
"Type of report to generate. Default 'TEXT'.",
427 choices=[member.name
for member
in ReportFormat], default=ReportFormat.TEXT.name
429 parser.add_argument(
'--keyword-conf', type=str, help=
'Path to the keyword configuration file.' \
430 'Use only when keyword argument is true'
432 parser.add_argument(
'--dir-path', type=str, help=
'Path to directory for scanning.')
435 "--allowlist-path", type=str, help=
"Pass allowlist.json to allowlist dependencies."
438 "--sbom-path", type=str, help=
"Path to SBOM file for downloading dependencies."
441 args = parser.parse_args()
Store the options sent through the CLI.
list_t type structure used to keep various lists. (e.g. there are multiple lists).