FOSSology  4.5.1
Open Source License Compliance by Open Source Software
Scanners.py
1 #!/usr/bin/env python3
2 
3 # SPDX-FileCopyrightText: © 2023 Siemens AG
4 # SPDX-FileContributor: Gaurav Mishra <mishra.gaurav@siemens.com>
5 
6 # SPDX-License-Identifier: GPL-2.0-only
7 
8 import fnmatch
9 import json
10 import multiprocessing
11 from subprocess import Popen, PIPE
12 from typing import List, Set, Union
13 
14 from .CliOptions import CliOptions
15 
16 
class ScanResult:
    """
    Store scan results from agents.

    :ivar file: File location (callers in this module pass the path
                normalized against the scanned directory)
    :ivar path: Actual location of file as reported by the scanner
    :ivar result: Set of findings (licenses, copyrights, keywords) for file
    """
    file: str = None
    path: str = None
    result: Set[str] = None

    def __init__(self, file: str, path: str, result: Set[str]):
        """
        Create a result entry for a single file.

        :param file: File location
        :param path: Actual location of file
        :param result: Set of findings for the file
        """
        # Assign to the documented attribute names; the class-level ``None``
        # values only serve as defaults/annotations.
        self.file = file
        self.path = path
        self.result = result
33 
34 
class ScanResultList:
    """
    Store scan results from agents with result as a list of dictionaries.

    :ivar file: File location
    :ivar path: Actual location of file
    :ivar result: License list for file as a list of dictionaries
    """
    file: str = None
    path: str = None
    result: List[dict] = None

    def __init__(self, file: str, path: str, result: List[dict]):
        """
        Create a result entry holding the scanner's raw findings for a file.

        :param file: File location
        :param path: Actual location of file
        :param result: Findings for the file, one dictionary per finding
        """
        # Assign to the documented attribute names; the class-level ``None``
        # values only serve as defaults/annotations.
        self.file = file
        self.path = path
        self.result = result
51 
52 
class Scanners:
    """
    Handle all the data from different scanners.

    :ivar nomos_path: path to nomos bin
    :ivar copyright_path: path to copyright bin
    :ivar keyword_path: path to keyword bin
    :ivar ojo_path: path to ojo bin
    :ivar cli_options: CliOptions object
    """
    nomos_path: str = '/bin/nomossa'
    copyright_path: str = '/bin/copyright'
    keyword_path: str = '/bin/keyword'
    ojo_path: str = '/bin/ojo'

    def __init__(self, cli_options: CliOptions):
        """
        Initialize the cli_options

        :param cli_options: CliOptions object to use
        :type cli_options: CliOptions
        """
        self.cli_options: CliOptions = cli_options

    def is_excluded_path(self, path: str) -> bool:
        """
        Check if the path matches one of the allow list's exclude patterns

        The function uses fnmatch (case sensitive) to compare the path with
        every pattern in ``cli_options.allowlist['exclude']``.

        :param path: path to check
        :return: True if any exclude pattern matches the path, False otherwise
        """
        return any(fnmatch.fnmatchcase(path, pattern)
                   for pattern in self.cli_options.allowlist['exclude'])

    def __normalize_path(self, path: str) -> str:
        """
        Normalize the given path to repository root

        Only the first occurrence of ``<diff_dir>/`` is removed so a nested
        directory carrying the same name as the scanned directory is kept.

        :param path: path to normalize
        :return: Normalized path
        """
        return path.replace(f"{self.cli_options.diff_dir}/", '', 1)

    @staticmethod
    def __run_scanner(command: List[str]) -> Union[dict, list]:
        """
        Run a scanner binary and parse its standard output as JSON

        :param command: executable followed by its arguments
        :return: decoded JSON document printed by the scanner
        :raises json.JSONDecodeError: if the output is not valid JSON
        """
        # The context manager closes the pipe and reaps the child process even
        # if reading the output fails.
        with Popen(command, stdout=PIPE) as process:
            output = process.communicate()[0]
        return json.loads(output.decode('UTF-8').strip())

    @staticmethod
    def __findings_of(result: dict) -> list:
        """
        Get the findings of one scanner entry

        :param result: single per-file entry from a scanner's JSON output
        :return: the entry's findings, or an empty list when the entry holds
                 no findings or the "Unable to read file" marker
        """
        findings = result['results']
        if findings is None or findings == "Unable to read file":
            return []
        return findings

    @staticmethod
    def __content_entry(path: str, raw_path: str, findings: List[dict],
                        whole: bool) -> Union[ScanResult, ScanResultList, None]:
        """
        Build the result entry for findings carrying a ``content`` field

        :param path: normalized file path
        :param raw_path: file path as reported by the scanner
        :param findings: findings (already stripped of None values)
        :param whole: keep the complete finding dictionaries?
        :return: ScanResultList (whole) or ScanResult (set of stripped
                 contents), None if nothing with content is left
        """
        if whole:
            kept = [finding for finding in findings
                    if finding['content'] != ""]
            return ScanResultList(path, raw_path, kept) if kept else None
        contents = {finding['content'].strip() for finding in findings}
        contents.discard("")
        return ScanResult(path, raw_path, contents) if contents else None

    def __get_nomos_result(self) -> dict:
        """
        Get the raw results from nomos scanner

        :return: raw json from nomos
        """
        # Leave one core free, but never hand a count below 1 to "-n"
        # (cpu_count() - 1 is 0 on a single core machine).
        workers = max(1, multiprocessing.cpu_count() - 1)
        return self.__run_scanner([
            self.nomos_path, "-S", "-J", "-l", "-d",
            self.cli_options.diff_dir, "-n", str(workers),
        ])

    def __get_ojo_result(self) -> List[dict]:
        """
        Get the raw results from ojo scanner

        :return: raw json from ojo
        """
        return self.__run_scanner(
            [self.ojo_path, "-J", "-d", self.cli_options.diff_dir])

    def __get_copyright_results(self) -> List[dict]:
        """
        Get the raw results from copyright scanner

        :return: raw json from copyright
        """
        return self.__run_scanner(
            [self.copyright_path, "-J", "-d", self.cli_options.diff_dir])

    def __get_keyword_results(self) -> List[dict]:
        """
        Get the raw results from keyword scanner

        :return: raw json from keyword
        """
        return self.__run_scanner(
            [self.keyword_path, "-J", "-d", self.cli_options.diff_dir])

    def get_copyright_list(self, all_results: bool = False,
                           whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList], bool]:
        """
        Get the formatted results from copyright scanner

        Only findings of type "statement" with non-empty content are reported.

        :param all_results: Get all results even excluded files?
        :type all_results: bool
        :param whole: return whole content from scanner
        :return: list of findings, False if there are none
        :rtype: List[ScanResult] | List[ScanResultList] | bool
        """
        copyright_list = []
        for result in self.__get_copyright_results():
            path = self.__normalize_path(result['file'])
            if self.cli_options.repo is True and all_results is False and \
                    self.is_excluded_path(path):
                continue
            statements = [
                finding for finding in self.__findings_of(result)
                if finding is not None and finding['type'] == "statement"
            ]
            entry = self.__content_entry(path, result['file'], statements,
                                         whole)
            if entry is not None:
                copyright_list.append(entry)
        return copyright_list if copyright_list else False

    def get_keyword_list(self, whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList], bool]:
        """
        Get the formatted results from keyword scanner

        :param whole: return whole content from scanner
        :return: List of findings, False if there are none
        :rtype: List[ScanResult] | List[ScanResultList] | bool
        """
        keyword_list = []
        for result in self.__get_keyword_results():
            path = self.__normalize_path(result['file'])
            if self.cli_options.repo is True and self.is_excluded_path(path):
                continue
            findings = [finding for finding in self.__findings_of(result)
                        if finding is not None]
            entry = self.__content_entry(path, result['file'], findings,
                                         whole)
            if entry is not None:
                keyword_list.append(entry)
        return keyword_list if keyword_list else False

    def __get_license_nomos(self, whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList]]:
        """
        Get the formatted results from nomos scanner

        Entries reported as "No_license_found" are dropped; files without any
        remaining license are not part of the returned list.

        :param whole: return whole content from scanner
        :return: list of findings
        :rtype: List[ScanResult] | List[ScanResultList]
        """
        scan_result = []
        # Each item of nomos' 'results' list is a dict describing one file.
        for result in self.__get_nomos_result()['results']:
            path = self.__normalize_path(result['file'])
            found = [scan_license for scan_license in result['licenses']
                     if scan_license['license'] != 'No_license_found']
            if not found:
                continue
            if whole:
                scan_result.append(
                    ScanResultList(path, result['file'], found))
            else:
                names = {scan_license['license'] for scan_license in found}
                scan_result.append(ScanResult(path, result['file'], names))
        return scan_result

    def __get_license_ojo(self, whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList]]:
        """
        Get the formatted results from ojo scanner

        Findings without a license are dropped; files without any remaining
        finding are not part of the returned list.

        :param whole: return whole content from scanner
        :return: list of findings
        :rtype: List[ScanResult] | List[ScanResultList]
        """
        scan_result = []
        for result in self.__get_ojo_result():
            path = self.__normalize_path(result['file'])
            found = [
                finding for finding in self.__findings_of(result)
                if finding is not None and finding['license'] is not None
            ]
            if not found:
                continue
            if whole:
                scan_result.append(
                    ScanResultList(path, result['file'], found))
            else:
                names = {finding['license'].strip() for finding in found}
                scan_result.append(ScanResult(path, result['file'], names))
        return scan_result

    def __merge_nomos_ojo(self, nomos_licenses: List[ScanResult],
                          ojo_licenses: List[ScanResult]) -> List[ScanResult]:
        """
        Merge the results from nomos and ojo based on file name

        The merge happens in place: ``nomos_licenses`` (and the result sets of
        its entries) is extended with the ojo findings and returned.

        :param nomos_licenses: formatted result from nomos
        :param ojo_licenses: formatted result from ojo

        :return: merged list of scanner findings
        """
        # Index entries by file; setdefault keeps the first entry of a file so
        # the lookup matches a front-to-back search of the list.
        by_file = {}
        for nomos_entry in nomos_licenses:
            by_file.setdefault(nomos_entry.file, nomos_entry)
        for ojo_entry in ojo_licenses:
            known = by_file.get(ojo_entry.file)
            if known is None:
                nomos_licenses.append(ojo_entry)
                by_file[ojo_entry.file] = ojo_entry
            else:
                known.result.update(ojo_entry.result)
        return nomos_licenses

    def get_non_allow_listed_results(
            self, scan_results: List[ScanResult] = None,
            scan_results_whole: List[ScanResultList] = None,
            whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList]]:
        """
        Get results where license check failed.

        A license fails the check if it is not part of
        ``cli_options.allowlist['licenses']``. In repo mode, files matching an
        exclude pattern are skipped.

        :param scan_results: Scan result from ojo/nomos
        :param scan_results_whole: Whole scan result from ojo/nomos
        :param whole: return whole content from scanner

        :return: List of results with only not allowed licenses
        :rtype: List[ScanResult] | List[ScanResultList]
        """
        rows = scan_results_whole if whole else scan_results
        if rows is None:
            return []
        allowed = self.cli_options.allowlist['licenses']
        final_results = []
        for row in rows:
            if self.cli_options.repo is True and \
                    self.is_excluded_path(row.file):
                continue
            if whole:
                failed_list = [lic for lic in row.result
                               if lic['license'] not in allowed]
                if failed_list:
                    final_results.append(
                        ScanResultList(row.file, row.path, failed_list))
            else:
                failed_set = {lic for lic in row.result
                              if lic not in allowed}
                if failed_set:
                    final_results.append(
                        ScanResult(row.file, row.path, failed_set))
        return final_results

    def get_non_allow_listed_copyrights(
            self, copyright_results: List[ScanResult]) -> List[ScanResult]:
        """
        Get copyrights from files which are not allow listed.

        As in the other filters of this class, exclude patterns are only
        applied in repo mode; otherwise every result is kept.

        :param copyright_results: Copyright results from copyright agent
        :return: List of scan results where copyrights found.
        """
        return [
            row for row in copyright_results
            if not (self.cli_options.repo is True
                    and self.is_excluded_path(row.file))
        ]

    def results_are_allow_listed(self, whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList], bool]:
        """
        Get the formatted list of license scanner findings

        The list contains the merged result of nomos/ojo scanner based on
        cli_options passed

        :param whole: return whole content from scanner
        :return: list of findings with not allowed licenses, True if every
                 finding is allow listed (or no scanner is selected)
        :rtype: List[ScanResult] | List[ScanResultList] | bool
        """
        scanned = self.get_scanner_results(whole=whole)
        if whole:
            failed_licenses = self.get_non_allow_listed_results(
                scan_results_whole=scanned, whole=True)
        else:
            failed_licenses = self.get_non_allow_listed_results(
                scan_results=scanned)
        return failed_licenses if failed_licenses else True

    def get_scanner_results(self, whole: bool = False) \
            -> Union[List[ScanResult], List[ScanResultList]]:
        """
        Get scan results from nomos and ojo scanners (whichever is selected).

        With both scanners selected, whole results are concatenated while
        plain results are merged per file.

        :param whole: return whole content from scanner
        :return: List of scan results
        :rtype: List[ScanResult] | List[ScanResultList]
        """
        whole = bool(whole)
        nomos_licenses = []
        ojo_licenses = []

        if self.cli_options.nomos:
            nomos_licenses = self.__get_license_nomos(whole=whole)
        if self.cli_options.ojo:
            ojo_licenses = self.__get_license_ojo(whole=whole)

        if self.cli_options.nomos and self.cli_options.ojo:
            if whole:
                return nomos_licenses + ojo_licenses
            return self.__merge_nomos_ojo(nomos_licenses, ojo_licenses)
        if self.cli_options.nomos:
            return nomos_licenses
        return ojo_licenses
Union[List[ScanResult], List[ScanResultList], bool] get_copyright_list(self, bool all_results=False, bool whole=False)
Definition: Scanners.py:148
List[ScanResult] get_non_allow_listed_copyrights(self, List[ScanResult] copyright_results)
Definition: Scanners.py:337
Union[List[ScanResult], List[ScanResultList]] get_scanner_results(self, bool whole=False)
Definition: Scanners.py:399
List[ScanResult] __merge_nomos_ojo(self, List[ScanResult] nomos_licenses, List[ScanResult] ojo_licenses)
Definition: Scanners.py:280
Union[List[ScanResult], List[ScanResultList]] __get_license_ojo(self, bool whole=False)
Definition: Scanners.py:250
Union[List[ScanResult], List[ScanResultList], bool] get_keyword_list(self, bool whole=False)
Definition: Scanners.py:186
Union[List[ScanResult], List[ScanResultList]] get_non_allow_listed_results(self, List[ScanResult] scan_results=None, List[ScanResultList] scan_results_whole=None, bool whole=False)
Definition: Scanners.py:301
dict __get_keyword_results(self)
Definition: Scanners.py:136
str __normalize_path(self, str path)
Definition: Scanners.py:93
dict __get_copyright_results(self)
Definition: Scanners.py:125
def __init__(self, CliOptions cli_options)
Definition: Scanners.py:68
Union[List[ScanResult], List[ScanResultList]] __get_license_nomos(self, bool whole=False)
Definition: Scanners.py:222
dict __get_nomos_result(self)
Definition: Scanners.py:102
bool is_excluded_path(self, str path)
Definition: Scanners.py:77
Union[List[ScanResult], List[ScanResultList], bool] results_are_allow_listed(self, bool whole=False)
Definition: Scanners.py:351
list_t type structure used to keep various lists. (e.g. there are multiple lists).
Definition: nomos.h:308