FOSSology  4.4.0
Open Source License Compliance by Open Source Software
Scanners.py
1 #!/usr/bin/env python3
2 
3 # SPDX-FileCopyrightText: © 2023 Siemens AG
4 # SPDX-FileContributor: Gaurav Mishra <mishra.gaurav@siemens.com>
5 
6 # SPDX-License-Identifier: GPL-2.0-only
7 
8 import fnmatch
9 import json
10 import multiprocessing
11 from subprocess import Popen, PIPE
12 from typing import List, Set, Union
13 
14 from .CliOptions import CliOptions
15 
16 
class ScanResult:
    """
    Store scan results from agents.

    :ivar file: File location
    :ivar path: Actual location of file
    :ivar result: License list for file
    """
    # Class-level defaults; every instance overrides them in __init__.
    file: str = None
    path: str = None
    result: Set[str] = None

    def __init__(self, file: str, path: str, result: Set[str]):
        """
        Create a result entry for a single file.

        :param file: File location
        :param path: Actual location of file
        :param result: Set of findings reported for the file
        """
        # The attribute names must match the class-level declarations above,
        # otherwise readers of ``.file``/``.path``/``.result`` only ever see
        # the ``None`` defaults.
        self.file = file
        self.path = path
        self.result = result

    def __repr__(self) -> str:
        """
        Return a debugging representation listing all three attributes.
        """
        return (f"{type(self).__name__}(file={self.file!r}, "
                f"path={self.path!r}, result={self.result!r})")
33 
34 
class Scanners:
    """
    Handle all the data from different scanners.

    :ivar nomos_path: path to nomos bin
    :ivar copyright_path: path to copyright bin
    :ivar keyword_path: path to keyword bin
    :ivar ojo_path: path to ojo bin
    :ivar cli_options: CliOptions object
    """
    nomos_path: str = '/bin/nomossa'
    copyright_path: str = '/bin/copyright'
    keyword_path: str = '/bin/keyword'
    ojo_path: str = '/bin/ojo'

    def __init__(self, cli_options: 'CliOptions'):
        """
        Initialize the cli_options

        :param cli_options: CliOptions object to use
        :type cli_options: CliOptions
        """
        self.cli_options: 'CliOptions' = cli_options

    def is_excluded_path(self, path: str) -> bool:
        """
        Check if the path matches an exclude pattern of the allow list

        Every pattern in ``cli_options.allowlist['exclude']`` is tested with
        the case-sensitive ``fnmatch.fnmatchcase``.

        :param path: path to check
        :return: True if any exclude pattern matches the path, False otherwise
        """
        return any(
            fnmatch.fnmatchcase(path, pattern)
            for pattern in self.cli_options.allowlist['exclude']
        )

    def __normalize_path(self, path: str) -> str:
        """
        Normalize the given path to repository root

        Removes every occurrence of ``"<diff_dir>/"`` from the path.

        :param path: path to normalize
        :return: Normalized path
        """
        return path.replace(f"{self.cli_options.diff_dir}/", '')

    @staticmethod
    def __run_scanner(command: List[str]):
        """
        Run a scanner binary and parse its standard output as JSON

        :param command: executable and arguments to run
        :return: decoded JSON document printed by the scanner
        """
        # The context manager closes the pipe and reaps the child even if
        # reading the output fails.
        with Popen(command, stdout=PIPE) as process:
            output = process.communicate()[0]
        return json.loads(output.decode('UTF-8').strip())

    @staticmethod
    def __is_usable(results) -> bool:
        """
        Tell whether a scanner's per-file ``results`` value holds findings

        :param results: value of the ``results`` key of one scanner entry
        :return: False for None or the "Unable to read file" marker
        """
        return results is not None and results != "Unable to read file"

    @staticmethod
    def __collect_contents(findings, wanted_type: str = None) -> Set[str]:
        """
        Gather the stripped, non-empty ``content`` values of the findings

        :param findings: iterable of finding dicts (None entries are skipped)
        :param wanted_type: when given, keep only findings whose ``type``
                            equals this value
        :return: set of content strings
        """
        contents = set()
        for finding in findings:
            if finding is None:
                continue
            if wanted_type is not None and finding['type'] != wanted_type:
                continue
            content = finding['content'].strip()
            if content != "":
                contents.add(content)
        return contents

    def __get_nomos_result(self) -> dict:
        """
        Get the raw results from nomos scanner

        :return: raw json from nomos
        """
        # Leave one core free, but never request fewer than one worker
        # (cpu_count() - 1 would be 0 on a single-core machine).
        worker_count = max(1, multiprocessing.cpu_count() - 1)
        return self.__run_scanner([
            self.nomos_path, "-J", "-l", "-d", self.cli_options.diff_dir,
            "-n", str(worker_count),
        ])

    def __get_ojo_result(self) -> List[dict]:
        """
        Get the raw results from ojo scanner

        :return: raw json from ojo
        """
        return self.__run_scanner(
            [self.ojo_path, "-J", "-d", self.cli_options.diff_dir])

    def __get_copyright_results(self) -> List[dict]:
        """
        Get the raw results from copyright scanner

        :return: raw json from copyright
        """
        return self.__run_scanner(
            [self.copyright_path, "-J", "-d", self.cli_options.diff_dir])

    def __get_keyword_results(self) -> List[dict]:
        """
        Get the raw results from keyword scanner

        :return: raw json from keyword
        """
        return self.__run_scanner(
            [self.keyword_path, "-J", "-d", self.cli_options.diff_dir])

    def get_copyright_list(self, all_results: bool = False) \
            -> Union[List['ScanResult'], bool]:
        """
        Get the formatted results from copyright scanner

        Only findings of type "statement" are reported.

        :param all_results: Get all results even excluded files?
        :type all_results: bool
        :return: list of findings, or False when nothing was found
        :rtype: list[ScanResult] | bool
        """
        copyright_list = []
        for result in self.__get_copyright_results():
            path = self.__normalize_path(result['file'])
            # Excluded paths are skipped only in repo mode and only when the
            # caller did not explicitly ask for everything.
            if self.cli_options.repo is True and all_results is False and \
                    self.is_excluded_path(path):
                continue
            if not self.__is_usable(result['results']):
                continue
            contents = self.__collect_contents(result['results'], "statement")
            if contents:
                copyright_list.append(
                    ScanResult(path, result['file'], contents))
        if copyright_list:
            return copyright_list
        return False

    def get_keyword_list(self) -> Union[List['ScanResult'], bool]:
        """
        Get the formatted results from keyword scanner

        :return: list of findings, or False when nothing was found
        """
        keyword_list = []
        for result in self.__get_keyword_results():
            path = self.__normalize_path(result['file'])
            if self.cli_options.repo is True and self.is_excluded_path(path):
                continue
            if not self.__is_usable(result['results']):
                continue
            contents = self.__collect_contents(result['results'])
            if contents:
                keyword_list.append(ScanResult(path, result['file'], contents))
        if keyword_list:
            return keyword_list
        return False

    def __get_license_nomos(self) -> List['ScanResult']:
        """
        Get the formatted results from nomos scanner

        Files for which only 'No_license_found' was reported are omitted.

        :return: list of findings
        """
        scan_result = []
        for result in self.__get_nomos_result()['results']:
            path = self.__normalize_path(result['file'])
            licenses = {
                scan_license.strip() for scan_license in result['licenses']
                if scan_license != 'No_license_found'
            }
            if licenses:
                scan_result.append(ScanResult(path, result['file'], licenses))
        return scan_result

    def __get_license_ojo(self) -> List['ScanResult']:
        """
        Get the formatted results from ojo scanner

        :return: list of findings
        """
        scan_result = []
        for result in self.__get_ojo_result():
            path = self.__normalize_path(result['file'])
            if not self.__is_usable(result['results']):
                continue
            licenses = {
                finding['license'].strip() for finding in result['results']
                if finding['license'] is not None
            }
            if licenses:
                scan_result.append(ScanResult(path, result['file'], licenses))
        return scan_result

    def __merge_nomos_ojo(self, nomos_licenses: List['ScanResult'],
                          ojo_licenses: List['ScanResult']) \
            -> List['ScanResult']:
        """
        Merge the results from nomos and ojo based on file name

        The nomos list is extended in place and returned: ojo findings for a
        file already present are added to that entry's result set, entries
        for new files are appended.

        :param nomos_licenses: formatted result from nomos
        :param ojo_licenses: formatted result from ojo

        :return: merged list of scanner findings
        """
        # Index the first entry seen for every file so each ojo entry is
        # matched with a single lookup instead of a scan over the whole list.
        by_file = {}
        for nomos_entry in nomos_licenses:
            by_file.setdefault(nomos_entry.file, nomos_entry)
        for ojo_entry in ojo_licenses:
            known_entry = by_file.get(ojo_entry.file)
            if known_entry is None:
                nomos_licenses.append(ojo_entry)
                by_file[ojo_entry.file] = ojo_entry
            else:
                known_entry.result.update(ojo_entry.result)
        return nomos_licenses

    def get_non_allow_listed_results(self, scan_results: List['ScanResult']) \
            -> List['ScanResult']:
        """
        Get results where license check failed.

        :param scan_results: Scan result from ojo/nomos
        :return: List of results with only not allowed licenses
        """
        final_results = []
        for row in scan_results:
            if self.cli_options.repo is True and \
                    self.is_excluded_path(row.file):
                continue
            allowed_licenses = self.cli_options.allowlist['licenses']
            failed_licenses = {
                lic for lic in row.result if lic not in allowed_licenses
            }
            if failed_licenses:
                final_results.append(
                    ScanResult(row.file, row.path, failed_licenses))
        return final_results

    def get_non_allow_listed_copyrights(
            self, copyright_results: List['ScanResult']) \
            -> List['ScanResult']:
        """
        Get copyrights from files which are not allow listed.

        :param copyright_results: Copyright results from copyright agent
        :return: List of scan results where copyrights found.
        """
        # NOTE(review): nothing is returned unless ``cli_options.repo`` is
        # True; kept as-is because callers may rely on it - confirm intent.
        return [
            row for row in copyright_results
            if self.cli_options.repo is True
            and not self.is_excluded_path(row.file)
        ]

    def results_are_allow_listed(self) -> Union[List['ScanResult'], bool]:
        """
        Check the license scanner findings against the allow list

        The findings are the nomos/ojo results (merged when both scanners are
        selected in cli_options). When no scanner is selected there is
        nothing to fail, so True is returned.

        :return: list of findings with not allowed licenses, or True when
                 there are none
        """
        failed_licenses = self.get_non_allow_listed_results(
            self.get_scanner_results())
        if failed_licenses:
            return failed_licenses
        return True

    def get_scanner_results(self) -> List['ScanResult']:
        """
        Get scan results from nomos and ojo scanners (whichever is selected).

        :return: List of scan results
        """
        nomos_licenses = []
        ojo_licenses = []

        if self.cli_options.nomos:
            nomos_licenses = self.__get_license_nomos()
        if self.cli_options.ojo:
            ojo_licenses = self.__get_license_ojo()

        if self.cli_options.nomos and self.cli_options.ojo:
            return self.__merge_nomos_ojo(nomos_licenses, ojo_licenses)
        if self.cli_options.nomos:
            return nomos_licenses
        return ojo_licenses
List[ScanResult] get_non_allow_listed_copyrights(self, List[ScanResult] copyright_results)
Definition: Scanners.py:266
List[ScanResult] get_scanner_results(self)
Definition: Scanners.py:306
List[ScanResult] __merge_nomos_ojo(self, List[ScanResult] nomos_licenses, List[ScanResult] ojo_licenses)
Definition: Scanners.py:226
Union[List[ScanResult], bool] results_are_allow_listed(self)
Definition: Scanners.py:279
Union[List[ScanResult], bool] get_copyright_list(self, bool all_results=False)
Definition: Scanners.py:130
dict __get_keyword_results(self)
Definition: Scanners.py:118
List[ScanResult] __get_license_nomos(self)
Definition: Scanners.py:187
str __normalize_path(self, str path)
Definition: Scanners.py:75
Union[List[ScanResult], bool] get_keyword_list(self)
Definition: Scanners.py:160
dict __get_copyright_results(self)
Definition: Scanners.py:107
def __init__(self, CliOptions cli_options)
Definition: Scanners.py:50
dict __get_nomos_result(self)
Definition: Scanners.py:84
List[ScanResult] __get_license_ojo(self)
Definition: Scanners.py:205
dict __get_ojo_result(self)
Definition: Scanners.py:96
List[ScanResult] get_non_allow_listed_results(self, List[ScanResult] scan_results)
Definition: Scanners.py:245
bool is_excluded_path(self, str path)
Definition: Scanners.py:59
list_t: structure type used to hold the various lists (there are several of them).
Definition: nomos.h:308