FOSSology  4.5.1
Open Source License Compliance by Open Source Software
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
FormatResults.py
1 #!/usr/bin/env python3
2 
3 # SPDX-FileContributor: © Rajul Jha <rajuljha49@gmail.com>
4 
5 # SPDX-License-Identifier: GPL-2.0-only
6 
7 import re
8 import os
9 
10 from .CliOptions import CliOptions
11 
13  """
14  For formatting the results from scanners with line number information
15 
16  :ivar cli_options: CliOptions object
17  """
18  cli_options : CliOptions = None
19 
20  def __init__(self,cli_options:CliOptions):
21  self.cli_optionscli_options = cli_options
22 
23  def format_diff(self,diff_content):
24  """
25  Format the diff content in a particular format with corrected line numbers.
26 
27  :param: diff_content: str String to format
28  :return: str formatted_string
29  """
30  formatted_diff = []
31  diff_lines = diff_content.splitlines()
32  left = right = 0
33  left_num_len = right_num_len = 0
34  for line in diff_lines:
35  match = re.match(r'^@@ -([0-9]+),([0-9]+) [+]([0-9]+),([0-9]+) @@', line)
36  if match:
37  left = int(match.group(1))
38  left_num_len = len(match.group(2))
39  right = int(match.group(3))
40  right_num_len = len(match.group(4))
41  formatted_diff.append(line)
42  continue
43 
44  if re.match(r'^(---|\+\+\+|[^-+ ])', line):
45  formatted_diff.append(line)
46  continue
47  line_content = line[1:]
48  if line.startswith('-'):
49  padding = ' ' * right_num_len
50  formatted_diff.append(f"-{left:<{left_num_len}} {padding}:{line_content}")
51  left += 1
52  elif line.startswith('+'):
53  padding = ' ' * left_num_len
54  formatted_diff.append(f"+{padding} {right:<{right_num_len}}:{line_content}")
55  right += 1
56  else:
57  formatted_diff.append(f" {left:<{left_num_len}} {right:<{right_num_len}}:{line_content}")
58  left += 1
59  right += 1
60 
61  return "\n".join(formatted_diff)
62 
63  def find_line_numbers(self, diff_string, word_start_byte, word_end_byte):
64  """
65  Find line numbers from formmatted diff data
66 
67  :param: diff_string : str Formatted diff string
68  :param: word_start_byte : int Start byte of scanner result
69  :param: word_end_byte : int End byte of scanner result
70  :return: List of line_numbers found for a given word
71  """
72  escaped_word = re.escape(diff_string[word_start_byte:word_end_byte])
73  pattern = re.compile(r'(\d+):.*?' + escaped_word)
74  matches = pattern.findall(diff_string)
75  return matches
76 
77  def find_word_line_numbers(self, file_path, words:list, key:str) -> dict:
78  """
79  Find the line number of each word found for a given file path
80 
81  :param: file_path : str Path of the file to scan
82  :param: words: list List of words(ScanResult Objects) to be scanned for
83  :param: Key to scan: 'contents' for copyright and keyword and 'license' for nomos and ojo
84  :return: found_words_with_line_number : dict Dictionary of scanned results
85  with key as scanned word and value as list of line_numbers where
86  it is found.
87  """
88  found_words_with_line_number = {}
89  if self.cli_optionscli_options.repo or self.cli_optionscli_options.scan_only_deps or self.cli_optionscli_options.scan_dir:
90  try:
91  with open(file_path, 'rb') as file:
92  binary_data = file.read()
93  string_data = binary_data.decode('utf-8', errors='ignore')
94  for i in range(0,len(words)):
95  line_numbers_list = []
96  line_number = string_data[0:words[i]['start']].count("\n") + 1
97  line_numbers_list.append(str(line_number))
98  found_words_with_line_number[words[i][f'{key}']] = line_numbers_list
99  return found_words_with_line_number
100  except FileNotFoundError:
101  print(f"The file {file_path} does not exist.")
102  return None
103  except IOError as e:
104  print(f"An I/O error occurred: {e}")
105  return None
106  except Exception as e:
107  print(f"An error occurred: {e}")
108  return None
109  else:
110  with open(file_path, 'r') as file:
111  content = file.read()
112  for i in range(0,len(words)):
113  line_numbers = self.find_line_numbersfind_line_numbers(content, words[i]['start'], words[i]['end'])
114  found_words_with_line_number[words[i][f'{key}']] = line_numbers
115  return found_words_with_line_number
116 
117  def process_files(self, root_dir):
118  """
119  Format the files according to unified diff format
120 
121  :param: root_dir : str Path of the temp dir root to format the files
122  :return: None
123  """
124  if self.cli_optionscli_options.repo or self.cli_optionscli_options.scan_only_deps or self.cli_optionscli_options.scan_dir:
125  return None
126  for root, dirs, files in os.walk(root_dir):
127  for file_name in files:
128  file_path = os.path.join(root, file_name)
129  with open(file_path, 'r', encoding='UTF-8') as file:
130  file_contents = file.read()
131  try:
132  normal_string = file_contents.encode().decode('unicode_escape')
133  except UnicodeDecodeError:
134  normal_string = file_contents
135  formatted_diff = self.format_diffformat_diff(normal_string)
136  with open(file_path, 'w', encoding='utf-8') as file:
137  file.write(formatted_diff)
138  return None
dict find_word_line_numbers(self, file_path, list words, str key)
def format_diff(self, diff_content)
def find_line_numbers(self, diff_string, word_start_byte, word_end_byte)