10 from .CliOptions
import CliOptions
14 For formatting the results from scanners with line number information
16 :ivar cli_options: CliOptions object
18 cli_options : CliOptions =
None
def __init__(self, cli_options: CliOptions):
    """
    Keep the command line options on the instance.

    :param: cli_options: CliOptions object
    """
    self.cli_options = cli_options
def format_diff(self, diff_content):
    """
    Format the diff content in a particular format with corrected line numbers.

    Hunk headers, file headers (``---`` / ``+++``) and every line that does
    not start with ``-``, ``+`` or a space are copied unchanged. Removed,
    added and context lines keep their marker and get the old and/or new
    line number followed by a colon in front of their text.

    :param: diff_content: str String to format
    :return: str formatted_string
    """
    # The line counts are optional: unified diff omits ",<count>" when a
    # hunk side spans exactly one line (e.g. "@@ -5 +7 @@").
    hunk_header = re.compile(
        r'^@@ -([0-9]+)(?:,([0-9]+))? [+]([0-9]+)(?:,([0-9]+))? @@')
    passthrough = re.compile(r'^(---|\+\+\+|[^-+ ])')

    formatted_diff = []
    # Current line number on the old (left) and new (right) side.
    left = right = 0
    left_num_len = right_num_len = 0
    for line in diff_content.splitlines():
        match = hunk_header.match(line)
        if match:
            left = int(match.group(1))
            right = int(match.group(3))
            # Column widths follow the number of digits of the hunk's line
            # counts; an omitted count stands for "1".
            left_num_len = len(match.group(2) or '1')
            right_num_len = len(match.group(4) or '1')
            formatted_diff.append(line)
            continue

        if passthrough.match(line):
            formatted_diff.append(line)
            continue

        line_content = line[1:]
        if line.startswith('-'):
            # Removed line: only the old side has a number.
            padding = ' ' * right_num_len
            formatted_diff.append(
                f"-{left:<{left_num_len}} {padding}:{line_content}")
            left += 1
        elif line.startswith('+'):
            # Added line: only the new side has a number.
            padding = ' ' * left_num_len
            formatted_diff.append(
                f"+{padding} {right:<{right_num_len}}:{line_content}")
            right += 1
        elif line.startswith(' '):
            # Context line: present on both sides.
            formatted_diff.append(
                f" {left:<{left_num_len}} {right:<{right_num_len}}:{line_content}")
            left += 1
            right += 1
        # NOTE(review): completely empty lines match no branch above and are
        # omitted from the output - confirm this is intended.

    return "\n".join(formatted_diff)
def find_line_numbers(self, diff_string, word_start_byte, word_end_byte):
    """
    Find line numbers from formatted diff data

    The word is taken as ``diff_string[word_start_byte:word_end_byte]``.
    Every line that contains ``<digits>:`` followed, on the same line, by
    that word contributes the digits in front of the colon.

    :param: diff_string : str Formatted diff string
    :param: word_start_byte : int Start byte of scanner result
    :param: word_end_byte : int End byte of scanner result
    :return: List of line_numbers found for a given word
    """
    # Escape the word so characters such as "(" or "." are matched literally.
    escaped_word = re.escape(diff_string[word_start_byte:word_end_byte])
    # "." does not match a newline, so number and word must share a line.
    pattern = re.compile(r'(\d+):.*?' + escaped_word)
    return pattern.findall(diff_string)
def find_word_line_numbers(self, file_path, words, key):
    """
    Find the line number of each word found for a given file path

    :param: file_path : str Path of the file to scan
    :param: words: list List of words(ScanResult Objects) to be scanned for
    :param: key: str Key to scan: 'contents' for copyright and keyword and
                 'license' for nomos and ojo
    :return: found_words_with_line_number : dict Dictionary of scanned results
             with key as scanned word and value as list of line_numbers where
             it is found. None when the file could not be read in the
             plain-file branch (a message is printed instead).
    """
    found_words_with_line_number = {}
    result_key = f'{key}'

    # NOTE(review): the condition selecting between the two branches is
    # assumed to be `self.cli_options.repo` - confirm against CliOptions.
    if self.cli_options.repo:
        try:
            with open(file_path, 'rb') as file:
                binary_data = file.read()
            for word in words:
                # 'start' is a byte offset, so newlines are counted in the
                # raw bytes; indexing decoded text would drift after any
                # multi-byte character.
                line_number = binary_data[:word['start']].count(b"\n") + 1
                found_words_with_line_number[word[result_key]] = [str(line_number)]
            return found_words_with_line_number
        except FileNotFoundError:
            print(f"The file {file_path} does not exist.")
        except IOError as e:
            print(f"An I/O error occurred: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")
        # NOTE(review): after a reported error nothing useful can be
        # returned; callers presumably expect None here - confirm.
        return None

    # Otherwise the file holds formatted diff text and the line numbers
    # are read from the "<number>:" prefixes.
    with open(file_path, 'r') as file:
        content = file.read()
    for word in words:
        line_numbers = self.find_line_numbers(content, word['start'], word['end'])
        found_words_with_line_number[word[result_key]] = line_numbers
    return found_words_with_line_number
def process_files(self, root_dir):
    """
    Format the files according to unified diff format

    Walks ``root_dir`` recursively. Each file is read as UTF-8, its
    backslash escape sequences are expanded, the result is passed through
    ``format_diff`` and written back to the same path.

    :param: root_dir : str Path of the temp dir root to format the files
    """
    for root, _dirs, files in os.walk(root_dir):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            with open(file_path, 'r', encoding='UTF-8') as file:
                file_contents = file.read()
            try:
                # 'unicode_escape' decodes its input as Latin-1. Encoding
                # with latin-1 + backslashreplace keeps non-ASCII characters
                # intact, whereas a UTF-8 round trip would garble them.
                normal_string = file_contents.encode(
                    'latin-1', 'backslashreplace').decode('unicode_escape')
            except UnicodeDecodeError:
                # Malformed escape sequence: keep the text as read.
                normal_string = file_contents
            formatted_diff = self.format_diff(normal_string)
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(formatted_diff)