4 Copyright (C) 2023 Sushant Kumar (sushantmishra02102002@gmail.com)
6 SPDX-License-Identifier: GPL-2.0-only
# Point ScanCode's cache at a '.cache' directory that sits next to this script.
# NOTE(review): this is set ahead of the scancode import below, presumably so
# the library picks the location up when it is loaded - confirm.
script_directory = os.path.dirname(os.path.abspath(__file__))
os.environ["SCANCODE_CACHE"] = os.path.join(script_directory, '.cache')
17 from scancode
import api
def update_license(licenses):
    """
    Extracts relevant information from the 'licenses' data.

    Parameters:
      licenses (dict): A dictionary containing license information.

    Returns:
      list: A list of dictionaries containing relevant license information.
        Each dictionary holds only the whitelisted fields that were present
        on the corresponding input entry.
    """
    keys_to_extract_from_licenses = (
        'spdx_license_key',
        'score',
        'name',
        'text_url',
        'start_line',
        'matched_text',
    )

    # NOTE(review): the statement selecting which entry of `licenses` is
    # iterated is not visible in the reviewed listing; 'licenses' is assumed
    # by analogy with the 'copyrights' selector in update_copyright - confirm.
    matches = licenses.get('licenses', [])

    # `entry`/`field` avoid shadowing the builtin `license` and avoid reusing
    # one name for both the outer loop and the comprehension.
    return [
        {field: entry[field] for field in keys_to_extract_from_licenses if field in entry}
        for entry in matches
    ]
def update_copyright(copyrights):
    """
    Extracts relevant information from the 'copyrights' data.

    Parameters:
      copyrights (dict): A dictionary containing copyright information.

    Returns:
      tuple: A tuple of two lists. The first list contains updated copyright
        information, and the second list contains updated holder information.
    """
    keys_to_extract_from_copyrights = ('copyright', 'start_line')
    keys_to_extract_from_holders = ('holder', 'start_line')

    # Output key names; fields without an entry keep their original name.
    # NOTE(review): only the 'start_line' entry is visible in the reviewed
    # listing - confirm whether the mapping has further entries before merging.
    key_mapping = {
        'start_line': 'start',
    }

    def _extract(entries, wanted):
        # Keep only the wanted fields that exist on each entry, renamed via key_mapping.
        return [
            {key_mapping.get(field, field): entry[field] for field in wanted if field in entry}
            for entry in entries
        ]

    updated_copyrights = _extract(copyrights.get('copyrights', []), keys_to_extract_from_copyrights)
    # NOTE(review): the 'holders' selector is not visible in the reviewed
    # listing; it is assumed from the holder-specific key list - confirm.
    updated_holders = _extract(copyrights.get('holders', []), keys_to_extract_from_holders)
    return updated_copyrights, updated_holders
def update_emails(emails):
    """
    Extracts relevant information from the 'emails' data.

    Parameters:
      emails (dict): A dictionary containing email information.

    Returns:
      list: A list of dictionaries containing relevant email information.
    """
    keys_to_extract_from_emails = ('email', 'start_line')

    # Output key names; fields without an entry keep their original name.
    # NOTE(review): only the 'start_line' entry is visible in the reviewed
    # listing - confirm whether the mapping has further entries before merging.
    key_mapping = {
        'start_line': 'start',
    }

    # NOTE(review): the 'emails' selector is not visible in the reviewed
    # listing; it is assumed by analogy with update_copyright - confirm.
    return [
        {key_mapping.get(field, field): entry[field]
         for field in keys_to_extract_from_emails if field in entry}
        for entry in emails.get('emails', [])
    ]
def update_urls(urls):
    """
    Extracts relevant information from the 'urls' data.

    Parameters:
      urls (dict): A dictionary containing url information.

    Returns:
      list: A list of dictionaries containing relevant url information.
    """
    keys_to_extract_from_urls = ('url', 'start_line')

    # Output key names; fields without an entry keep their original name.
    # NOTE(review): only the 'start_line' entry is visible in the reviewed
    # listing - confirm whether the mapping has further entries before merging.
    key_mapping = {
        'start_line': 'start',
    }

    # NOTE(review): the 'urls' selector is not visible in the reviewed
    # listing; it is assumed by analogy with update_copyright - confirm.
    return [
        {key_mapping.get(field, field): entry[field]
         for field in keys_to_extract_from_urls if field in entry}
        for entry in urls.get('urls', [])
    ]
def scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes a single file and returns the results.

    Parameters:
      line (str): A line from the file containing the list of files to scan;
        surrounding whitespace (including the trailing newline) is stripped.
      scan_copyrights (bool): Collect copyrights and holders when true.
      scan_licenses (bool): Collect licenses when true.
      scan_emails (bool): Collect emails when true.
      scan_urls (bool): Collect urls when true.
      min_score (int): Passed through to api.get_licenses as `min_score`.

    Returns:
      dict: The scanned path under 'file' plus one list per category. Every
        category key is always present; categories that were not requested
        stay empty.
    """
    location = line.strip()

    # Initialise every category, 'urls' included, so the result has the same
    # shape no matter which scanners are enabled.
    result = {
        'file': location,
        'licenses': [],
        'copyrights': [],
        'holders': [],
        'emails': [],
        'urls': [],
    }

    if scan_copyrights:
        copyrights = api.get_copyrights(location)
        result['copyrights'], result['holders'] = update_copyright(copyrights)

    if scan_licenses:
        licenses = api.get_licenses(location, include_text=True, min_score=min_score)
        result['licenses'] = update_license(licenses)

    if scan_emails:
        result['emails'] = update_emails(api.get_emails(location))

    if scan_urls:
        result['urls'] = update_urls(api.get_urls(location))

    return result
def process_files(file_location, outputFile, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes the file containing the list of files to scan.

    Each line of `file_location` is handed to scan(); the results are written
    to `outputFile` as one JSON array, one element per successfully scanned
    line. A line whose scan or serialisation fails is reported on standard
    output and skipped.

    Parameters:
      file_location (str): Path to the file listing the files to scan, one per line.
      outputFile (str): Path of the JSON file to write the results to.
      scan_copyrights (bool): Forwarded to scan().
      scan_licenses (bool): Forwarded to scan().
      scan_emails (bool): Forwarded to scan().
      scan_urls (bool): Forwarded to scan().
      min_score (int): Forwarded to scan().
    """
    with open(file_location, "r") as locations, open(outputFile, "w") as json_file:
        json_file.write('[')
        first_iteration = True

        for line in locations:
            try:
                result = scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score)
                # Serialise before touching the output file, so a failure
                # cannot leave a dangling separator or a half-written object.
                payload = json.dumps(result)
            except Exception as e:
                print(f"An error occurred for file '{line.strip()}': {e}")
                continue

            if not first_iteration:
                json_file.write(',\n')
            first_iteration = False
            json_file.write(payload)

        json_file.write('\n]')
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the scan.
    parser = argparse.ArgumentParser(description="Process a file specified by its location.")

    # The four scanner switches share one shape, so register them from a table.
    scanner_switches = (
        ("-c", "--scan-copyrights", "Scan for copyrights"),
        ("-l", "--scan-licenses", "Scan for licenses"),
        ("-e", "--scan-emails", "Scan for emails"),
        ("-u", "--scan-urls", "Scan for urls"),
    )
    for short_option, long_option, description in scanner_switches:
        parser.add_argument(short_option, long_option, action="store_true", help=description)

    parser.add_argument(
        "-m",
        "--min-score",
        dest="min_score",
        type=int,
        default=0,
        help="Minimum score for a license to be included in the results",
    )
    parser.add_argument('file_location', type=str, help='Path to the file you want to process')
    parser.add_argument('outputFile', type=str, help='Path to the file you want save results to')

    args = parser.parse_args()

    process_files(
        args.file_location,
        args.outputFile,
        args.scan_copyrights,
        args.scan_licenses,
        args.scan_emails,
        args.scan_urls,
        args.min_score,
    )
update_license($old_rf_pk, $new_rf_pk)
update license from old to new 1) update license_file set rf_fk=new_rf_pk where rf_fk=old_rf_pk 2) up...