4 Copyright (C) 2023 Sushant Kumar (sushantmishra02102002@gmail.com)
6 SPDX-License-Identifier: GPL-2.0-only
# Keep ScanCode's cache in a ".cache" directory next to this script.
# The environment variable is deliberately set before `scancode` is imported
# below (presumably ScanCode reads it at import time -- TODO confirm), so the
# import must stay after these two statements.
script_directory = os.path.dirname(os.path.abspath(__file__))
os.environ["SCANCODE_CACHE"] = os.path.join(script_directory, '.cache')

from scancode import api
def update_license(licenses):
    """
    Extracts relevant information from the 'licenses' data.

    Parameters:
        licenses (list): License detections. Each detection is a dictionary
            whose optional "matches" entry is a list of match dictionaries.
            ``None`` is treated like an empty list.

    Returns:
        list: A list of dictionaries containing relevant license information,
            one per match, restricted to the keys listed below that are
            actually present in the match.
    """
    keys_to_extract_from_licenses = [
        'license_expression_spdx',
        'score',
        'license_expression',
        'rule_url',
        'start_line',
        'matched_text',
    ]

    updated_licenses = []
    # `detection` rather than `license`, so the builtin is not shadowed.
    for detection in licenses or []:
        for match in detection.get("matches", []):
            updated_licenses.append({
                key: match[key]
                for key in keys_to_extract_from_licenses
                if key in match
            })

    return updated_licenses
def update_copyright(copyrights):
    """
    Extracts relevant information from the 'copyrights' data.

    Parameters:
        copyrights (dict): A dictionary containing copyright information.
            The list stored under 'copyrights' is read for copyright
            statements and the list stored under 'holders' for holders.

    Returns:
        tuple: A tuple of two lists. The first list contains updated copyright information,
            and the second list contains updated holder information.
    """
    keys_to_extract_from_copyrights = ['copyright', 'start_line']
    keys_to_extract_from_holders = ['holder', 'start_line']
    # Output keys are renamed through this mapping; unmapped keys keep
    # their original name.
    # NOTE(review): only the 'start_line' entry is visible in the reviewed
    # listing; the original mapping may contain further entries -- confirm
    # against the upstream file.
    key_mapping = {
        'start_line': 'start',
    }

    def _pick(entry, wanted_keys):
        # Copy the wanted keys that exist in `entry`, applying the renames.
        return {
            key_mapping.get(key, key): entry[key]
            for key in wanted_keys
            if key in entry
        }

    updated_copyrights = [
        _pick(entry, keys_to_extract_from_copyrights)
        for entry in copyrights.get('copyrights') or []
    ]
    # NOTE(review): the header of the holders loop is missing from the
    # reviewed listing; the 'holders' key is assumed by analogy with the
    # 'copyrights' branch -- confirm.
    updated_holders = [
        _pick(entry, keys_to_extract_from_holders)
        for entry in copyrights.get('holders') or []
    ]
    return updated_copyrights, updated_holders
def update_emails(emails):
    """
    Extracts relevant information from the 'emails' data.

    Parameters:
        emails (dict): A dictionary containing email information.

    Returns:
        list: A list of dictionaries containing relevant email information.
    """
    keys_to_extract_from_emails = ['email', 'start_line']
    # Output keys are renamed through this mapping; unmapped keys keep
    # their original name.
    # NOTE(review): only the 'start_line' entry is visible in the reviewed
    # listing; the original mapping may contain further entries -- confirm
    # against the upstream file.
    key_mapping = {
        'start_line': 'start',
    }

    # NOTE(review): the lines selecting the top-level key are missing from the
    # reviewed listing; 'emails' is assumed by analogy with the 'copyrights'
    # handling in update_copyright -- confirm.
    return [
        {
            key_mapping.get(key, key): email[key]
            for key in keys_to_extract_from_emails
            if key in email
        }
        for email in emails.get('emails') or []
    ]
def update_urls(urls):
    """
    Extracts relevant information from the 'urls' data.

    Parameters:
        urls (dict): A dictionary containing url information.

    Returns:
        list: A list of dictionaries containing relevant url information.
    """
    keys_to_extract_from_urls = ['url', 'start_line']
    # Output keys are renamed through this mapping; unmapped keys keep
    # their original name.
    # NOTE(review): only the 'start_line' entry is visible in the reviewed
    # listing; the original mapping may contain further entries -- confirm
    # against the upstream file.
    key_mapping = {
        'start_line': 'start',
    }

    # NOTE(review): the lines selecting the top-level key are missing from the
    # reviewed listing; 'urls' is assumed by analogy with the 'copyrights'
    # handling in update_copyright -- confirm.
    return [
        {
            key_mapping.get(key, key): url[key]
            for key in keys_to_extract_from_urls
            if key in url
        }
        for url in urls.get('urls') or []
    ]
def scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes a single file and returns the results.

    Parameters:
        line (str): A line from the file containing the list of files to scan.
            Surrounding whitespace (including the newline) is stripped to
            obtain the path.
        scan_copyrights (bool): Run the copyright scan (fills 'copyrights'
            and 'holders').
        scan_licenses (bool): Run the license scan (fills 'licenses').
        scan_emails (bool): Run the email scan (fills 'emails').
        scan_urls (bool): Run the url scan (fills 'urls').
        min_score: Passed to ``api.get_licenses`` as ``min_score``.

    Returns:
        dict: The stripped path under 'file' plus one list per kind of
            finding; lists for scans that were not requested stay empty.
    """
    # NOTE(review): the `if scan_*:` guards, the 'urls' default and the final
    # return are missing from the reviewed listing; they are restored here
    # from the parameter list and from how process_files uses the result --
    # confirm against the upstream file.
    path = line.strip()
    result = {
        'file': path,
        'licenses': [],
        'copyrights': [],
        'holders': [],
        'emails': [],
        'urls': [],
    }

    if scan_copyrights:
        copyrights = api.get_copyrights(path)
        updated_copyrights, updated_holders = update_copyright(copyrights)
        result['copyrights'] = updated_copyrights
        result['holders'] = updated_holders

    if scan_licenses:
        licenses = api.get_licenses(path, include_text=True, min_score=min_score)
        result['licenses'] = update_license(licenses.get("license_detections", []))

    if scan_emails:
        result['emails'] = update_emails(api.get_emails(path))

    if scan_urls:
        result['urls'] = update_urls(api.get_urls(path))

    return result
def process_files(file_location, outputFile, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes the file containing the list of files to scan.

    Every line of ``file_location`` is handed to ``scan`` and the results are
    written to ``outputFile`` as one JSON array, one element per successfully
    scanned line.

    Parameters:
        file_location (str): Path of the text file listing one file per line.
        outputFile (str): Path of the JSON file to write.
        scan_copyrights (bool): Forwarded to ``scan``.
        scan_licenses (bool): Forwarded to ``scan``.
        scan_emails (bool): Forwarded to ``scan``.
        scan_urls (bool): Forwarded to ``scan``.
        min_score: Forwarded to ``scan``.
    """
    with open(file_location, "r") as locations, open(outputFile, "w") as json_file:
        # NOTE(review): the statement writing the opening bracket is missing
        # from the reviewed listing; it is restored to pair with the closing
        # '\n]' below (exact original whitespace unknown -- confirm).
        json_file.write('[\n')

        first_iteration = True
        for line in locations:
            try:
                result = scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score)
                # Serialise before touching the output so a failure here
                # cannot leave a dangling separator or a half-written object.
                serialized = json.dumps(result)
            except Exception as e:
                # Best effort: report the failing entry and keep going.
                print(f"An error occurred for file '{line.strip()}': {e}")
                continue

            if not first_iteration:
                json_file.write(',\n')
            else:
                first_iteration = False
            json_file.write(serialized)

        json_file.write('\n]')
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the scan over the
    # listed files, writing the results to the given output file.
    parser = argparse.ArgumentParser(description="Process a file specified by its location.")
    parser.add_argument("-c", "--scan-copyrights", action="store_true", help="Scan for copyrights")
    parser.add_argument("-l", "--scan-licenses", action="store_true", help="Scan for licenses")
    parser.add_argument("-e", "--scan-emails", action="store_true", help="Scan for emails")
    parser.add_argument("-u", "--scan-urls", action="store_true", help="Scan for urls")
    parser.add_argument("-m", "--min-score", dest="min_score", type=int, default=0, help="Minimum score for a license to be included in the results")
    parser.add_argument('file_location', type=str, help='Path to the file you want to process')
    parser.add_argument('outputFile', type=str, help='Path to the file you want save results to')

    args = parser.parse_args()

    process_files(
        args.file_location,
        args.outputFile,
        args.scan_copyrights,
        args.scan_licenses,
        args.scan_emails,
        args.scan_urls,
        args.min_score,
    )
update_license($old_rf_pk, $new_rf_pk)
update license from old to new 1) update license_file set rf_fk=new_rf_pk where rf_fk=old_rf_pk 2) up...