FOSSology  4.4.0
Open Source License Compliance by Open Source Software
runscanonfiles.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 """
4 Copyright (C) 2023 Sushant Kumar (sushantmishra02102002@gmail.com)
5 
6 SPDX-License-Identifier: GPL-2.0-only
7 """
8 
9 import os
10 import json
11 import argparse
12 
# Point the SCANCODE_CACHE environment variable at a '.cache' directory that
# sits next to this script.
# NOTE(review): this runs before the scancode import that follows, presumably so
# the library sees the variable at import time - confirm before reordering.
script_directory = os.path.split(os.path.abspath(__file__))[0]
os.environ.update({"SCANCODE_CACHE": os.path.join(script_directory, '.cache')})
16 
17 from scancode import api
18 
def update_license(licenses):
    """
    Extracts relevant information from the 'licenses' data.

    Parameters:
        licenses (dict): A dictionary containing license information. Only the
            list stored under its 'licenses' key is read. A missing key, a
            None value under that key, or a None/empty argument all yield an
            empty result.
    Returns:
        list: One dictionary per license entry, reduced to the keys
            'spdx_license_key', 'score', 'name', 'text_url', 'start_line' and
            'matched_text'. Keys that an entry does not have are left out.
    """
    keys_to_extract_from_licenses = (
        'spdx_license_key', 'score', 'name', 'text_url', 'start_line', 'matched_text',
    )

    # Look the section up directly instead of walking every item of the
    # mapping; "or" fallbacks turn missing/None data into an empty list.
    entries = (licenses or {}).get('licenses') or []

    return [
        {field: entry[field] for field in keys_to_extract_from_licenses if field in entry}
        for entry in entries
    ]
36 
def update_copyright(copyrights):
    """
    Extracts relevant information from the 'copyrights' data.

    Parameters:
        copyrights (dict): A dictionary containing copyright information. The
            lists stored under its 'copyrights' and 'holders' keys are read.
            A missing key, a None value, or a None/empty argument yields an
            empty list for that section.
    Returns:
        tuple: A tuple of two lists. The first list contains updated copyright
            information, and the second list contains updated holder
            information. In both, the statement/holder text is stored under
            'value' and the original 'start_line' under 'start'.
    """
    keys_to_extract_from_copyrights = ('copyright', 'start_line')
    keys_to_extract_from_holders = ('holder', 'start_line')
    key_mapping = {
        'start_line': 'start',
        'copyright': 'value',
        'holder': 'value',
    }
    source = copyrights or {}

    def extract(section, wanted_keys):
        # Shared by both sections: keep only the wanted keys that an entry
        # actually has, renamed through key_mapping.
        return [
            {key_mapping.get(field, field): entry[field] for field in wanted_keys if field in entry}
            for entry in (source.get(section) or [])
        ]

    updated_copyrights = extract('copyrights', keys_to_extract_from_copyrights)
    updated_holders = extract('holders', keys_to_extract_from_holders)
    return updated_copyrights, updated_holders
64 
def update_emails(emails):
    """
    Extracts relevant information from the 'emails' data.

    Parameters:
        emails (dict): A dictionary containing email information. Only the
            list stored under its 'emails' key is read. A missing key, a None
            value under that key, or a None/empty argument all yield an empty
            result.
    Returns:
        list: One dictionary per email entry, with the address stored under
            'value' and the original 'start_line' under 'start'. Keys that an
            entry does not have are left out.
    """
    keys_to_extract_from_emails = ('email', 'start_line')
    key_mapping = {
        'start_line': 'start',
        'email': 'value',
    }

    # Direct lookup instead of walking every item of the mapping; "or"
    # fallbacks turn missing/None data into an empty list.
    entries = (emails or {}).get('emails') or []

    return [
        {key_mapping.get(field, field): entry[field] for field in keys_to_extract_from_emails if field in entry}
        for entry in entries
    ]
86 
def update_urls(urls):
    """
    Extracts relevant information from the 'urls' data.

    Parameters:
        urls (dict): A dictionary containing url information. Only the list
            stored under its 'urls' key is read. A missing key, a None value
            under that key, or a None/empty argument all yield an empty
            result.
    Returns:
        list: One dictionary per url entry, with the url stored under 'value'
            and the original 'start_line' under 'start'. Keys that an entry
            does not have are left out.
    """
    keys_to_extract_from_urls = ('url', 'start_line')
    key_mapping = {
        'start_line': 'start',
        'url': 'value',
    }

    # Direct lookup instead of walking every item of the mapping; "or"
    # fallbacks turn missing/None data into an empty list.
    entries = (urls or {}).get('urls') or []

    return [
        {key_mapping.get(field, field): entry[field] for field in keys_to_extract_from_urls if field in entry}
        for entry in entries
    ]
108 
def scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Scans a single file and gathers the requested kinds of findings.

    Parameters:
        line (str): A line from the file containing the list of files to scan.
            Surrounding whitespace is stripped and the rest is used as the
            path handed to the scancode api.
        scan_copyrights (bool): When true, fill 'copyrights' and 'holders'.
        scan_licenses (bool): When true, fill 'licenses'.
        scan_emails (bool): When true, fill 'emails'.
        scan_urls (bool): When true, fill 'urls'.
        min_score: Passed through to api.get_licenses as min_score.
    Returns:
        dict: The stripped path under 'file' plus the lists 'licenses',
            'copyrights', 'holders', 'emails' and 'urls'. Lists for scans that
            were not requested stay empty.
    """
    path = line.strip()
    result = {
        'file': path,
        'licenses': [],
        'copyrights': [],
        'holders': [],
        'emails': [],
        'urls': [],
    }

    if scan_copyrights:
        found_copyrights, found_holders = update_copyright(api.get_copyrights(path))
        result['copyrights'] = found_copyrights
        result['holders'] = found_holders

    if scan_licenses:
        raw_licenses = api.get_licenses(path, include_text=True, min_score=min_score)
        result['licenses'] = update_license(raw_licenses)

    if scan_emails:
        result['emails'] = update_emails(api.get_emails(path))

    if scan_urls:
        result['urls'] = update_urls(api.get_urls(path))

    return result
148 
def process_files(file_location, outputFile, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes the file containing the list of files to scan.

    Every line of file_location is handed to scan(); the results are written
    to outputFile as one JSON array, with elements separated by ',\\n'. A line
    whose scan, serialization or write raises is reported with print() and
    skipped, so one bad file does not stop the run.

    Parameters:
        file_location (str): Path of the text file listing one file per line.
        outputFile (str): Path of the JSON file to (over)write.
        scan_copyrights (bool): Forwarded to scan().
        scan_licenses (bool): Forwarded to scan().
        scan_emails (bool): Forwarded to scan().
        scan_urls (bool): Forwarded to scan().
        min_score: Forwarded to scan().
    """
    # The input list is opened first, so a missing list does not create or
    # truncate the output file.
    with open(file_location, "r") as locations, open(outputFile, "w") as json_file:
        json_file.write('[')
        first_iteration = True
        for line in locations:
            try:
                result = scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score)

                # Serialize completely before touching the output: if this
                # raises, no separator or half-written object is left behind
                # to corrupt the JSON array.
                payload = json.dumps(result)
                if not first_iteration:
                    # Comma separates this element from the previous one.
                    payload = ',\n' + payload

                json_file.write(payload)
                # Flip only after a successful write so that the next element
                # gets a separator exactly when something precedes it.
                first_iteration = False

            except Exception as e:
                print(f"An error occurred for file '{line.strip()}': {e}")
                continue
        json_file.write('\n]')
179 
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to
    # process_files().
    cli = argparse.ArgumentParser(description="Process a file specified by its location.")

    # The four on/off switches share the same shape, so register them from a
    # table (order kept, which keeps the generated --help text unchanged).
    for short_flag, long_flag, help_text in (
        ("-c", "--scan-copyrights", "Scan for copyrights"),
        ("-l", "--scan-licenses", "Scan for licenses"),
        ("-e", "--scan-emails", "Scan for emails"),
        ("-u", "--scan-urls", "Scan for urls"),
    ):
        cli.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    cli.add_argument(
        "-m",
        "--min-score",
        dest="min_score",
        type=int,
        default=0,
        help="Minimum score for a license to be included in the results",
    )
    cli.add_argument('file_location', type=str, help='Path to the file you want to process')
    cli.add_argument('outputFile', type=str, help='Path to the file you want save results to')

    options = cli.parse_args()

    process_files(
        options.file_location,
        options.outputFile,
        options.scan_copyrights,
        options.scan_licenses,
        options.scan_emails,
        options.scan_urls,
        options.min_score,
    )
update_license($old_rf_pk, $new_rf_pk)
update license from old to new 1) update license_file set rf_fk=new_rf_pk where rf_fk=old_rf_pk 2) up...