FOSSology  4.5.1
Open Source License Compliance by Open Source Software
runscanonfiles.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
3 """
4 Copyright (C) 2023 Sushant Kumar (sushantmishra02102002@gmail.com)
5 
6 SPDX-License-Identifier: GPL-2.0-only
7 """
8 
9 import os
10 import json
11 import argparse
12 
# Keep ScanCode's cache in a ".cache" directory located next to this script.
# NOTE(review): this assignment is placed ahead of the scancode import,
# presumably because ScanCode reads SCANCODE_CACHE when it is imported --
# confirm before reordering these statements.
script_directory = os.path.dirname(os.path.abspath(__file__))
os.environ["SCANCODE_CACHE"] = os.path.join(script_directory, '.cache')
16 
17 from scancode import api
18 
def update_license(licenses):
    """
    Flattens license detections into one trimmed record per match.
    Parameters:
        licenses (list): License detection dictionaries. Each one may carry a
            "matches" list of match dictionaries; detections without that key
            contribute nothing.
    Returns:
        list: One dictionary per match, holding only those of the wanted keys
            that are present in the match.
    """
    keys_to_extract_from_licenses = (
        'license_expression_spdx',
        'score',
        'license_expression',
        'rule_url',
        'start_line',
        'matched_text',
    )

    updated_licenses = []
    for detection in licenses:
        # Every match of every detection becomes its own output record.
        updated_licenses.extend(
            {key: match[key] for key in keys_to_extract_from_licenses if key in match}
            for match in detection.get("matches", [])
        )

    return updated_licenses
37 
def update_copyright(copyrights):
    """
    Extracts copyright statements and holders from the 'copyrights' data.
    Parameters:
        copyrights (dict): A dictionary that may contain a 'copyrights' list
            and a 'holders' list of dictionaries. Other keys are ignored.
    Returns:
        tuple: A tuple of two lists. The first list contains one dictionary per
            copyright entry, the second one dictionary per holder entry. In
            both, the text is stored under 'value' and the 'start_line' under
            'start'; fields missing from an entry are omitted.
    """
    key_mapping = {
        'start_line': 'start',
        'copyright': 'value',
        'holder': 'value'
    }

    def _pick(entry, wanted):
        # Copy only the wanted fields that exist, renaming them on the way.
        return {key_mapping.get(name, name): entry[name] for name in wanted if name in entry}

    updated_copyrights = [
        _pick(entry, ('copyright', 'start_line'))
        for entry in copyrights.get('copyrights', [])
    ]
    updated_holders = [
        _pick(entry, ('holder', 'start_line'))
        for entry in copyrights.get('holders', [])
    ]
    return updated_copyrights, updated_holders
65 
def update_emails(emails):
    """
    Extracts relevant information from the 'emails' data.
    Parameters:
        emails (dict): A dictionary that may contain an 'emails' list of
            dictionaries. Other keys are ignored.
    Returns:
        list: One dictionary per email entry, with the address stored under
            'value' and the 'start_line' under 'start'; fields missing from an
            entry are omitted.
    """
    keys_to_extract_from_emails = ('email', 'start_line')
    key_mapping = {
        'start_line': 'start',
        'email': 'value'
    }

    return [
        {key_mapping.get(name, name): entry[name] for name in keys_to_extract_from_emails if name in entry}
        for entry in emails.get('emails', [])
    ]
87 
def update_urls(urls):
    """
    Extracts relevant information from the 'urls' data.
    Parameters:
        urls (dict): A dictionary that may contain a 'urls' list of
            dictionaries. Other keys are ignored.
    Returns:
        list: One dictionary per url entry, with the url stored under 'value'
            and the 'start_line' under 'start'; fields missing from an entry
            are omitted.
    """
    keys_to_extract_from_urls = ('url', 'start_line')
    key_mapping = {
        'start_line': 'start',
        'url': 'value'
    }

    return [
        {key_mapping.get(name, name): entry[name] for name in keys_to_extract_from_urls if name in entry}
        for entry in urls.get('urls', [])
    ]
109 
def scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes a single file and returns the results.
    Parameters:
        line (str): A line from the file containing the list of files to scan.
            Surrounding whitespace is stripped and the rest is used as the
            path of the file to scan.
        scan_copyrights (bool): Collect copyrights and holders when true.
        scan_licenses (bool): Collect license matches when true.
        scan_emails (bool): Collect emails when true.
        scan_urls (bool): Collect urls when true.
        min_score: Passed unchanged as 'min_score' to api.get_licenses.
    Returns:
        dict: The keys 'file', 'licenses', 'copyrights', 'holders', 'emails'
            and 'urls'. The list of every scan that was not requested stays
            empty.
    """
    location = line.strip()
    result = {
        'file': location,
        'licenses': [],
        'copyrights': [],
        'holders': [],
        'emails': [],
        'urls': [],
    }

    if scan_copyrights:
        found_copyrights, found_holders = update_copyright(api.get_copyrights(location))
        result['copyrights'] = found_copyrights
        result['holders'] = found_holders

    if scan_licenses:
        # include_text=True is requested so matches carry 'matched_text'.
        licenses = api.get_licenses(location, include_text=True, min_score=min_score)
        result['licenses'] = update_license(licenses.get("license_detections", []))

    if scan_emails:
        result['emails'] = update_emails(api.get_emails(location))

    if scan_urls:
        result['urls'] = update_urls(api.get_urls(location))

    return result
149 
def process_files(file_location, outputFile, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score):
    """
    Processes the file containing the list of files to scan.

    Each non-blank line of 'file_location' is scanned with scan() and the
    results are written to 'outputFile' as one JSON array. A file whose scan
    or serialisation fails is reported on standard output and left out of the
    array; the remaining files are still processed.
    Parameters:
        file_location (str): Path of the text file listing one path per line.
        outputFile (str): Path of the JSON file to write (overwritten).
        scan_copyrights (bool): Forwarded to scan().
        scan_licenses (bool): Forwarded to scan().
        scan_emails (bool): Forwarded to scan().
        scan_urls (bool): Forwarded to scan().
        min_score: Forwarded to scan().
    Raises:
        OSError: If either file cannot be opened, or writing the output fails.
    """
    with open(file_location, "r") as locations, open(outputFile, "w") as json_file:
        json_file.write('[')
        wrote_result = False
        for line in locations:
            if not line.strip():
                # A blank line names no file; do not emit an entry for it.
                continue

            try:
                result = scan(line, scan_copyrights, scan_licenses, scan_emails, scan_urls, min_score)
                # Serialise before touching the output so that a failure can
                # never leave a dangling comma or half an object behind.
                serialized = json.dumps(result)
            except Exception as e:
                print(f"An error occurred for file '{line.strip()}': {e}")
                continue

            if wrote_result:
                json_file.write(',\n')  # Separate elements of the JSON array
            json_file.write(serialized)
            wrote_result = True
        json_file.write('\n]')
180 
def build_arg_parser():
    """
    Builds the command line parser of this script.
    Returns:
        argparse.ArgumentParser: Parser with the scan flags (-c, -l, -e, -u),
            the integer option -m/--min-score (default 0) and the positional
            arguments 'file_location' and 'outputFile'.
    """
    parser = argparse.ArgumentParser(description="Scan the files listed in a file and save the results as JSON.")
    parser.add_argument("-c", "--scan-copyrights", action="store_true", help="Scan for copyrights")
    parser.add_argument("-l", "--scan-licenses", action="store_true", help="Scan for licenses")
    parser.add_argument("-e", "--scan-emails", action="store_true", help="Scan for emails")
    parser.add_argument("-u", "--scan-urls", action="store_true", help="Scan for urls")
    parser.add_argument("-m", "--min-score", dest="min_score", type=int, default=0, help="Minimum score for a license to be included in the results")
    parser.add_argument('file_location', type=str, help='Path to the file listing the files to scan, one path per line')
    parser.add_argument('outputFile', type=str, help='Path to the file you want to save results to')
    return parser


def main(argv=None):
    """
    Parses the command line and runs the scan.
    Parameters:
        argv (list): Arguments to parse; None makes argparse read sys.argv.
    """
    args = build_arg_parser().parse_args(argv)
    process_files(
        args.file_location,
        args.outputFile,
        args.scan_copyrights,
        args.scan_licenses,
        args.scan_emails,
        args.scan_urls,
        args.min_score,
    )


if __name__ == "__main__":
    main()
update_license($old_rf_pk, $new_rf_pk)
update license from old to new 1) update license_file set rf_fk=new_rf_pk where rf_fk=old_rf_pk 2) up...