9 import concurrent.futures
19 from .Parsers
import DOWNLOAD_URL_KEY, Parser
24 Class for downloading dependencies in parallel from their download URLs.
29 self.
locklock = threading.Lock()
34 Determines the base directory within an extracted archive.
35 Assumes a common pattern where archives may contain a single top-level
40 if zipfile.is_zipfile(archive_path):
41 with zipfile.ZipFile(archive_path,
'r')
as zip_ref:
42 members = zip_ref.namelist()
46 first_part = members[0].split(os.sep)[0]
48 m.startswith(first_part + os.sep)
or m == first_part
for m
in
54 elif tarfile.is_tarfile(archive_path):
55 with tarfile.open(archive_path,
'r:*')
as tar_ref:
56 members = tar_ref.getnames()
58 first_part = members[0].split(os.sep)[0]
60 m.startswith(first_part + os.sep)
or m == first_part
for m
in
67 zipfile.BadZipFile, tarfile.ReadError, tarfile.FilterError, IOError
70 f
"Could not inspect archive {archive_path} for base directory: {e}"
74 def __download_package(self, component: dict) ->
None:
75 download_url = component.get(DOWNLOAD_URL_KEY)
78 "No download URL found for component: "
79 f
"{component.get('name', 'N/A')}. Skipping."
83 package_name = component.get(
'name',
'unknown_package')
84 package_folder = component.get(
'download_dir')
86 if not package_folder:
88 f
"Download directory not specified for {package_name}. Skipping."
92 os.makedirs(package_folder, exist_ok=
True)
94 parsed_url = urllib.parse.urlparse(download_url)
95 filename = os.path.basename(parsed_url.path)
97 filename = f
"{package_name}_download"
101 archive_extensions = [
102 '.tar.gz',
'.tgz',
'.tar.bz2',
'.tbz',
'.tar.xz',
'.txz',
'.zip',
'.whl',
106 for ext
in archive_extensions:
107 if filename.lower().endswith(ext):
110 if not file_extension:
112 _, file_extension = os.path.splitext(filename)
113 if not file_extension:
114 file_extension =
'.bin'
116 file_path = os.path.join(package_folder, f
"{package_name}{file_extension}")
118 temp_archive_path = file_path
122 f
"Downloading {package_name} from {download_url} to {temp_archive_path}"
124 response = requests.get(
127 response.raise_for_status()
130 with open(temp_archive_path,
'wb')
as f:
131 for chunk
in response.iter_content(chunk_size=8192):
133 logging.info(f
"Downloaded {package_name} to {temp_archive_path}")
135 if temp_archive_path.lower().endswith(
'.zip'):
136 with zipfile.ZipFile(temp_archive_path,
'r')
as zip_ref:
137 zip_ref.extractall(package_folder)
141 elif temp_archive_path.lower().endswith(
142 (
'.tar.gz',
'.tgz',
'.tar.bz2',
'.tbz',
'.tar.xz',
'.txz',
'.tar')
144 with tarfile.open(temp_archive_path,
'r:*')
as tar_ref:
145 tar_ref.extractall(package_folder)
151 f
"Unsupported file format for extraction: {file_extension} for "
152 f
"{package_name}. File downloaded but not extracted."
157 purl = component.get(
'purl')
158 if purl
and self.
parserparser
and purl
in self.
parserparser.parsed_components:
160 self.
parserparser.parsed_components[purl][
'base_dir'] = base_dir
163 f
"Exported {package_name} to {package_folder} (base_dir: '{base_dir}')"
166 except requests.exceptions.Timeout:
168 f
"Timeout occurred while downloading {package_name} from {download_url}"
170 except requests.exceptions.HTTPError
as e:
172 f
"HTTP error {e.response.status_code} while downloading "
173 f
"{package_name} from {download_url}: {e}"
175 except requests.exceptions.RequestException
as e:
177 f
"Network error while downloading {package_name} from {download_url}: "
180 except (zipfile.BadZipFile, tarfile.ReadError, tarfile.FilterError)
as e:
182 f
"Error extracting archive for {package_name} from "
183 f
"{temp_archive_path}: {e}"
187 f
"File I/O error during download or extraction for {package_name}: {e}"
189 except Exception
as e:
191 f
"An unexpected error occurred during download or extraction for "
192 f
"{package_name}: {e}"
196 if os.path.exists(temp_archive_path):
198 os.remove(temp_archive_path)
201 f
"Could not remove temporary archive file {temp_archive_path}: {e}"
206 Download, concurrently, the packages of all parsed components that have a download URL.
208 self.
parserparser = parser
211 component
for component
in parser.parsed_components.values()
212 if component.get(DOWNLOAD_URL_KEY,
None)
215 if not download_list:
216 logging.info(
"No packages with download URLs found to download.")
217 return "0 packages downloaded."
220 f
"Attempting to download {len(download_list)} packages concurrently..."
223 with concurrent.futures.ThreadPoolExecutor(
224 max_workers=os.cpu_count()
or 4
228 for comp
in download_list
231 for future
in concurrent.futures.as_completed(futures):
234 except Exception
as e:
235 logging.error(f
"Error downloading package: {e}")
238 f
"Finished concurrent download process for {len(download_list)} packages."
240 return f
"{len(download_list)} packages downloaded."
def download_concurrently(self, Parser parser)
None __download_package(self, dict component)
str __get_archive_base_dir(self, str archive_path)