From 285a95500835248045b0736469e382a1f73fc6be Mon Sep 17 00:00:00 2001
From: Mart Raudsepp
Date: Tue, 21 Mar 2017 00:52:44 +0200
Subject: gnome: make the cache.json requests parallel; reduces a run from 3m01 to 0m23 for me

This relies on the requests-futures package, which in turn relies on
python-3.2+ Futures (or a backport of it).
If the requests-futures import fails, it will fall back to the old slower
fetching one by one.
---
 modules/gnome_module.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/modules/gnome_module.py b/modules/gnome_module.py
index afba235..e6544b6 100644
--- a/modules/gnome_module.py
+++ b/modules/gnome_module.py
@@ -4,8 +4,16 @@
 # vim: set sts=4 sw=4 et tw=0 :
 
 import requests
+try:
+    from requests_futures.sessions import FuturesSession
+    parallel_requests = True
+except ImportError:
+    print("requests-futures not found for parallel fetching - will fallback to slower one-by-one version retrieval for latest version")
+    parallel_requests = False
+
 import package_module, clioptions_module
 
+MAX_WORKERS = 10
 DEBUG = False
 
 
@@ -34,12 +42,17 @@ class GNOME:
             gnome_release_list[1] = str(int(gnome_release_list[1]) + 1)
         self.gnome_release = ".".join(gnome_release_list[:2])
 
-        self.http = requests.session()
+        if parallel_requests:
+            self.http = FuturesSession(max_workers=MAX_WORKERS)
+        else:
+            self.http = requests.session()
         self.url_base = "https://download.gnome.org/"
         self.release_versions_file_path = self.url_base + 'teams/releng/'
 
     def generate_data_from_versions_markup(self, url):
         data = self.http.get(url)
+        if parallel_requests:
+            data = data.result()
         if not data:
             raise ValueError("Couldn't open %s" % url)
 
@@ -61,11 +74,20 @@ class GNOME:
 
     def generate_data_individual(self, release_packages):
         ret = []
+        # First query all results; if parallel_requests==True, this will run in parallel
+        for pkg in release_packages:
+            name = pkg.name.split('/')[-1]
+            if name in name_mapping:
+                name = name_mapping[name]
+            pkg.requests_result = self.http.get(self.url_base + '/sources/' + name + '/cache.json')
+
+        # And now handle the results - this is a separate loop for parallel fetch support
         for pkg in release_packages:
             name = pkg.name.split('/')[-1]
             if name in name_mapping:
                 name = name_mapping[name]
-            data = self.http.get(self.url_base + '/sources/' + name + '/cache.json')
+            # pkg.requests_result is a Future if parallel_requests (call result() on it to wait for/retrieve the Response), else it already is the Response
+            data = pkg.requests_result.result() if parallel_requests else pkg.requests_result
             if not data:
                 print("Warning: Unable to read cache.json for %s" % pkg.name)
                 continue
-- 
cgit v1.2.3-65-gdbad