 .flake8                           |   4 +
 .gitignore                        |   1 +
 generate-libvpx-test-tarball.sh   |  42 +
 get-chromium-toolchain-strings.py | 211 +
 get-edge-cves.py                  | 254 +
 get-opera-version-mapping.py      | 123 +-
 get_chromium_toolchain_strings.sh |  70 -
 iterate-over-ebuild.sh            |  70 +
 opera-bump                        |   1 +
 9 files changed, 672 insertions, 104 deletions
diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..cb2f802
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,4 @@
+[flake8]
+ignore = E401
+max-line-length = 120
+max-complexity = 15
diff --git a/.gitignore b/.gitignore
index b057d7f..61be068 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
MANIFEST
*.pyc
+bugzilla_api_key
diff --git a/generate-libvpx-test-tarball.sh b/generate-libvpx-test-tarball.sh
new file mode 100755
index 0000000..f8a3844
--- /dev/null
+++ b/generate-libvpx-test-tarball.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# This script fetches the libvpx sources, checks out the appropriate tag
+# and generates a tarball that can be placed in a devspace or other
+# web-accessible site and added to SRC_URI for a given libvpx release.
+# Legacy manual instructions:
+# To create a new testdata tarball:
+# 1. Unpack source tarball or checkout git tag
+# 2. mkdir libvpx-testdata
+# 3. export LIBVPX_TEST_DATA_PATH=libvpx-testdata
+# 4. ./configure --enable-unit-tests --enable-vp9-highbitdepth
+# 5. make testdata
+# 6. tar -caf libvpx-testdata-${MY_PV}.tar.xz libvpx-testdata
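+#
+# Usage (the tag argument is optional; the tag below is an illustrative example):
+#   ./generate-libvpx-test-tarball.sh v1.14.1
+# With no argument, the most recently created tag in the libvpx repository is used.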
+
+set -e
+
+if [ -d /tmp/libvpx ]; then
+ rm -rf /tmp/libvpx
+fi
+
+git clone https://github.com/webmproject/libvpx.git /tmp/libvpx
+
+pushd /tmp/libvpx
+	# Default to the latest tag if one isn't passed as an argument;
+	# we're typically only packaging the latest version.
+ LATEST_TAG="$(git tag --sort=taggerdate | tail -1)"
+ TAG="${1:-$LATEST_TAG}"
+
+ if [ -d "/tmp/libvpx-${TAG:1}-testdata" ]; then
+ rm -rf "/tmp/libvpx-${TAG:1}-testdata"
+ fi
+
+ mkdir -p "/tmp/libvpx-${TAG:1}-testdata"
+
+ echo "Packaging libvpx testdata for ${TAG}"
+ git checkout "tags/${TAG}"
+
+ ./configure --enable-unit-tests --enable-vp9-highbitdepth
+ LIBVPX_TEST_DATA_PATH="/tmp/libvpx-${TAG:1}-testdata" make -j$(nproc) testdata
+popd
+pushd /tmp
+ XZ_OPT="-T0 -9" tar cvaf "libvpx-${TAG:1}-testdata.tar.xz" "libvpx-${TAG:1}-testdata"
+popd
diff --git a/get-chromium-toolchain-strings.py b/get-chromium-toolchain-strings.py
new file mode 100755
index 0000000..d7c124d
--- /dev/null
+++ b/get-chromium-toolchain-strings.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# This script extracts the revision and sub-revision from the update.py and
+# update_rust.py files in the Chromium source code. The revision and sub-revision
+# identify the versions of Clang and Rust used in the Chromium toolchain.
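+#
+# Usage (the version argument is optional and falls back to the default in main()):
+#   ./get-chromium-toolchain-strings.py 128.0.6613.113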
+
+import json
+import requests
+import sys
+
+def get_testfonts(url) -> str:
+    """
+    Reads the DEPS file (gclient) and extracts the testfonts SHA, which is used
+    as the object name (SHA256 as of 2024):
+    deps['src/third_party/test_fonts/test_fonts']['objects'][0]['object_name']
+
+    Args:
+        url (str): The URL of the DEPS file on GitHub's raw endpoint.
+
+    Returns:
+        str: The SHA256 object name of the testfonts.
+
+    Raises:
+        ValueError: If the DEPS file cannot be fetched.
+    """
+
+ # We're not properly parsing the DEPS file, but it's 'close enough' to JSON that
+ # we can throw away the preamble and do some remediation to read the values in.
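+    # Illustrative (hypothetical) sketch of what the clean-up below does: an entry like
+    #   'src/third_party/foo': {
+    #       'url': Var('chromium_git') + '/foo.git',
+    #       'condition': 'checkout_linux',
+    #   },
+    # ends up as JSON-parseable text along the lines of
+    #   "src/third_party/foo": {
+    #   },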
+
+    response = requests.get(url)
+ if response.status_code == 200:
+ text = response.content.decode('utf-8')
+ lines = text.splitlines()
+ # throw away everything up to `deps = {`
+ # We'll add our own opening brace to make it valid JSON
+ start = 0
+ for idx, line in enumerate(lines):
+ if line.startswith("deps = {"):
+ start = idx + 1
+ break
+
+ # throw away everything after the variable ends `}`
+ length = 0
+ for idx, line in enumerate(lines):
+ if idx < start:
+ continue
+ if line.startswith("}"):
+ length = idx
+ break
+
+ deps: list[str] = ['{', '}']
+ deps[1:1] = lines[start:length]
+
+ # remove any comments, because JSON doesn't like them
+ deps = [line for line in deps if not line.strip().startswith('#')]
+
+ # I hate to do this, but we need to remediate the JSON - single quotes to double quotes ho!
+ deps = [line.replace("'", '"') for line in deps]
+ # the `condition` variable is always a python comparison. Let's not even try to parse it.
+ # we don't care so just drop the whole line
+ deps = [line for line in deps if "condition" not in line]
+ # ditto `Var()`
+ deps = [line for line in deps if "Var(" not in line]
+ # if a line ends in ' +' it's a python thing and we probably already stripped whatever is being
+ # concatenated, so we can just remove the '+' and append a ','.
+ deps = [line.replace(" +", ",") if line.endswith(" +") else line for line in deps]
+ # strip ' "@",' from any lines... No idea what gclient does with this
+ deps = [line.replace(' "@",', "") for line in deps]
+
+
+ # If we encounter '[{' or '}]' we should expand them onto individual lines.
+ # for '[{', remove the { and add it on a new line, for '}]' remove the ] and add it on a new line.
+ # every instance so far has been '}],' so let's assume that holds true?
+ newdeps = []
+ for line in deps:
+ if '[{' in line:
+ # 'blah: [', '{'
+ newdeps.append(line[:-1])
+ newdeps.append('{')
+ elif '}]' in line:
+ # '},', '],'
+ newdeps.append(line[:-2])
+ newdeps.append('],')
+ else:
+ newdeps.append(line)
+
+ deps = newdeps
+
+    # If the last entry in an object has a trailing comma, it's invalid JSON, so we need to
+    # remove it; easiest is to check whether the next line (stripped) closes the object.
+    newdeps = []
+    for idx, line in enumerate(deps):
+        if line.endswith(",") and deps[idx + 1].strip() in ("}", "},"):
+            newdeps.append(line[:-1])  # drop only the trailing comma
+        else:
+            newdeps.append(line)
+
+    deps = newdeps
+    newdeps = []
+
+    # Same again for objects that close just before a list ends
+    for idx, line in enumerate(deps):
+        if line.endswith("},") and deps[idx + 1].strip() in ("]", "],"):
+            newdeps.append(line[:-1])
+        else:
+            newdeps.append(line)
+
+    deps = newdeps
+
+ # If the line does not contain a colon _and_ the previous and next lines contain '{' and '}' respectively,
+ # it's very likely a naked sha and json can't parse it. We can just strip it.
+ newdeps = []
+ for idx, line in enumerate(deps):
+ if ":" not in line and "{" in deps[idx - 1] and '}' in deps[idx + 1]:
+ continue
+ else:
+ newdeps.append(line)
+
+ deps = newdeps
+
+ # final blacklist; not worth writing a rule for this
+ bad_lines = [
+ '+ "@" + "42e892d96e47b1f6e29844cc705e148ec4856448", # release 1.9.4',
+ ]
+ deps = [line for line in deps if line.strip() not in bad_lines]
+
+ # Clean up any keys with no values. Always do this last
+ newdeps = []
+ for idx, line in enumerate(deps):
+ if line.endswith(":") and deps[idx + 1].strip() == "":
+ continue
+ else:
+ newdeps.append(line)
+
+ deps = newdeps
+
+ # debug_lines = range(1460, 1500)
+ # for idx, line in enumerate(deps):
+ # if idx in debug_lines:
+ # print(f"{idx}: {line}")
+
+ # Now we have a list of strings that should be valid JSON
+ # We can join them and load them
+ deps = json.loads('\n'.join(deps))
+ # Now we can get the testfonts SHA
+ return deps['src/third_party/test_fonts/test_fonts']['objects'][0]['object_name']
+    else:
+        raise ValueError(f"Failed to fetch DEPS file. Status code: {response.status_code}")
+
+
+def get_revision_info(url) -> tuple[str, int]:
+ """
+ Extracts revision and sub-revision from a Chromium source file URL.
+
+ Args:
+ url (str): The URL of the source file on GitHub's raw endpoint.
+
+    Returns:
+        tuple: A tuple containing the revision (str) and sub-revision (int).
+
+    Raises:
+        ValueError: If the revision info cannot be fetched or extracted.
+    """
+ response = requests.get(url)
+ if response.status_code == 200:
+ text = response.content.decode('utf-8') # Decode to UTF-8
+ lines = text.splitlines()
+ revision = None
+ sub_revision = None
+ for line in lines:
+ if line.startswith("CLANG_REVISION") and not line.startswith("PACKAGE_VERSION"):
+ revision = line.split("=")[1].strip().strip("'")
+ elif line.startswith("CLANG_SUB_REVISION") and not line.startswith("PACKAGE_VERSION"):
+ sub_revision = int(line.split("=")[1].strip())
+ elif line.startswith("RUST_REVISION") and not line.startswith("specieid") and not line.startswith(" return"):
+            # I know that's spelt wrong, but apparently google can't spell
+ revision = line.split("=")[1].strip().strip("'")
+ elif line.startswith("RUST_SUB_REVISION") and not line.startswith("specieid") and not line.startswith(" return"):
+ sub_revision = int(line.split("=")[1].strip()[-1])
+ if revision is None or sub_revision is None:
+ raise ValueError("Failed to extract revision and sub-revision")
+ return revision, sub_revision
+ else:
+ raise ValueError(f"Failed to get revision info. Status code: {response.status_code}")
+
+
+def main():
+ version = sys.argv[1] if len(sys.argv) > 1 else "128.0.6613.113"
+ # It's a lot easier to use GH raw URLs for this
+ base_url = "https://raw.githubusercontent.com/chromium/chromium/"
+ clang_url = f"{base_url}{version}/tools/clang/scripts/update.py"
+ rust_url = f"{base_url}{version}/tools/rust/update_rust.py"
+ deps_url = f"{base_url}{version}/DEPS"
+ clang_revision, clang_sub_revision = get_revision_info(clang_url)
+ rust_revision, rust_sub_revision = get_revision_info(rust_url)
+ testfonts = get_testfonts(deps_url)
+ if clang_revision and clang_sub_revision:
+ print(f"clang revision: {clang_revision}-{clang_sub_revision}")
+ else:
+ print("clang revision not found")
+ if rust_revision and rust_sub_revision:
+ print(f"rust revision: {rust_revision}-{rust_sub_revision}")
+ else:
+ print("rust revision not found")
+ if testfonts:
+ print(f"test fonts: {testfonts}")
+ else:
+ print("test fonts not found")
+
+if __name__ == "__main__":
+ main()
diff --git a/get-edge-cves.py b/get-edge-cves.py
new file mode 100755
index 0000000..44b2eef
--- /dev/null
+++ b/get-edge-cves.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+# This script extracts the Chromium version mapping for Microsoft Edge based on a given CVE ID.
+# It uses the Microsoft Security Response Center (MSRC) API to get the Common Vulnerability Reporting Framework (CVRF)
+# for a given month and extracts the Chromium version mapping for Microsoft Edge (Chromium-based) from the CVRF.
+
+# API Docs https://api.msrc.microsoft.com/cvrf/v3.0/swagger/v3/swagger.json
+
+# We can use the CVRF API to get the Common Vulnerability Reporting Framework (CVRF) for a given month.
+# We can query the API via CVE ID to get the CVRF for a specific CVE, but that just leads us back to querying
+# the month. Stretch goal to ingest directly from bgo ticket aliases and confirm the month & version?
+# https://api.msrc.microsoft.com/cvrf/v3.0/updates/CVE-2024-7969
+
+# https://api.msrc.microsoft.com/cvrf/v3.0/cvrf/2024-Aug
+# is the URL for the CVRF for August 2024
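+# For a quick manual look at the same document this script fetches (assumes curl is available):
+#   curl -s https://api.msrc.microsoft.com/cvrf/v3.0/cvrf/2024-Aug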
+
+# The XML looks like this:
+# <cvrfdoc
+# . . .
+# <vuln:Vulnerability
+# Ordinal="261">
+# <vuln:Title>Chromium: CVE-2024-7969 Type Confusion in V8</vuln:Title>
+# . . .
+# <vuln:ProductStatuses>
+# <vuln:Status
+# Type="Known Affected">
+# <vuln:ProductID>11655</vuln:ProductID>
+# . . .
+# </vuln:ProductStatuses>
+# . . .
+# <vuln:CVE>CVE-2024-7969</vuln:CVE>
+# . . .
+# <vuln:Remediations>
+# <vuln:Remediation
+# Type="Vendor Fix">
+# <vuln:Description>Release Notes</vuln:Description>
+# <vuln:URL />
+# <vuln:ProductID>11655</vuln:ProductID>
+# <vuln:AffectedFiles />
+# <vuln:RestartRequired>No</vuln:RestartRequired>
+# <vuln:SubType>Security Update</vuln:SubType>
+# <vuln:FixedBuild>128.0.2739.42</vuln:FixedBuild>
+# . . .
+# </vuln:Remediations>
+# . . .
+# </vuln:Vulnerability>
+
+# Process: Pick a month, get the CVRF for that month, then iterate over vulnerabilities to find the ones
+# that are for Microsoft Edge (Chromium-based) `<vuln:ProductID>11655</vuln:ProductID>`.
+# Extract the <vuln:CVE>CVE-2024-7969</vuln:CVE> to extract a CVE ID and
+# map to Chromium versions using the <vuln:FixedBuild>128.0.2739.42</vuln:FixedBuild> tag (or the notes if we _have_ to).
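+#
+# Usage examples (the CVE below is the one from the sample XML above; bug IDs are b.g.o bug numbers):
+#   ./get-edge-cves.py -y 2024 -m 8
+#   ./get-edge-cves.py -c CVE-2024-7969
+#   ./get-edge-cves.py -b <bug-number>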
+
+import argparse, calendar, dataclasses, datetime, os, sys
+import xml.etree.ElementTree as ET
+
+from bs4 import BeautifulSoup
+from portage import versions as portage_versions
+import bugzilla, requests
+
+
+@dataclasses.dataclass
+class EdgeCVE:
+ cve: str
+ title: str
+ fixedbuild: str | None
+
+ def __str__(self):
+ return f"{self.cve}: {self.title}: Fixed {self.fixedbuild if not None else 'unknown'}"
+
+
+def get_edge_cves(year, month) -> list[EdgeCVE]:
+ """
+ Queries the Microsoft Security Response Center (MSRC) API for the Common Vulnerability Reporting Framework (CVRF)
+ for a given month and extracts the Chromium version mapping for Microsoft Edge (Chromium-based) from the CVRF.
+
+ Args:
+ year: The year to query.
+ month: The month to query.
+
+ Returns:
+ list[EdgeCVE]: A list of EdgeCVE objects.
+ """
+
+ msrcapi = f"https://api.msrc.microsoft.com/cvrf/v3.0/cvrf/{year}-{month}"
+
+ # Get the CVRF for the specified month
+ response = requests.get(msrcapi)
+
+ if response.status_code != 200:
+ print(f"Website returned {response.status_code}")
+ print(f"Failed to get CVRF for {year}-{month}")
+ sys.exit(1)
+
+ # Parse the XML
+ root = ET.fromstring(response.text)
+
+ # Find all the vulnerabilities
+ vulnerabilities = root.findall(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}Vulnerability")
+
+ edge_cves = [] # Store the edge cves here
+ for vulnerability in vulnerabilities:
+ productstatuses = vulnerability.findall(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}ProductStatuses")
+ for productstatus in productstatuses:
+ productid = productstatus.find(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}ProductID")
+ if productid.text == "11655":
+ # This is a Microsoft Edge (Chromium-based) vulnerability
+ cve_id = vulnerability.find(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}CVE").text
+ cve_title = vulnerability.find(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}Title").text
+ remediations = vulnerability.findall(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}Remediations")
+ for remediation in remediations:
+ fixedbuild = remediation.find(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}FixedBuild")
+ if fixedbuild is not None:
+ edge_cves.append(
+ EdgeCVE(cve_id, cve_title, fixedbuild.text)
+ )
+ else:
+ # Fall back to parsing that horrible, horrible table in the notes
+ notes = vulnerability.find(".//{http://www.icasi.org/CVRF/schema/vuln/1.1}Notes")
+ # There appear to be multiple notes, but only one has content that we want:
+ # <vuln:Note Title="FAQ" Type="FAQ" Ordinal="10">&lt;p&gt;&lt;strong&gt;What is the version information for this release?&lt;/strong&gt;&lt;/p&gt; # noqa: E501
+ found = False
+ for note in notes:
+ if note.attrib['Title'] == "FAQ" and note.attrib['Type'] == "FAQ":
+
+ # The note contains a table with the chromium and edge versions, written in "HTML"
+ # &lt;td&gt;8/22/2024&lt;/td&gt;
+ content = note.text
+
+ soup = BeautifulSoup(content, 'html.parser')
+ rows = soup.find_all('tr')
+ # We want the second row, second cell
+ if len(rows) > 1:
+ cells = rows[1].find_all('td')
+ if len(cells) > 1:
+ # We want the second cell (1st is channel, 3rd is chromium version)
+ edge_version = cells[1].text
+ if portage_versions.ververify(edge_version):
+ found = True
+ edge_cves.append(
+ EdgeCVE(cve_id, cve_title, edge_version)
+ )
+
+ if not found:
+ edge_cves.append(
+ EdgeCVE(cve_id, cve_title, None)
+ )
+
+ return edge_cves
+
+
+def get_cve_from_bug_alias(bugnumber: int) -> list[str]:
+ """
+ Queries the Gentoo bugzilla instance for the list of CVEs associated with a given bug.
+
+ Since we, by convention, alias bugs to CVEs, we can just query the alias field.
+
+ Args:
+ bugnumber (int): The bug number to query.
+
+ Returns:
+        list[str]: A list of CVEs associated with the bug.
+
+ """
+ url = "bugs.gentoo.org"
+ keyfile = open(os.path.abspath('./bugzilla_api_key'))
+ api_key = keyfile.read().replace('\n','')
+ print('connecting to b.g.o')
+ bzapi = bugzilla.Bugzilla(url, api_key)
+ bug = bzapi.getbug(bugnumber)
+ cves = bug.alias
+ print(f'Bug: {bug} has {len(cves)} CVEs:\n\t{', '.join(cves)}')
+
+ return cves
+
+
+def get_msrc_for_cve(cve: str) -> str:
+ """
+    Do a simple web request to get the CVRF ID for a given CVE.
+
+    Args:
+        cve (str): The CVE to query.
+
+    Returns:
+        str: The CVRF document ID (e.g. "2024-Aug") for the CVE.
+ """
+
+ msrcapi = f"https://api.msrc.microsoft.com/cvrf/v3.0/updates/{cve}"
+ response = requests.get(msrcapi)
+
+ if response.status_code != 200:
+ print(f"Website returned {response.status_code}")
+ print(f"Failed to get CVRF for {cve}")
+ sys.exit(1)
+
+ # This is JSON, we want { "value": [ { "ID": "2024-Aug" }, ] }
+ return response.json().get('value')[0].get('ID')
+
+
+def parse_arguments():
+ parser = argparse.ArgumentParser(description="Script to get Edge CVEs.")
+    parser.add_argument('-m', '--month', type=int, default=datetime.datetime.now().month,
+                        help='Month as a number (1-12)')
+    parser.add_argument('-y', '--year', type=int, default=datetime.datetime.now().year,
+                        help='Year as a four-digit number')
+ parser.add_argument('-b', '--bug', nargs='*', help='List of bug identifiers')
+ parser.add_argument('-c', '--cve', nargs='*', help='List of CVE identifiers')
+ return parser.parse_args()
+
+
+def main():
+ args = parse_arguments()
+
+ # If we have a CVE to query (bugs contain them in the Alias field) we can query the API directly
+ # and work out which CVRF(s) to query.
+ if not args.bug and not args.cve:
+ month = calendar.month_name[args.month][0:3]
+ for cve in get_edge_cves(args.year, month):
+ print(cve)
+
+ # If we have a bug, we can query the bugzilla API to get the CVEs associated with it
+ elif args.bug:
+ for bug in args.bug:
+ cves = get_cve_from_bug_alias(bug)
+
+ msrcs = []
+ for cve in cves:
+ msrcs.append(get_msrc_for_cve(cve))
+
+ # Dedupe
+ msrcs = list(set(msrcs))
+
+ for msrc in msrcs:
+ for cve in get_edge_cves(msrc.split('-')[0], msrc.split('-')[1]):
+ if cve.cve in cves:
+ print(cve)
+
+ # If we have a CVE (or list of CVEs), we can query the API directly to identify the CVRFs to query
+ elif args.cve:
+ msrcs = []
+ cves = []
+ for cve_id in args.cve:
+ cves.append(cve_id)
+ msrcs.append(get_msrc_for_cve(cve_id))
+
+ # Dedupe
+ msrcs = list(set(msrcs))
+
+ for msrc in msrcs:
+ for cve in get_edge_cves(msrc.split('-')[0], msrc.split('-')[1]):
+ if cve.cve in cves:
+ print(cve)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/get-opera-version-mapping.py b/get-opera-version-mapping.py
index 2b515b4..015fd21 100755
--- a/get-opera-version-mapping.py
+++ b/get-opera-version-mapping.py
@@ -1,29 +1,51 @@
#!/usr/bin/env python
+
+# SPDX-License-Identifier: GPL-2.0-or-later
+# This script is used to extract Opera and Chromium versions from the Opera changelog (blog)
+# This is incomplete data, so we need to fill in the gaps with the Chromium version from the previous known version
+# The intent here is to have _some_ sort of datasource to identify a potentially-fixed version of Opera based on
+# the Chromium version it includes.
+# High level logic:
+# We can fetch the opera blog posts that relate to a major version of Opera as long as they don't change their URIs.
+# We iterate over H4 elements to get the Opera version (and date, though we throw that away)
+# We then iterate over child elements until we find an "Update Chromium" entry, which we can use to get the
+# Chromium version (in which case we bail early) Or we exhaust the children and give up.
+# Lather, rinse, repeat.
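+#
+# Usage (positional start and end Opera major versions, matching the argparse defaults):
+#   ./get-opera-version-mapping.py 110 115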
+
+import argparse, dataclasses
+
import requests
from bs4 import BeautifulSoup
+from packaging.version import Version
+
+
+@dataclasses.dataclass
+class OperaChromiumVersion:
+ opera_version: Version
+ chromium_version: Version
+
+ def __str__(self):
+ chromium_version_str = 'unknown' if self.chromium_version == Version('0.0.0.0') else str(self.chromium_version)
+ return f"Opera Version: {self.opera_version}, Chromium Version: {chromium_version_str}"
def get_opera_chromium_versions(base_url, start_version, end_version):
"""
- Extracts Opera and Chromium versions from the given base URL with version placeholders,
+ Extracts Opera and Chromium versions from the given base URL with version placeholders,
parsing content sections for versions from start_version to end_version (inclusive).
Args:
- base_url: The base URL for Opera changelogs with a version placeholder (e.g., "https://blogs.opera.com/desktop/changelog-for-{version}/").
+ base_url: The base URL for Opera changelogs with a version placeholder (e.g.,
+ "https://blogs.opera.com/desktop/changelog-for-{version}/").
start_version: The starting version to extract information for (inclusive).
end_version: The ending version to extract information for (inclusive).
Returns:
- A dictionary mapping Opera version to Chromium version.
- If no update is mentioned, the previous Chromium version is used.
- For missing data or errors, "unknown" is used.
+ A list of OperaChromiumVersion objects containing the extracted version information.
"""
- versions = {}
- chromium_version = None
+ versions: list[OperaChromiumVersion] = []
for version in range(start_version, end_version + 1):
- # Fix formatting issue:
- # OR url = base_url.format(version)
url = base_url.format(version)
print(f"Processing version {version}")
@@ -37,8 +59,8 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
# Iterate through each section starting with an H4 element
for section in content.find_all('h4'):
+ chromium_version = None
version_str, date_str = section.text.strip().split(' – ')
- versions[version_str] = chromium_version
# Process all content elements (including nested ones) until the next H4
next_sibling = section.find_next_sibling(
@@ -62,7 +84,12 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
# Handle missing Chromium version
if not chromium_version:
- chromium_version = "unknown"
+ chromium_version = '0.0.0.0'
+
+ versions.append(OperaChromiumVersion(
+ Version(version_str),
+ Version(chromium_version)
+ ))
except requests.exceptions.RequestException as e:
if e.args and e.args[0] == 404:
@@ -75,41 +102,69 @@ def get_opera_chromium_versions(base_url, start_version, end_version):
print(f"Unexpected error: {e}")
chromium_version = None # Reset chromium_version for next iteration
- return versions
+ # We're broadly sorted by major version, but within each major version we get newer entries first
+ # Sort by Opera version to get the correct order
+ sorted_versions = sorted(versions, key=lambda x: x.opera_version)
+ return sorted_versions
def remediate_unknown_versions(versions):
"""
- Remediates entries with "unknown" values in the versions dictionary by
+    Remediates entries with '0.0.0.0' chromium versions in the versions list by
assuming no change from the previous known version.
Args:
- versions: A dictionary mapping Opera version to Chromium version.
+ versions: A list of OperaChromiumVersion objects containing the extracted version information.
Returns:
- The modified versions dictionary with "unknown" values replaced based on previous entries.
+ A list of OperaChromiumVersion objects with '0.0.0.0' values replaced
+ by the previous known version if available.
"""
- previous_version = None
- for version, chromium_version in versions.items():
- if chromium_version == "unknown":
- if previous_version is not None:
- # Update with previous version
- versions[version] = previous_version
+ previous_version: Version = Version('0.0.0.0')
+ fixed_versions: list[OperaChromiumVersion] = []
+
+ for mapping in versions:
+        if mapping.chromium_version == Version('0.0.0.0') and previous_version != Version('0.0.0.0'):
+ # Update with previous version
+ fixed_versions.append(OperaChromiumVersion(mapping.opera_version, previous_version))
else:
- previous_version = chromium_version # Update known version for future references
- return versions
+ # This should be fine, we're always parsing from oldest to newest
+ if previous_version < mapping.chromium_version:
+ previous_version = mapping.chromium_version
+ fixed_versions.append(mapping)
+
+ return fixed_versions
+
+
+def parse_arguments():
+ """
+ Parses the command line arguments and returns the parsed values.
+
+ Returns:
+ The parsed command line arguments.
+ """
+ parser = argparse.ArgumentParser(description='Get Opera and Chromium versions.')
+    parser.add_argument('start_ver', type=int, nargs='?', help='starting version', default=110)
+    parser.add_argument('end_ver', type=int, nargs='?', help='ending version', default=115)
+ return parser.parse_args()
+
+
+def main():
+ args = parse_arguments()
+
+ # Base URL with version placeholder
+ base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
+ opera_chromium_versions = get_opera_chromium_versions(base_url, args.start_ver, args.end_ver)
+ fixed_versions = remediate_unknown_versions(opera_chromium_versions)
-# Example usage
-# Base URL with version placeholder
-base_url = "https://blogs.opera.com/desktop/changelog-for-{}/"
-opera_chromium_versions = get_opera_chromium_versions(base_url, 100, 110)
+ # Print the versions
+ if fixed_versions:
+ for mapping in fixed_versions:
+ print(mapping)
+ else:
+ print("Failed to extract any versions.")
-opera_chromium_versions = remediate_unknown_versions(opera_chromium_versions)
-if opera_chromium_versions:
- for opera_version, chromium_version in opera_chromium_versions.items():
- print(
- f"Opera Version: {opera_version}, Chromium Version: {chromium_version}")
-else:
- print("Failed to extract any versions.")
+if __name__ == "__main__":
+ main()
diff --git a/get_chromium_toolchain_strings.sh b/get_chromium_toolchain_strings.sh
deleted file mode 100755
index 483d66f..0000000
--- a/get_chromium_toolchain_strings.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/bash
-
-# This script extracts version information from Chromium sources by way of a Gentoo ebuild
-# then plugs the version information into the ebuild file. This is useful for updating the
-# toolchain versions in the ebuild file when a new (major) version of Chromium is released.
-
-# Usage: get_chromium_toolchain_strings.sh <ebuild_file>
-# <ebuild_file>: The path to the Chromium ebuild file
-
-# Extract the version string from an ebuild
-get_version() {
- local filename="$1"
- [[ -z "$filename" ]] && return 1 # Check for empty filename
- local version_match="${filename##*-}"; # Extract everything after the last hyphen
- version_match="${version_match%.*}" # Remove extension (.ebuild)
- echo "$version_match"
-}
-
-# Display script usage
-usage() {
- echo "Usage: get_chromium_toolchain_strings.sh <ebuild_file>"
- echo " <ebuild_file>: The path to the Chromium ebuild file"
-}
-
-# Get the ebuild filename as the first argument
-ebuild_file="$1"
-
-# Check for missing argument
-if [[ -z "$ebuild_file" ]]; then
- echo "Error: Please provide an ebuild filename as an argument."
- usage
- exit 1
-fi
-
-# Extract version from filename
-version="$(get_version "$ebuild_file")"
-
-# Check if version extraction failed (function return code)
-if [[ $? -ne 0 ]]; then
- echo "Error: Could not extract version from filename."
- exit 1
-fi
-
-# Construct S string based on version
-# Bad luck if you don't use /var/tmp/portage, I guess.
-S="/var/tmp/portage/www-client/chromium-${version}/work/chromium-${version}/"
-
-# Run ebuild with clean and unpack options
-ebuild "$ebuild_file" clean unpack
-
-# No secret sauce here - it's just simpler to set the field separator to a single quote
-# and then extract the final character from the sub-revision field.
-# This is a bit of a hack, but it works for now - I haven't see upstream go past the
-# 9th sub-revision yet!
-
-llvm_version=$(awk -F"'" '
-/CLANG_REVISION =/ { revision = $2 }
-/CLANG_SUB_REVISION =/ { printf("%s-%d\n", revision, substr($1, length($1), 1)) }
-' "${S}/tools/clang/scripts/update.py")
-
-rust_version=$(awk -F"'" '
-/RUST_REVISION =/ { revision = $2 }
-/RUST_SUB_REVISION =/ { printf("%s-%d\n", revision, substr($1, length($1), 1)) }
-' "${S}/tools/rust/update_rust.py")
-
-# Substitute versions into ebuild (assuming specific locations)
-sed -i "s/GOOGLE_CLANG_VER=.*/GOOGLE_CLANG_VER=${llvm_version}/" "$ebuild_file"
-sed -i "s/GOOGLE_RUST_VER=.*/GOOGLE_RUST_VER=${rust_version}/" "$ebuild_file"
-
-echo "Successfully substituted versions into $ebuild_file"
diff --git a/iterate-over-ebuild.sh b/iterate-over-ebuild.sh
new file mode 100755
index 0000000..7cd0f64
--- /dev/null
+++ b/iterate-over-ebuild.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Script to iterate over `ebuild foo-1.2.3.ebuild clean merge` and automatically add values to keeplibs.
+# Usage: ./iterate-over-ebuild.sh foo-1.2.3.ebuild
+# This script will run until the ebuild is merged, or until you interrupt it with Ctrl+C.
+# It will add the libraries to keeplibs in the ebuild as it goes.
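+#
+# The keeplibs detection below keys off unbundling failures of the (assumed) form:
+#   ninja: error: '../../third_party/foo/bar/baz.c', needed by 'obj/...', missing and no known rule to make it
+# from which the library directory (here third_party/foo/bar) is extracted and added to keeplibs.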
+
+package="${1%.ebuild}"
+tmpfile=$(mktemp)
+iter=0
+added=()
+timeout_secs=300
+
+# Trap for Ctrl+C
+trap 'cleanup' INT
+
+cleanup() {
+ echo "[$(date)]: Script interrupted."
+	echo "See $tmpfile for this iteration's logs."
+ exit 1
+}
+
+while true; do
+ start_time=$(date +%s)
+ libs=()
+ echo "[$(date)]: Processing $package; iteration $((++iter))"
+ echo "So far, we've added:"
+ if [ ${#added[@]} -eq 0 ]; then
+ echo " Nothing"
+ fi
+ for i in "${added[@]}"; do
+ echo " $i"
+ done
+ ebuild "${1}" clean merge 2>&1 | tee "$tmpfile"
+
+ # Should only ever be one but whatever
+ mapfile -t libs < <(grep 'ninja: error:' "$tmpfile" | awk '{print $3}' | cut -c 8- | awk -F/ '{OFS="/"; NF--; print}')
+
+ if [ ${#libs[@]} -eq 0 ]; then
+ echo "[$(date)]: No new libraries to whitelist."
+ else
+ for lib in "${libs[@]}"; do
+ echo "[$(date)]: Whitelisting $lib"
+ if grep -q "$lib$" "${1}"; then
+ # Something went wrong if we're here but whatever.
+ echo "[$(date)]: $lib already exists in keeplibs"
+ else
+ echo "[$(date)]: Adding $lib to keeplibs"
+ sed -i "/^\s*local keeplibs=/a $lib" "${1}"
+ added+=("$lib")
+ fi
+ done
+ fi
+
+ if grep -q "www-client/$package merged" "$tmpfile"; then
+ rm "$tmpfile"
+ break
+ fi
+
+ end_time=$(date +%s)
+ elapsed_time=$((end_time - start_time))
+ if [ $elapsed_time -gt $timeout_secs ]; then
+ echo "[$(date)]: Ebuild execution took longer than the timeout. This is likely a build failure that requires patching. Exiting."
+		echo "See $tmpfile for this iteration's logs."
+ exit 1
+ fi
+
+ # Start with a clean slate for the next iteration
+ rm "$tmpfile"
+done
diff --git a/opera-bump b/opera-bump
index c1e3c46..9f6a964 100755
--- a/opera-bump
+++ b/opera-bump
@@ -393,5 +393,6 @@ def main():
f"www-client/{pkg}: remove old",
"-s", "-S")
+
if __name__ == "__main__":
main()