Meta: Make import-wpt-test.py resolve “..” parent refs in URLs/pathnames

This change makes the Meta/import-wpt-test.py script handle URLs such as
https://wpt.live//WebCryptoAPI/generateKey/../util/helpers.js and paths
such as wpt-import/WebCryptoAPI/generateKey/../util/helpers.js — that
is, URLs and paths that contain “..” parent-directory references.

Without this change, when the import-wpt-test.py script requests a URL
like https://wpt.live//WebCryptoAPI/generateKey/../util/helpers.js,
which contains a “..” parent-directory reference, the script fails with
a “urllib.error.HTTPError: HTTP Error 404: Not Found” error.

(cherry picked from commit cf7a1f6a5297f4b01a98a39a3d9da6dd4cbcf5d2)
This commit is contained in:
sideshowbarker 2024-10-31 12:43:09 +09:00 committed by Nico Weber
parent 4181fe7453
commit c5b9ce19eb

View file

@ -4,6 +4,7 @@ import os
import sys
from pathlib import Path
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from urllib.request import urlopen
from collections import namedtuple
@ -81,23 +82,26 @@ def download_files(filepaths):
downloaded_files = []
for file in filepaths:
if (file.destination.exists()):
print(f"Skipping {file.destination} as it already exists")
source = urljoin(file.source, "/".join(file.source.split('/')[3:]))
destination = Path(file.destination).absolute()
if destination.exists():
print(f"Skipping {destination} as it already exists")
continue
print(f"Downloading {file.source} to {file.destination}")
print(f"Downloading {source} to {destination}")
connection = urlopen(file.source)
connection = urlopen(source)
if connection.status != 200:
print(f"Failed to download {file.source}")
continue
os.makedirs(file.destination.parent, exist_ok=True)
os.makedirs(destination.parent, exist_ok=True)
with open(file.destination, 'wb') as f:
with open(destination, 'wb') as f:
f.write(connection.read())
downloaded_files.append(file.destination)
downloaded_files.append(destination)
return downloaded_files