mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-01-23 01:32:14 -05:00
286511c4cf
This change switches the Meta/import-wpt-test.py script to using the standard html.parser module rather than BeautifulSoup. Otherwise, without this change, when a contributor first tries to run the script, if they don’t have BeautifulSoup installed, it will fail. Note that this patch also includes an unrelated small change that switches to using os.path.normpath — rather than Path.absolute() — to “normalize” the destination names of the downloaded test files.
145 lines
4.4 KiB
Python
Executable file
145 lines
4.4 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
import os
|
|
import sys
|
|
|
|
from html.parser import HTMLParser
|
|
from pathlib import Path
|
|
from urllib.parse import urljoin
|
|
from urllib.request import urlopen
|
|
from collections import namedtuple
|
|
|
|
# Base URL that downloaded files are fetched from.
wpt_base_url = 'https://wpt.live/'

# Relative directories that receive the imported files and their expectation
# files; they are made absolute against the current working directory.
wpt_import_path = 'Tests/LibWeb/Text/input/wpt-import'
wpt_expected_path = 'Tests/LibWeb/Text/expected/wpt-import'

# Pairs a remote URL (source) with the local path it is saved to (destination).
PathMapping = namedtuple('PathMapping', 'source destination')

# Filled by ScriptSrcValueFinder with every <script> src value it encounters.
src_values = []
|
|
|
|
|
|
class ScriptSrcValueFinder(HTMLParser):
    """HTML parser that collects the ``src`` of every ``<script>`` start tag.

    Each value found is appended to the module-level ``src_values`` list.
    """

    def handle_starttag(self, tag, attrs):
        """Record the ``src`` attribute of ``tag`` when it is a script tag."""
        if tag != "script":
            return

        attributes = dict(attrs)
        if "src" not in attributes:
            return

        src_values.append(attributes["src"])
|
|
|
|
|
|
def map_to_path(sources, is_resource=True, resource_path=None):
    """Pair each entry of ``sources`` with its download URL and local path.

    When ``is_resource`` is true the entries are resources referenced by the
    test at ``resource_path``: an entry starting with ``/`` is placed directly
    under ``wpt_import_path``, any other entry is placed next to the test
    file. When ``is_resource`` is false every entry is placed under
    ``wpt_import_path`` and ``resource_path`` is not used.

    Returns a list of ``PathMapping`` tuples. ``source`` is ``wpt_base_url``
    followed by the local path with ``wpt_import_path`` removed, and
    ``destination`` is the absolute local ``Path``.
    """
    if is_resource:
        # Directory of the importing test; relative resources become siblings.
        test_directory = str(Path(resource_path).parent)
        sibling_root = wpt_import_path + '/' + test_directory
        local_paths = [
            Path(wpt_import_path + src if src.startswith('/')
                 else sibling_root + '/' + src)
            for src in sources
        ]
    else:
        # Root files live directly below the import directory.
        local_paths = [Path(wpt_import_path + '/' + src) for src in sources]

    mappings = []
    for local_path in local_paths:
        remote_url = wpt_base_url + str(local_path).replace(wpt_import_path, '')
        mappings.append(PathMapping(remote_url, local_path.absolute()))

    return mappings
|
|
|
|
|
|
def modify_sources(files):
    """Rewrite root-relative script URLs in the given files to relative ones.

    Every value in the module-level ``src_values`` list that starts with ``/``
    is replaced, wherever it occurs in a file's text, by the same path
    prefixed with enough ``../`` segments to climb from the file's directory
    back to ``wpt_import_path``. Each file in ``files`` is read and rewritten
    in place as UTF-8. When there is no root-relative value to rewrite, the
    files are left untouched.
    """
    import re

    # Deduplicate the values and try the longest first, so a value that is a
    # prefix of another one cannot shadow it in the alternation below.
    # Value-less ``src`` attributes (None) are ignored.
    root_relative = sorted(
        {src for src in src_values if src and src.startswith('/')},
        key=len,
        reverse=True,
    )
    if not root_relative:
        return

    src_pattern = re.compile('|'.join(re.escape(src) for src in root_relative))

    for file in files:
        # Get the distance to the wpt-imports folder
        file_name = str(file)
        folder_index = file_name.find(wpt_import_path)
        non_prefixed_path = file_name[folder_index + len(wpt_import_path):]
        parent_folder_count = len(Path(non_prefixed_path).parent.parts) - 1
        parent_folder_path = '../' * parent_folder_count

        with open(file, 'r', encoding='utf-8') as f:
            page_source = f.read()

        # Substitute in a single pass: the rewritten text ("../resources/x.js")
        # still contains the original value ("/resources/x.js"), so repeated
        # str.replace calls would rewrite the same occurrence more than once.
        def relativize(match, prefix=parent_folder_path):
            return prefix + match.group(0)[1:]

        page_source = src_pattern.sub(relativize, page_source)

        with open(file, 'w', encoding='utf-8') as f:
            f.write(page_source)
|
|
|
|
|
|
def download_files(filepaths):
    """Download every mapping in ``filepaths`` that is not already on disk.

    Each item must expose ``source`` (a URL string) and ``destination`` (a
    local path) attributes, as ``PathMapping`` does. A destination that
    already exists is skipped. A download that fails, either because opening
    the URL raises or because the response status is not 200, is reported and
    skipped so the remaining files are still processed.

    Returns the list of normalized destination ``Path`` objects written by
    this call.
    """
    from urllib.error import URLError

    downloaded_files = []

    for file in filepaths:
        # Rebuild the URL from everything after the host component and let
        # urljoin resolve it against the original URL.
        source = urljoin(file.source, "/".join(file.source.split('/')[3:]))
        destination = Path(os.path.normpath(file.destination))

        if destination.exists():
            print(f"Skipping {destination} as it already exists")
            continue

        print(f"Downloading {source} to {destination}")

        try:
            # The context manager closes the connection on every path.
            with urlopen(source) as connection:
                if connection.status != 200:
                    print(f"Failed to download {file.source}")
                    continue
                content = connection.read()
        except URLError as error:
            # urlopen raises for HTTP error statuses (HTTPError is a URLError)
            # and for network failures; one bad file must not abort the rest.
            print(f"Failed to download {file.source}: {error}")
            continue

        os.makedirs(destination.parent, exist_ok=True)

        with open(destination, 'wb') as f:
            f.write(content)

        downloaded_files.append(destination)

    return downloaded_files
|
|
|
|
|
|
def create_expectation_files(files):
    """Create an empty ``.txt`` expectation file for each entry of ``files``.

    Each entry must expose a ``destination`` attribute. The expectation path
    is that destination with ``wpt_import_path`` replaced by
    ``wpt_expected_path`` and everything after the last ``.`` replaced by
    ``txt``. An expectation file that already exists is left as it is.
    """
    for mapping in files:
        expected_name = str(mapping.destination).replace(
            wpt_import_path, wpt_expected_path)

        # Cut at the final dot when there is one, then append the new suffix.
        stem, dot, _ = expected_name.rpartition(".")
        expected_file = Path((stem if dot else expected_name) + '.txt')

        if not expected_file.exists():
            os.makedirs(expected_file.parent, exist_ok=True)
            expected_file.touch()
        else:
            print(f"Skipping {expected_file} as it already exists")
|
|
|
|
|
|
def main():
    """Import the test at the URL given as the only command-line argument.

    Downloads the test page, creates its expectation file, collects the
    ``src`` values of the page's script tags, rewrites root-relative script
    URLs in the freshly downloaded page, and finally downloads the scripts.
    Prints a usage line and returns when the argument count is wrong.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: import-wpt-test.py <url>")
        return

    (url_to_import,) = arguments
    # Splitting the URL with Path and dropping the first two components
    # (scheme and host) leaves the path of the test below the WPT root.
    url_parts = Path(url_to_import).parts
    resource_path = '/'.join(url_parts[2:])

    main_paths = map_to_path([resource_path], False)
    files_to_modify = download_files(main_paths)
    create_expectation_files(main_paths)

    with urlopen(url_to_import) as response:
        page = response.read().decode("utf-8")

    # Feeding the page fills the module-level src_values list.
    ScriptSrcValueFinder().feed(page)

    modify_sources(files_to_modify)
    download_files(map_to_path(src_values, True, resource_path))
|
|
|
|
|
|
# Run the importer only when this file is executed as a script.
if __name__ == "__main__":
    main()
|