mirror of
https://github.com/SerenityOS/serenity.git
synced 2025-01-22 09:21:57 -05:00
ff4c99ec0a
(cherry picked from commit 3833049fc8680d4e6349511cf566f27a25225ada)
147 lines
4.5 KiB
Python
Executable file
147 lines
4.5 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
import os
import sys

from collections import namedtuple
from html.parser import HTMLParser
from pathlib import Path
from urllib.error import URLError
from urllib.parse import urljoin
from urllib.request import urlopen
|
|
|
|
# Origin that tests and their resources are downloaded from.
wpt_base_url = 'https://wpt.live/'
# Repository-relative directory that imported tests are written to.
wpt_import_path = 'Tests/LibWeb/Text/input/wpt-import'
# Repository-relative directory that receives the (empty) expectation files.
wpt_expected_path = 'Tests/LibWeb/Text/expected/wpt-import'
# Pairs a download URL (source) with the local path it is saved to (destination).
PathMapping = namedtuple('PathMapping', ['source', 'destination'])

# Script `src` values collected by ScriptSrcValueFinder; shared module-level state.
src_values = []
|
|
|
|
|
|
class ScriptSrcValueFinder(HTMLParser):
    """HTML parser that records the ``src`` of every ``<script>`` start tag.

    Each value found is appended to the module-level ``src_values`` list.
    """

    def handle_starttag(self, tag, attrs):
        """Collect the ``src`` attribute of a ``<script>`` tag, if it has one.

        Args:
            tag: Lower-cased tag name supplied by ``HTMLParser``.
            attrs: List of ``(name, value)`` attribute pairs for the tag.
        """
        if tag != "script":
            return

        src = dict(attrs).get("src")
        # HTMLParser reports a valueless attribute (`<script src>`) as None;
        # neither None nor an empty string names a file to download, and None
        # would break the later `startswith` checks on the collected values.
        if src:
            src_values.append(src)
|
|
|
|
|
|
def map_to_path(sources, is_resource=True, resource_path=None):
    """Map WPT-relative paths to (download URL, local destination) pairs.

    Args:
        sources: Iterable of path strings. For resources these are script
            ``src`` values; otherwise paths relative to the WPT root.
        is_resource: When true, a source starting with '/' is placed directly
            under ``wpt_import_path`` and any other source is placed next to
            ``resource_path``. When false, every source is placed under
            ``wpt_import_path``.
        resource_path: WPT-relative path of the importing page; required when
            ``is_resource`` is true (its parent directory is the sibling
            location).

    Returns:
        A list of ``PathMapping`` tuples whose ``source`` is a URL string and
        whose ``destination`` is an absolute ``Path``.
    """
    if is_resource:
        # Relative resources live in the same directory as the importing page.
        sibling_import_path = wpt_import_path + '/' + str(Path(resource_path).parent)
        local_paths = [
            Path(wpt_import_path + source if source.startswith('/') else sibling_import_path + '/' + source)
            for source in sources
        ]
    else:
        # Root files are addressed relative to the import directory itself.
        local_paths = [Path(wpt_import_path + '/' + source) for source in sources]

    # Stripping the import prefix leaves a path that starts with '/', so the
    # resulting URL has a doubled slash after the host; download_files()
    # collapses it before fetching.
    return [
        PathMapping(wpt_base_url + str(path).replace(wpt_import_path, ''), path.absolute())
        for path in local_paths
    ]
|
|
|
|
|
|
def modify_sources(files):
    """Rewrite root-absolute references in imported pages to relative ones.

    For every file, '/fonts/ahem.css' and each collected script ``src`` that
    starts with '/' is replaced by a path made relative to the file's depth
    below ``wpt_import_path``. The file is rewritten in place.

    Args:
        files: Iterable of paths to already-downloaded pages.
    """
    for file in files:
        # Portion of the path after the import directory, e.g. '/html/a.html'.
        # partition() yields '' when the prefix is absent, which results in an
        # empty relative prefix instead of slicing at a bogus offset.
        _, _, non_prefixed_path = str(file).partition(wpt_import_path)
        # The parent's parts include the leading '/' entry, hence the -1.
        parent_folder_count = len(Path(non_prefixed_path).parent.parts) - 1
        parent_folder_path = '../' * parent_folder_count

        with open(file, 'r', encoding='utf-8') as f:
            page_source = f.read()

        page_source = page_source.replace('/fonts/ahem.css', parent_folder_path + 'fonts/ahem.css')

        # De-duplicate: the rewritten value still contains the original one
        # ('../x.js' contains '/x.js'), so replacing the same src twice would
        # corrupt the reference.
        for src_value in dict.fromkeys(src_values):
            if not src_value or not src_value.startswith('/'):
                continue
            page_source = page_source.replace(src_value, parent_folder_path + src_value[1:])

        # Write once, after every replacement, so the stylesheet rewrite is
        # persisted even when the page has no root-absolute scripts.
        with open(file, 'w', encoding='utf-8') as f:
            f.write(page_source)
|
|
|
|
|
|
def download_files(filepaths):
    """Download every mapping whose destination does not exist yet.

    Args:
        filepaths: Iterable of objects exposing ``source`` (URL string) and
            ``destination`` (path-like) attributes, such as ``PathMapping``.

    Returns:
        A list of ``Path`` objects for the files written by this call.
        Destinations that already existed and downloads that failed are not
        included.
    """
    downloaded_files = []

    for file in filepaths:
        # Sources are built as base URL + '/path', which leaves a doubled
        # slash after the host. Dropping the first three '/'-separated fields
        # and joining the remainder back onto the URL collapses it.
        source = urljoin(file.source, "/".join(file.source.split('/')[3:]))
        destination = Path(os.path.normpath(file.destination))

        if destination.exists():
            print(f"Skipping {destination} as it already exists")
            continue

        print(f"Downloading {source} to {destination}")

        # Fetch the whole body before touching the filesystem so a failed
        # transfer never leaves an empty file that later runs would skip.
        try:
            with urlopen(source) as connection:
                if connection.status != 200:
                    print(f"Failed to download {file.source}")
                    continue
                content = connection.read()
        except URLError as error:
            # urlopen raises (rather than returning a response) for HTTP error
            # statuses and unreachable hosts; skip this file, keep the rest.
            print(f"Failed to download {file.source}: {error}")
            continue

        os.makedirs(destination.parent, exist_ok=True)

        with open(destination, 'wb') as f:
            f.write(content)

        downloaded_files.append(destination)

    return downloaded_files
|
|
|
|
|
|
def create_expectation_files(files):
    """Create an empty ``.txt`` expectation file for each imported test.

    The expectation path is the test's destination with ``wpt_import_path``
    replaced by ``wpt_expected_path`` and the file extension changed to
    ``.txt``. Existing expectation files are left untouched.

    Args:
        files: Iterable of objects with a ``destination`` attribute, such as
            ``PathMapping``.
    """
    for file in files:
        new_path = str(file.destination).replace(wpt_import_path, wpt_expected_path)
        # with_suffix() only looks at the final path component, so a dot in a
        # parent directory name cannot truncate the path.
        expected_file = Path(new_path).with_suffix('.txt')

        if expected_file.exists():
            print(f"Skipping {expected_file} as it already exists")
            continue

        os.makedirs(expected_file.parent, exist_ok=True)
        expected_file.touch()
|
|
|
|
|
|
def main():
    """Import one WPT test page and the scripts it references.

    Expects exactly one command-line argument, the URL of the test. The page
    is downloaded into the import directory, an empty expectation file is
    created for it, its script ``src`` values are collected, root-absolute
    references in the saved page are made relative, and finally the scripts
    themselves are downloaded.
    """
    if len(sys.argv) != 2:
        print("Usage: import-wpt-test.py <url>")
        return

    url_to_import = sys.argv[1]
    # Path collapses the '//' after the scheme, so the parts are
    # (scheme, host, *path); dropping the first two leaves the WPT-relative path.
    resource_path = '/'.join(Path(url_to_import).parts[2:])

    main_paths = map_to_path([resource_path], False)
    files_to_modify = download_files(main_paths)
    create_expectation_files(main_paths)

    with urlopen(url_to_import) as response:
        page = response.read().decode("utf-8")

    # Feeding the page fills the module-level src_values list.
    parser = ScriptSrcValueFinder()
    parser.feed(page)

    modify_sources(files_to_modify)
    script_paths = map_to_path(src_values, True, resource_path)
    download_files(script_paths)
|
|
|
|
|
|
# Run the importer only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|