From 09a3dcb9c9f52a8a64ca495a79fab593350c4880 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sat, 22 Jun 2024 18:53:11 +0200 Subject: [PATCH] LibWeb: Implement very basic in-memory HTTP caching This patch adds a simple in-memory HTTP cache to each WebContent process. It's currently off by default (turn it on with --enable-http-cache) since the validation logic is lacking and incomplete. (cherry picked from commit a3c8e60710451c1325f6718b253e8d1ee2029a30) --- Ladybird/AppKit/main.mm | 3 + Ladybird/HelperProcess.cpp | 2 + Ladybird/Qt/main.cpp | 3 + Ladybird/Types.h | 6 + Ladybird/WebContent/main.cpp | 10 + .../LibWeb/Fetch/Fetching/Fetching.cpp | 223 +++++++++++++++--- 6 files changed, 214 insertions(+), 33 deletions(-) diff --git a/Ladybird/AppKit/main.mm b/Ladybird/AppKit/main.mm index 0efca8d491c..402b43b7488 100644 --- a/Ladybird/AppKit/main.mm +++ b/Ladybird/AppKit/main.mm @@ -89,6 +89,7 @@ ErrorOr serenity_main(Main::Arguments arguments) bool use_gpu_painting = false; bool debug_web_content = false; bool log_all_js_exceptions = false; + bool enable_http_cache = false; bool new_window = false; bool force_new_process = false; bool allow_popups = false; @@ -101,6 +102,7 @@ ErrorOr serenity_main(Main::Arguments arguments) args_parser.add_option(debug_web_content, "Wait for debugger to attach to WebContent", "debug-web-content"); args_parser.add_option(certificates, "Path to a certificate file", "certificate", 'C', "certificate"); args_parser.add_option(log_all_js_exceptions, "Log all JavaScript exceptions", "log-all-js-exceptions"); + args_parser.add_option(enable_http_cache, "Enable HTTP cache", "enable-http-cache"); args_parser.add_option(new_window, "Force opening in a new window", "new-window", 'n'); args_parser.add_option(force_new_process, "Force creation of new browser/chrome process", "force-new-process"); args_parser.add_option(allow_popups, "Disable popup blocking by default", "allow-popups"); @@ -143,6 +145,7 @@ ErrorOr serenity_main(Main::Arguments arguments) .enable_gpu_painting = use_gpu_painting ? Ladybird::EnableGPUPainting::Yes : Ladybird::EnableGPUPainting::No, .wait_for_debugger = debug_web_content ? Ladybird::WaitForDebugger::Yes : Ladybird::WaitForDebugger::No, .log_all_js_exceptions = log_all_js_exceptions ? Ladybird::LogAllJSExceptions::Yes : Ladybird::LogAllJSExceptions::No, + .enable_http_cache = enable_http_cache ? Ladybird::EnableHTTPCache::Yes : Ladybird::EnableHTTPCache::No, }; auto* delegate = [[ApplicationDelegate alloc] init:sanitize_urls(raw_urls) diff --git a/Ladybird/HelperProcess.cpp b/Ladybird/HelperProcess.cpp index 3c6da4acd20..941465a4e77 100644 --- a/Ladybird/HelperProcess.cpp +++ b/Ladybird/HelperProcess.cpp @@ -124,6 +124,8 @@ ErrorOr> launch_web_content_process( arguments.append("--log-all-js-exceptions"sv); if (web_content_options.enable_idl_tracing == Ladybird::EnableIDLTracing::Yes) arguments.append("--enable-idl-tracing"sv); + if (web_content_options.enable_http_cache == Ladybird::EnableHTTPCache::Yes) + arguments.append("--enable-http-cache"sv); if (web_content_options.expose_internals_object == Ladybird::ExposeInternalsObject::Yes) arguments.append("--expose-internals-object"sv); if (auto server = mach_server_name(); server.has_value()) { diff --git a/Ladybird/Qt/main.cpp b/Ladybird/Qt/main.cpp index 8afa5c40bc0..f0d7a9ea124 100644 --- a/Ladybird/Qt/main.cpp +++ b/Ladybird/Qt/main.cpp @@ -99,6 +99,7 @@ ErrorOr serenity_main(Main::Arguments arguments) bool debug_web_content = false; bool log_all_js_exceptions = false; bool enable_idl_tracing = false; + bool enable_http_cache = false; bool new_window = false; bool force_new_process = false; bool allow_popups = false; @@ -116,6 +117,7 @@ ErrorOr serenity_main(Main::Arguments arguments) args_parser.add_option(certificates, "Path to a certificate file", "certificate", 'C', "certificate"); args_parser.add_option(log_all_js_exceptions, "Log all JavaScript exceptions", "log-all-js-exceptions"); args_parser.add_option(enable_idl_tracing, "Enable IDL tracing", "enable-idl-tracing"); + args_parser.add_option(enable_http_cache, "Enable HTTP cache", "enable-http-cache"); args_parser.add_option(expose_internals_object, "Expose internals object", "expose-internals-object"); args_parser.add_option(new_window, "Force opening in a new window", "new-window", 'n'); args_parser.add_option(force_new_process, "Force creation of new browser/chrome process", "force-new-process"); @@ -183,6 +185,7 @@ ErrorOr serenity_main(Main::Arguments arguments) .wait_for_debugger = debug_web_content ? Ladybird::WaitForDebugger::Yes : Ladybird::WaitForDebugger::No, .log_all_js_exceptions = log_all_js_exceptions ? Ladybird::LogAllJSExceptions::Yes : Ladybird::LogAllJSExceptions::No, .enable_idl_tracing = enable_idl_tracing ? Ladybird::EnableIDLTracing::Yes : Ladybird::EnableIDLTracing::No, + .enable_http_cache = enable_http_cache ? Ladybird::EnableHTTPCache::Yes : Ladybird::EnableHTTPCache::No, .expose_internals_object = expose_internals_object ? Ladybird::ExposeInternalsObject::Yes : Ladybird::ExposeInternalsObject::No, }; diff --git a/Ladybird/Types.h b/Ladybird/Types.h index 3565b15d477..183edf2491f 100644 --- a/Ladybird/Types.h +++ b/Ladybird/Types.h @@ -50,6 +50,11 @@ enum class EnableIDLTracing { Yes }; +enum class EnableHTTPCache { + No, + Yes +}; + enum class ExposeInternalsObject { No, Yes @@ -66,6 +71,7 @@ struct WebContentOptions { WaitForDebugger wait_for_debugger { WaitForDebugger::No }; LogAllJSExceptions log_all_js_exceptions { LogAllJSExceptions::No }; EnableIDLTracing enable_idl_tracing { EnableIDLTracing::No }; + EnableHTTPCache enable_http_cache { EnableHTTPCache::No }; ExposeInternalsObject expose_internals_object { ExposeInternalsObject::No }; }; diff --git a/Ladybird/WebContent/main.cpp b/Ladybird/WebContent/main.cpp index 6b7344f5f50..48e46ff2695 100644 --- a/Ladybird/WebContent/main.cpp +++ b/Ladybird/WebContent/main.cpp @@ -61,6 +61,10 @@ namespace Web::WebIDL { extern bool g_enable_idl_tracing; } +namespace Web::Fetch::Fetching { +extern bool g_http_cache_enabled; +} + ErrorOr serenity_main(Main::Arguments arguments) { AK::set_rich_debug_enabled(true); @@ -101,6 +105,7 @@ ErrorOr serenity_main(Main::Arguments arguments) bool wait_for_debugger = false; bool log_all_js_exceptions = false; bool enable_idl_tracing = false; + bool enable_http_cache = false; Core::ArgsParser args_parser; args_parser.add_option(command_line, "Chrome process command line", "command-line", 0, "command_line"); @@ -115,6 +120,7 @@ ErrorOr serenity_main(Main::Arguments arguments) args_parser.add_option(mach_server_name, "Mach server name", "mach-server-name", 0, "mach_server_name"); args_parser.add_option(log_all_js_exceptions, "Log all JavaScript exceptions", "log-all-js-exceptions"); args_parser.add_option(enable_idl_tracing, "Enable IDL tracing", "enable-idl-tracing"); + args_parser.add_option(enable_http_cache, "Enable HTTP cache", "enable-http-cache"); args_parser.parse(arguments); @@ -136,6 +142,10 @@ ErrorOr serenity_main(Main::Arguments arguments) WebContent::PageClient::set_use_experimental_cpu_transform_support(); } + if (enable_http_cache) { + Web::Fetch::Fetching::g_http_cache_enabled = true; + } + #if defined(AK_OS_MACOS) if (!mach_server_name.is_empty()) { Core::Platform::register_with_mach_server(mach_server_name); diff --git a/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp b/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp index 07937601266..65ca81051d3 100644 --- a/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp +++ b/Userland/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp @@ -59,6 +59,8 @@ namespace Web::Fetch::Fetching { +bool g_http_cache_enabled; + #define TRY_OR_IGNORE(expression) \ ({ \ auto&& _temporary_result = (expression); \ @@ -1296,33 +1298,188 @@ WebIDL::ExceptionOr> http_redirect_fetch(JS::Realm& r return main_fetch(realm, fetch_params, recursive); } -struct CachedResponse { - Vector headers; +class CachedResponse : public RefCounted { +public: + HTTP::HeaderMap headers; ByteBuffer body; + Infrastructure::Response::BodyInfo body_info; ByteBuffer method; URL::URL url; UnixDateTime current_age; }; -class CachePartition { +class CachePartition : public RefCounted { public: - // FIXME: Copy the headers... less - Optional select_response(URL::URL const& url, ReadonlyBytes method, Vector const& headers) const + // https://httpwg.org/specs/rfc9111.html#constructing.responses.from.caches + JS::GCPtr select_response(JS::Realm& realm, URL::URL const& url, ReadonlyBytes method, Vector const& headers) const { + // When presented with a request, a cache MUST NOT reuse a stored response unless: + + // - the presented target URI (Section 7.1 of [HTTP]) and that of the stored response match, and auto it = m_cache.find(url); - if (it == m_cache.end()) + if (it == m_cache.end()) { + dbgln("\033[31;1mHTTP CACHE MISS!\033[0m {}", url); return {}; + } + auto const& cached_response = *it->value; - auto const& cached_response = it->value; + // - the request method associated with the stored response allows it to be used for the presented request, and + if (method != cached_response.method) { + dbgln("\033[31;1mHTTP CACHE MISS!\033[0m (Bad method) {}", url); + return {}; + } - // FIXME: Validate headers and method - (void)method; + // FIXME: - request header fields nominated by the stored response (if any) match those presented (see Section 4.1), and (void)headers; - return cached_response; + + // FIXME: - the stored response does not contain the no-cache directive (Section 5.2.2.4), unless it is successfully validated (Section 4.3), and + // the stored response is one of the following: + // + fresh (see Section 4.2), or + // + allowed to be served stale (see Section 4.2.4), or + // + successfully validated (see Section 4.3). + + dbgln("\033[32;1mHTTP CACHE HIT!\033[0m {}", url); + + auto [body, _] = MUST(extract_body(realm, ReadonlyBytes(cached_response.body))); + auto response = Infrastructure::Response::create(realm.vm()); + response->set_body(body); + response->set_body_info(cached_response.body_info); + for (auto& [name, value] : cached_response.headers.headers()) { + response->header_list()->append(Infrastructure::Header::from_string_pair(name, value)); + } + return response; + } + + void store_response(Infrastructure::Request const& http_request, Infrastructure::Response const& response) + { + if (!is_cacheable(http_request, response)) + return; + + auto cached_response = adopt_ref(*new CachedResponse); + store_header_and_trailer_fields(response, cached_response->headers); + cached_response->body = response.body()->source().get(); + cached_response->body_info = response.body_info(); + cached_response->method = MUST(ByteBuffer::copy(http_request.method())); + cached_response->url = http_request.current_url(); + cached_response->current_age = UnixDateTime::now(); + m_cache.set(http_request.current_url(), move(cached_response)); + } + + // https://httpwg.org/specs/rfc9111.html#freshening.responses + void freshen_stored_responses_upon_validation(Infrastructure::Request const& http_request, Infrastructure::Response const& response) + { + auto it = m_cache.find(http_request.current_url()); + if (it == m_cache.end()) + return; + + // For each stored response identified, the cache MUST update its header fields + // with the header fields provided in the 304 (Not Modified) response, as per Section 3.2. + auto& cached_response = *it->value; + update_stored_header_fields(response, cached_response.headers); } private: - HashMap m_cache; + // https://httpwg.org/specs/rfc9111.html#update + void update_stored_header_fields(Infrastructure::Response const& response, HTTP::HeaderMap& headers) + { + for (auto& header : *response.header_list()) { + auto name = StringView(header.name); + if (name.is_one_of_ignoring_ascii_case( + "Connection"sv, + "Proxy-Connection"sv, + "Keep-Alive"sv, + "TE"sv, + "Transfer-Encoding"sv, + "Upgrade"sv, + "Content-Length"sv)) { + continue; + } + headers.set(ByteString::copy(header.name), ByteString::copy(header.value)); + } + } + + // https://httpwg.org/specs/rfc9111.html#storing.fields + void store_header_and_trailer_fields(Infrastructure::Response const& response, HTTP::HeaderMap& headers) + { + // Caches MUST include all received response header fields — including unrecognized ones — when storing a response; + // this assures that new HTTP header fields can be successfully deployed. However, the following exceptions are made: + + // - The Connection header field and fields whose names are listed in it are required by Section 7.6.1 of [HTTP] + // to be removed before forwarding the message. This MAY be implemented by doing so before storage. + + // - Likewise, some fields' semantics require them to be removed before forwarding the message, and this MAY be + // implemented by doing so before storage; see Section 7.6.1 of [HTTP] for some examples. + + // FIXME: - The no-cache (Section 5.2.2.4) and private (Section 5.2.2.7) cache directives can have arguments that + // prevent storage of header fields by all caches and shared caches, respectively. + + // FIXME: - Header fields that are specific to the proxy that a cache uses when forwarding a request MUST NOT be stored, + // unless the cache incorporates the identity of the proxy into the cache key. + // Effectively, this is limited to Proxy-Authenticate (Section 11.7.1 of [HTTP]), Proxy-Authentication-Info (Section 11.7.3 of [HTTP]), and Proxy-Authorization (Section 11.7.2 of [HTTP]). + + for (auto& header : *response.header_list()) { + auto name = StringView(header.name); + if (name.is_one_of_ignoring_ascii_case( + "Connection"sv, + "Proxy-Connection"sv, + "Keep-Alive"sv, + "TE"sv, + "Transfer-Encoding"sv, + "Upgrade"sv)) { + continue; + } + headers.set(ByteString::copy(header.name), ByteString::copy(header.value)); + } + } + + // https://httpwg.org/specs/rfc9111.html#response.cacheability + static bool is_cacheable(Infrastructure::Request const& request, Infrastructure::Response const& response) + { + // A cache MUST NOT store a response to a request unless: + + // - AD-HOC: For now, we simply don't cache responses without a simple ByteBuffer body. + if (!response.body() || !response.body()->source().has()) + return false; + + // - the request method is understood by the cache; + if (request.method() != "GET"sv.bytes() && request.method() != "HEAD"sv.bytes()) + return false; + + // - the response status code is final (see Section 15 of [HTTP]); + if (response.status() < 200) + return false; + + // - if the response status code is 206 or 304, + // or the must-understand cache directive (see Section 5.2.2.3) is present: + // the cache understands the response status code; + if (response.status() == 206 || response.status() == 304) { + // FIXME: Implement must-understand cache directive + } + + // - the no-store cache directive is not present in the response (see Section 5.2.2.5); + if (request.cache_mode() == Infrastructure::Request::CacheMode::NoStore) + return false; + + // FIXME: - if the cache is shared: the private response directive is either not present + // or allows a shared cache to store a modified response; see Section 5.2.2.7); + + // FIXME: - if the cache is shared: the Authorization header field is not present in the + // request (see Section 11.6.2 of [HTTP]) or a response directive is present + // that explicitly allows shared caching (see Section 3.5); and + + // FIXME: - the response contains at least one of the following: + // + a public response directive (see Section 5.2.2.9); + // + a private response directive, if the cache is not shared (see Section 5.2.2.7); + // + an Expires header field (see Section 5.3); + // + a max-age response directive (see Section 5.2.2.1); + // + if the cache is shared: an s-maxage response directive (see Section 5.2.2.10); + // + a cache extension that allows it to be cached (see Section 5.2.3); or + // + a status code that is defined as heuristically cacheable (see Section 4.2.2). + + return true; + } + + HashMap> m_cache; }; class HTTPCache { @@ -1330,7 +1487,7 @@ public: CachePartition& get(Infrastructure::NetworkPartitionKey const& key) { return *m_cache.ensure(key, [] { - return make(); + return adopt_ref(*new CachePartition); }); } @@ -1341,18 +1498,21 @@ public: } private: - HashMap> m_cache; + HashMap> m_cache; }; // https://fetch.spec.whatwg.org/#determine-the-http-cache-partition -static Optional determine_the_http_cache_partition(Infrastructure::Request const& request) +static RefPtr determine_the_http_cache_partition(Infrastructure::Request const& request) { + if (!g_http_cache_enabled) + return nullptr; + // 1. Let key be the result of determining the network partition key given request. auto key = Infrastructure::determine_the_network_partition_key(request); // 2. If key is null, then return null. if (!key.has_value()) - return OptionalNone {}; + return nullptr; // 3. Return the unique HTTP cache associated with key. [HTTP-CACHING] return HTTPCache::the().get(key.value()); @@ -1383,7 +1543,7 @@ WebIDL::ExceptionOr> http_network_or_cache_fet // 6. Let httpCache be null. // (Typeless until we actually implement it, needed for checks below) - Optional http_cache; + RefPtr http_cache; // 7. Let the revalidatingFlag be unset. auto revalidating_flag = RefCountedFlag::create(false); @@ -1663,7 +1823,7 @@ WebIDL::ExceptionOr> http_network_or_cache_fet http_cache = determine_the_http_cache_partition(*http_request); // 24. If httpCache is null, then set httpRequest’s cache mode to "no-store". - if (!http_cache.has_value()) + if (!http_cache) http_request->set_cache_mode(Infrastructure::Request::CacheMode::NoStore); // 25. If httpRequest’s cache mode is neither "no-store" nor "reload", then: @@ -1673,15 +1833,9 @@ WebIDL::ExceptionOr> http_network_or_cache_fet // validation, as per the "Constructing Responses from Caches" chapter of HTTP Caching [HTTP-CACHING], // if any. // NOTE: As mandated by HTTP, this still takes the `Vary` header into account. - auto raw_response = http_cache->select_response(http_request->url(), http_request->method(), *http_request->header_list()); + stored_response = http_cache->select_response(realm, http_request->current_url(), http_request->method(), *http_request->header_list()); // 2. If storedResponse is non-null, then: - if (raw_response.has_value()) { - - // FIXME: Set more properties from the cached response - auto [body, _] = TRY(extract_body(realm, ReadonlyBytes(raw_response->body))); - stored_response = Infrastructure::Response::create(vm); - stored_response->set_body(body); - + if (stored_response) { // 1. If cache mode is "default", storedResponse is a stale-while-revalidate response, // and httpRequest’s client is non-null, then: if (http_request->cache_mode() == Infrastructure::Request::CacheMode::Default @@ -1724,12 +1878,12 @@ WebIDL::ExceptionOr> http_network_or_cache_fet // 1. If storedResponse’s header list contains `ETag`, then append (`If-None-Match`, `ETag`'s value) to httpRequest’s header list. if (auto etag = stored_response->header_list()->get("ETag"sv.bytes()); etag.has_value()) { - stored_response->header_list()->append(Infrastructure::Header::from_string_pair("If-None-Match"sv, *etag)); + http_request->header_list()->append(Infrastructure::Header::from_string_pair("If-None-Match"sv, *etag)); } // 2. If storedResponse’s header list contains `Last-Modified`, then append (`If-Modified-Since`, `Last-Modified`'s value) to httpRequest’s header list. if (auto last_modified = stored_response->header_list()->get("Last-Modified"sv.bytes()); last_modified.has_value()) { - stored_response->header_list()->append(Infrastructure::Header::from_string_pair("If-Modified-Since"sv, *last_modified)); + http_request->header_list()->append(Infrastructure::Header::from_string_pair("If-Modified-Since"sv, *last_modified)); } } // 3. Otherwise, set response to storedResponse and set response’s cache state to "local". @@ -1763,13 +1917,13 @@ WebIDL::ExceptionOr> http_network_or_cache_fet auto returned_pending_response = PendingResponse::create(vm, request); - pending_forward_response->when_loaded([&realm, &vm, &fetch_params, request, response, stored_response, http_request, returned_pending_response, is_authentication_fetch, is_new_connection_fetch, revalidating_flag, include_credentials, response_was_null = !response](JS::NonnullGCPtr resolved_forward_response) mutable { + pending_forward_response->when_loaded([&realm, &vm, &fetch_params, request, response, stored_response, http_request, returned_pending_response, is_authentication_fetch, is_new_connection_fetch, revalidating_flag, include_credentials, response_was_null = !response, http_cache](JS::NonnullGCPtr resolved_forward_response) mutable { dbgln_if(WEB_FETCH_DEBUG, "Fetch: Running 'HTTP-network-or-cache fetch' pending_forward_response load callback"); if (response_was_null) { auto forward_response = resolved_forward_response; // NOTE: TRACE is omitted as it is a forbidden method in Fetch. - auto method_is_unsafe = StringView { http_request->method() }.is_one_of("GET"sv, "HEAD"sv, "OPTIONS"sv); + auto method_is_unsafe = !(StringView { http_request->method() }.is_one_of("GET"sv, "HEAD"sv, "OPTIONS"sv)); // 3. If httpRequest’s method is unsafe and forwardResponse’s status is in the range 200 to 399, inclusive, // invalidate appropriate stored responses in httpCache, as per the "Invalidation" chapter of HTTP @@ -1781,9 +1935,11 @@ WebIDL::ExceptionOr> http_network_or_cache_fet // 4. If the revalidatingFlag is set and forwardResponse’s status is 304, then: if (revalidating_flag->value() && forward_response->status() == 304) { - // FIXME: 1. Update storedResponse’s header list using forwardResponse’s header list, as per the "Freshening - // Stored Responses upon Validation" chapter of HTTP Caching. + dbgln("\033[34;1mHTTP CACHE REVALIDATE (304)\033[0m {}", http_request->current_url()); + // 1. Update storedResponse’s header list using forwardResponse’s header list, as per the "Freshening + // Stored Responses upon Validation" chapter of HTTP Caching. // NOTE: This updates the stored response in cache as well. + http_cache->freshen_stored_responses_upon_validation(*http_request, *forward_response); // 2. Set response to storedResponse. response = stored_response; @@ -1798,11 +1954,12 @@ WebIDL::ExceptionOr> http_network_or_cache_fet // 1. Set response to forwardResponse. response = forward_response; - // FIXME: 2. Store httpRequest and forwardResponse in httpCache, as per the "Storing Responses in Caches" - // chapter of HTTP Caching. + // 2. Store httpRequest and forwardResponse in httpCache, as per the "Storing Responses in Caches" chapter of HTTP Caching. // NOTE: If forwardResponse is a network error, this effectively caches the network error, which is // sometimes known as "negative caching". // NOTE: The associated body info is stored in the cache alongside the response. + if (http_cache) + http_cache->store_response(*http_request, *forward_response); } }