Kernel: Flush TLBs concurrently

Instead of flushing the TLB on the current processor first and then notifying the other processors to do the same, notify the other processors first and flush our own TLB while waiting for them to finish.
2025-01-24 02:12:09 -05:00 · 2020-12-02 15:03:07 -07:00 · 2020-12-02 15:03:07 -07:00 · 766db673c1
commit 766db673c1
parent 6914cf830d
2 changed files with 41 additions and 25 deletions
--- a/Kernel/Arch/i386/CPU.cpp
+++ b/Kernel/Arch/i386/CPU.cpp
@@ -1711,9 +1711,10 @@ void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)

 void Processor::flush_tlb(VirtualAddress vaddr, size_t page_count)
 {
-    flush_tlb_local(vaddr, page_count);
    if (s_smp_enabled)
        smp_broadcast_flush_tlb(vaddr, page_count);
+    else
+        flush_tlb_local(vaddr, page_count);
 }

 static volatile ProcessorMessage* s_message_pool;
@@ -1871,29 +1872,33 @@ bool Processor::smp_queue_message(ProcessorMessage& msg)
    return next == nullptr;
 }

-void Processor::smp_broadcast_message(ProcessorMessage& msg, bool async)
+void Processor::smp_broadcast_message(ProcessorMessage& msg)
 {
    auto& cur_proc = Processor::current();
-    msg.async = async;
 #ifdef SMP_DEBUG
    dbg() << "SMP[" << cur_proc.id() << "]: Broadcast message " << VirtualAddress(&msg) << " to cpus: " << (count()) << " proc: " << VirtualAddress(&cur_proc);
 #endif
    atomic_store(&msg.refs, count() - 1, AK::MemoryOrder::memory_order_release);
    ASSERT(msg.refs > 0);
+    bool need_broadcast = false;
    for_each(
        [&](Processor& proc) -> IterationDecision {
            if (&proc != &cur_proc) {
-                if (proc.smp_queue_message(msg)) {
-                    // TODO: only send IPI to that CPU if we queued the first
-                }
+                if (proc.smp_queue_message(msg))
+                    need_broadcast = true;
            }
            return IterationDecision::Continue;
        });

-    // Now trigger an IPI on all other APs
+    // Now trigger an IPI on all other APs (unless all targets already had messages queued)
+    if (need_broadcast)
        APIC::the().broadcast_ipi();
+}

-    if (!async) {
+void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
+{
+    auto& cur_proc = Processor::current();
+    ASSERT(!msg.async);
    // If synchronous then we must cleanup and return the message back
    // to the pool. Otherwise, the last processor to complete it will return it
    while (atomic_load(&msg.refs, AK::MemoryOrder::memory_order_consume) != 0) {
@@ -1907,25 +1912,30 @@ void Processor::smp_broadcast_message(ProcessorMessage& msg, bool async)

    smp_cleanup_message(msg);
    smp_return_to_pool(msg);
-    }
 }

 void Processor::smp_broadcast(void (*callback)(void*), void* data, void (*free_data)(void*), bool async)
 {
    auto& msg = smp_get_from_pool();
+    msg.async = async;
    msg.type = ProcessorMessage::CallbackWithData;
    msg.callback_with_data.handler = callback;
    msg.callback_with_data.data = data;
    msg.callback_with_data.free = free_data;
-    smp_broadcast_message(msg, async);
+    smp_broadcast_message(msg);
+    if (!async)
+        smp_broadcast_wait_sync(msg);
 }

 void Processor::smp_broadcast(void (*callback)(), bool async)
 {
    auto& msg = smp_get_from_pool();
+    msg.async = async;
    msg.type = ProcessorMessage::CallbackWithData;
    msg.callback.handler = callback;
-    smp_broadcast_message(msg, async);
+    smp_broadcast_message(msg);
+    if (!async)
+        smp_broadcast_wait_sync(msg);
 }

 void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
@@ -1980,10 +1990,15 @@ void Processor::smp_unicast(u32 cpu, void (*callback)(), bool async)
 void Processor::smp_broadcast_flush_tlb(VirtualAddress vaddr, size_t page_count)
 {
    auto& msg = smp_get_from_pool();
+    msg.async = false;
    msg.type = ProcessorMessage::FlushTlb;
    msg.flush_tlb.ptr = vaddr.as_ptr();
    msg.flush_tlb.page_count = page_count;
-    smp_broadcast_message(msg, false);
+    smp_broadcast_message(msg);
+    // While the other processors handle this request, we'll flush ours
+    flush_tlb_local(vaddr, page_count);
+    // Now wait until everybody is done as well
+    smp_broadcast_wait_sync(msg);
 }

 void Processor::smp_broadcast_halt()
--- a/Kernel/Arch/i386/CPU.h
+++ b/Kernel/Arch/i386/CPU.h
@@ -740,7 +740,8 @@ class Processor {
    static void smp_cleanup_message(ProcessorMessage& msg);
    bool smp_queue_message(ProcessorMessage& msg);
    static void smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async);
-    static void smp_broadcast_message(ProcessorMessage& msg, bool async);
+    static void smp_broadcast_message(ProcessorMessage& msg);
+    static void smp_broadcast_wait_sync(ProcessorMessage& msg);
    static void smp_broadcast_halt();

    void deferred_call_pool_init();