diff --git a/Documentation/arch/x86/amd-memory-encryption.rst b/Documentation/arch/x86/amd-memory-encryption.rst index 6df3264f23b9..bd840df708ea 100644 --- a/Documentation/arch/x86/amd-memory-encryption.rst +++ b/Documentation/arch/x86/amd-memory-encryption.rst @@ -130,8 +130,126 @@ SNP feature support. More details in AMD64 APM[1] Vol 2: 15.34.10 SEV_STATUS MSR +Reverse Map Table (RMP) +======================= + +The RMP is a structure in system memory that is used to ensure a one-to-one +mapping between system physical addresses and guest physical addresses. Each +page of memory that is potentially assignable to guests has one entry within +the RMP. + +The RMP table can be either contiguous in memory or a collection of segments +in memory. + +Contiguous RMP +-------------- + +Support for this form of the RMP is present when support for SEV-SNP is +present, which can be determined using the CPUID instruction:: + + 0x8000001f[eax]: + Bit[4] indicates support for SEV-SNP + +The location of the RMP is identified to the hardware through two MSRs:: + + 0xc0010132 (RMP_BASE): + System physical address of the first byte of the RMP + + 0xc0010133 (RMP_END): + System physical address of the last byte of the RMP + +Hardware requires that RMP_BASE and (RMP_END + 1) be 8KB aligned, but SEV +firmware increases the alignment requirement to 1MB. + +The RMP consists of a 16KB region used for processor bookkeeping followed +by the RMP entries, which are 16 bytes in size. The size of the RMP +determines the range of physical memory that the hypervisor can assign to +SEV-SNP guests. The RMP covers the system physical address range from:: + + 0 to ((RMP_END + 1 - RMP_BASE - 16KB) / 16B) x 4KB. + +The current Linux support relies on BIOS to allocate/reserve the memory for +the RMP and to set RMP_BASE and RMP_END appropriately. Linux uses the MSR +values to locate the RMP and determine the size of the RMP. The RMP must +cover all of system memory in order for Linux to enable SEV-SNP. + +Segmented RMP +------------- + +Segmented RMP support is a new way of representing the layout of an RMP. +Initial RMP support required the RMP table to be contiguous in memory. +RMP accesses from a NUMA node on which the RMP doesn't reside +can take longer than accesses from a NUMA node on which the RMP resides. +Segmented RMP support allows the RMP entries to be located on the same +node as the memory the RMP is covering, potentially reducing the latency +of accessing an RMP entry for that memory. Each +RMP segment covers a specific range of system physical addresses. + +Support for this form of the RMP can be determined using the CPUID +instruction:: + + 0x8000001f[eax]: + Bit[23] indicates support for segmented RMP + +If supported, segmented RMP attributes can be found using the CPUID +instruction:: + + 0x80000025[eax]: + Bits[5:0] minimum supported RMP segment size + Bits[11:6] maximum supported RMP segment size + + 0x80000025[ebx]: + Bits[9:0] number of cacheable RMP segment definitions + Bit[10] indicates if the number of cacheable RMP segments + is a hard limit + +To enable a segmented RMP, a new MSR is available:: + + 0xc0010136 (RMP_CFG): + Bit[0] indicates if segmented RMP is enabled + Bits[13:8] contain the size of memory covered by an RMP + segment (expressed as a power of 2) + +The RMP segment size defined in the RMP_CFG MSR applies to all segments +of the RMP. Therefore each RMP segment covers a specific range of system +physical addresses.
For example, if the RMP_CFG MSR value is 0x2401, then +the RMP segment coverage value is 0x24 => 36, meaning the size of memory +covered by an RMP segment is 64GB (1 << 36). So the first RMP segment +covers physical addresses from 0 to 0xF_FFFF_FFFF, the second RMP segment +covers physical addresses from 0x10_0000_0000 to 0x1F_FFFF_FFFF, etc. + +When a segmented RMP is enabled, RMP_BASE points to the RMP bookkeeping +area as it does today (16K in size). However, instead of RMP entries +beginning immediately after the bookkeeping area, there is a 4K RMP +segment table (RST). Each entry in the RST is 8 bytes in size and represents +an RMP segment:: + + Bits[19:0] mapped size (in GB) + The mapped size can be less than the defined segment size. + A value of zero indicates that no RMP exists for the range + of system physical addresses associated with this segment. + Bits[51:20] segment physical address + This address is left-shifted 20 bits (or just masked when + read) to form the physical address of the segment (1MB + alignment). + +The RST can hold 512 segment entries but can be limited in size to the number +of cacheable RMP segments (CPUID 0x80000025_EBX[9:0]) if the number of cacheable +RMP segments is a hard limit (CPUID 0x80000025_EBX[10]). + +The current Linux support relies on BIOS to allocate/reserve the memory for +the segmented RMP (the bookkeeping area, RST, and all segments), to build the RST, +and to set RMP_BASE, RMP_END, and RMP_CFG appropriately. Linux uses the MSR +values to locate the RMP and determine the size and location of the RMP +segments. The RMP must cover all of system memory in order for Linux to enable +SEV-SNP. + +More details in the AMD64 APM Vol 2, section "15.36.3 Reverse Map Table", +docID: 24593. + Secure VM Service Module (SVSM) =============================== + SNP provides a feature called Virtual Machine Privilege Levels (VMPL) which defines four privilege levels at which guest software can run. The most privileged level is 0 and numerically higher numbers have lesser privileges. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 768dc2c63972..a6aa17d5eefe 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1558,6 +1558,7 @@ config AMD_MEM_ENCRYPT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select UNACCEPTED_MEMORY + select CRYPTO_LIB_AESGCM help Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index cd44e120fe53..bb55934c1cee 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -401,7 +401,8 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP +#define SNP_FEATURES_PRESENT (MSR_AMD64_SNP_DEBUG_SWAP | \ + MSR_AMD64_SNP_SECURE_TSC) u64 snp_get_unsupported_features(u64 status) { diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 0f81f70aca82..9a0ddda3aa69 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -65,7 +65,6 @@ static __maybe_unused __always_inline bool amd_cc_platform_vtom(enum cc_attr att * up under SME the trampoline area cannot be encrypted, whereas under SEV * the trampoline area must be encrypted.
*/ - static bool noinstr amd_cc_platform_has(enum cc_attr attr) { #ifdef CONFIG_AMD_MEM_ENCRYPT @@ -97,6 +96,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_GUEST_SNP_SECURE_TSC: + return sev_status & MSR_AMD64_SNP_SECURE_TSC; + case CC_ATTR_HOST_SEV_SNP: return cc_flags.host_sev_snp; diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index c5b0148b8c0a..65d676c0f7bc 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,15 @@ static u64 sev_hv_features __ro_after_init; /* Secrets page physical address from the CC blob */ static u64 secrets_pa __ro_after_init; +/* + * For Secure TSC guests, the BSP fetches TSC_INFO using SNP guest messaging and + * initializes snp_tsc_scale and snp_tsc_offset. These values are replicated + * across the APs VMSA fields (TSC_SCALE and TSC_OFFSET). + */ +static u64 snp_tsc_scale __ro_after_init; +static u64 snp_tsc_offset __ro_after_init; +static u64 snp_tsc_freq_khz __ro_after_init; + /* #VC handler runtime per-CPU data */ struct sev_es_runtime_data { struct ghcb ghcb_page; @@ -1276,6 +1286,12 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip) vmsa->vmpl = snp_vmpl; vmsa->sev_features = sev_status >> 2; + /* Populate AP's TSC scale/offset to get accurate TSC values. */ + if (cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) { + vmsa->tsc_scale = snp_tsc_scale; + vmsa->tsc_offset = snp_tsc_offset; + } + /* Switch the page over to a VMSA page now that it is initialized */ ret = snp_set_vmsa(vmsa, caa, apic_id, true); if (ret) { @@ -1418,6 +1434,41 @@ static enum es_result __vc_handle_msr_caa(struct pt_regs *regs, bool write) return ES_OK; } +/* + * TSC related accesses should not exit to the hypervisor when a guest is + * executing with Secure TSC enabled, so special handling is required for + * accesses of MSR_IA32_TSC and MSR_AMD64_GUEST_TSC_FREQ. + */ +static enum es_result __vc_handle_secure_tsc_msrs(struct pt_regs *regs, bool write) +{ + u64 tsc; + + /* + * GUEST_TSC_FREQ should not be intercepted when Secure TSC is enabled. + * Terminate the SNP guest when the interception is enabled. + */ + if (regs->cx == MSR_AMD64_GUEST_TSC_FREQ) + return ES_VMM_ERROR; + + /* + * Writes: Writing to MSR_IA32_TSC can cause subsequent reads of the TSC + * to return undefined values, so ignore all writes. + * + * Reads: Reads of MSR_IA32_TSC should return the current TSC value, use + * the value returned by rdtsc_ordered(). + */ + if (write) { + WARN_ONCE(1, "TSC MSR writes are verboten!\n"); + return ES_OK; + } + + tsc = rdtsc_ordered(); + regs->ax = lower_32_bits(tsc); + regs->dx = upper_32_bits(tsc); + + return ES_OK; +} + static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) { struct pt_regs *regs = ctxt->regs; @@ -1427,8 +1478,18 @@ static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) /* Is it a WRMSR? 
*/ write = ctxt->insn.opcode.bytes[1] == 0x30; - if (regs->cx == MSR_SVSM_CAA) + switch (regs->cx) { + case MSR_SVSM_CAA: return __vc_handle_msr_caa(regs, write); + case MSR_IA32_TSC: + case MSR_AMD64_GUEST_TSC_FREQ: + if (sev_status & MSR_AMD64_SNP_SECURE_TSC) + return __vc_handle_secure_tsc_msrs(regs, write); + else + break; + default: + break; + } ghcb_set_rcx(ghcb, regs->cx); if (write) { @@ -2508,8 +2569,8 @@ int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, } EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); -int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, - struct snp_guest_request_ioctl *rio) +static int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, + struct snp_guest_request_ioctl *rio) { struct ghcb_state state; struct es_em_ctxt ctxt; @@ -2571,7 +2632,6 @@ int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *inpu return ret; } -EXPORT_SYMBOL_GPL(snp_issue_guest_request); static struct platform_device sev_guest_device = { .name = "sev-guest", @@ -2580,15 +2640,9 @@ static struct platform_device sev_guest_device = { static int __init snp_init_platform_device(void) { - struct sev_guest_platform_data data; - if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return -ENODEV; - data.secrets_gpa = secrets_pa; - if (platform_device_add_data(&sev_guest_device, &data, sizeof(data))) - return -ENODEV; - if (platform_device_register(&sev_guest_device)) return -ENODEV; @@ -2667,3 +2721,581 @@ static int __init sev_sysfs_init(void) } arch_initcall(sev_sysfs_init); #endif // CONFIG_SYSFS + +static void free_shared_pages(void *buf, size_t sz) +{ + unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + int ret; + + if (!buf) + return; + + ret = set_memory_encrypted((unsigned long)buf, npages); + if (ret) { + WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); + return; + } + + __free_pages(virt_to_page(buf), get_order(sz)); +} + +static void *alloc_shared_pages(size_t sz) +{ + unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; + struct page *page; + int ret; + + page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); + if (!page) + return NULL; + + ret = set_memory_decrypted((unsigned long)page_address(page), npages); + if (ret) { + pr_err("failed to mark page shared, ret=%d\n", ret); + __free_pages(page, get_order(sz)); + return NULL; + } + + return page_address(page); +} + +static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) +{ + u8 *key = NULL; + + switch (id) { + case 0: + *seqno = &secrets->os_area.msg_seqno_0; + key = secrets->vmpck0; + break; + case 1: + *seqno = &secrets->os_area.msg_seqno_1; + key = secrets->vmpck1; + break; + case 2: + *seqno = &secrets->os_area.msg_seqno_2; + key = secrets->vmpck2; + break; + case 3: + *seqno = &secrets->os_area.msg_seqno_3; + key = secrets->vmpck3; + break; + default: + break; + } + + return key; +} + +static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) +{ + struct aesgcm_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { + pr_err("Crypto context initialization failed\n"); + kfree(ctx); + return NULL; + } + + return ctx; +} + +int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) +{ + /* Adjust the default VMPCK key based on the executing VMPL level */ + if (vmpck_id == -1) + vmpck_id = snp_vmpl; + + mdesc->vmpck = get_vmpck(vmpck_id, mdesc->secrets, &mdesc->os_area_msg_seqno); + if (!mdesc->vmpck) { + 
pr_err("Invalid VMPCK%d communication key\n", vmpck_id); + return -EINVAL; + } + + /* Verify that VMPCK is not zero. */ + if (!memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { + pr_err("Empty VMPCK%d communication key\n", vmpck_id); + return -EINVAL; + } + + mdesc->vmpck_id = vmpck_id; + + mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); + if (!mdesc->ctx) + return -ENOMEM; + + return 0; +} +EXPORT_SYMBOL_GPL(snp_msg_init); + +struct snp_msg_desc *snp_msg_alloc(void) +{ + struct snp_msg_desc *mdesc; + void __iomem *mem; + + BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); + + mdesc = kzalloc(sizeof(struct snp_msg_desc), GFP_KERNEL); + if (!mdesc) + return ERR_PTR(-ENOMEM); + + mem = ioremap_encrypted(secrets_pa, PAGE_SIZE); + if (!mem) + goto e_free_mdesc; + + mdesc->secrets = (__force struct snp_secrets_page *)mem; + + /* Allocate the shared page used for the request and response message. */ + mdesc->request = alloc_shared_pages(sizeof(struct snp_guest_msg)); + if (!mdesc->request) + goto e_unmap; + + mdesc->response = alloc_shared_pages(sizeof(struct snp_guest_msg)); + if (!mdesc->response) + goto e_free_request; + + mdesc->certs_data = alloc_shared_pages(SEV_FW_BLOB_MAX_SIZE); + if (!mdesc->certs_data) + goto e_free_response; + + /* initial the input address for guest request */ + mdesc->input.req_gpa = __pa(mdesc->request); + mdesc->input.resp_gpa = __pa(mdesc->response); + mdesc->input.data_gpa = __pa(mdesc->certs_data); + + return mdesc; + +e_free_response: + free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); +e_free_request: + free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); +e_unmap: + iounmap(mem); +e_free_mdesc: + kfree(mdesc); + + return ERR_PTR(-ENOMEM); +} +EXPORT_SYMBOL_GPL(snp_msg_alloc); + +void snp_msg_free(struct snp_msg_desc *mdesc) +{ + if (!mdesc) + return; + + kfree(mdesc->ctx); + free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); + free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); + free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE); + iounmap((__force void __iomem *)mdesc->secrets); + + memset(mdesc, 0, sizeof(*mdesc)); + kfree(mdesc); +} +EXPORT_SYMBOL_GPL(snp_msg_free); + +/* Mutex to serialize the shared buffer access and command handling. */ +static DEFINE_MUTEX(snp_cmd_mutex); + +/* + * If an error is received from the host or AMD Secure Processor (ASP) there + * are two options. Either retry the exact same encrypted request or discontinue + * using the VMPCK. + * + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to + * encrypt the requests. The IV for this scheme is the sequence number. GCM + * cannot tolerate IV reuse. + * + * The ASP FW v1.51 only increments the sequence numbers on a successful + * guest<->ASP back and forth and only accepts messages at its exact sequence + * number. + * + * So if the sequence number were to be reused the encryption scheme is + * vulnerable. If the sequence number were incremented for a fresh IV the ASP + * will reject the request. 
+ */ +static void snp_disable_vmpck(struct snp_msg_desc *mdesc) +{ + pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n", + mdesc->vmpck_id); + memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN); + mdesc->vmpck = NULL; +} + +static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc) +{ + u64 count; + + lockdep_assert_held(&snp_cmd_mutex); + + /* Read the current message sequence counter from secrets pages */ + count = *mdesc->os_area_msg_seqno; + + return count + 1; +} + +/* Return a non-zero on success */ +static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc) +{ + u64 count = __snp_get_msg_seqno(mdesc); + + /* + * The message sequence counter for the SNP guest request is a 64-bit + * value but the version 2 of GHCB specification defines a 32-bit storage + * for it. If the counter exceeds the 32-bit value then return zero. + * The caller should check the return value, but if the caller happens to + * not check the value and use it, then the firmware treats zero as an + * invalid number and will fail the message request. + */ + if (count >= UINT_MAX) { + pr_err("request message sequence counter overflow\n"); + return 0; + } + + return count; +} + +static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) +{ + /* + * The counter is also incremented by the PSP, so increment it by 2 + * and save in secrets page. + */ + *mdesc->os_area_msg_seqno += 2; +} + +static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) +{ + struct snp_guest_msg *resp_msg = &mdesc->secret_response; + struct snp_guest_msg *req_msg = &mdesc->secret_request; + struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; + struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; + struct aesgcm_ctx *ctx = mdesc->ctx; + u8 iv[GCM_AES_IV_SIZE] = {}; + + pr_debug("response [seqno %lld type %d version %d sz %d]\n", + resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, + resp_msg_hdr->msg_sz); + + /* Copy response from shared memory to encrypted memory. */ + memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); + + /* Verify that the sequence counter is incremented by 1 */ + if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) + return -EBADMSG; + + /* Verify response message type and version number. */ + if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || + resp_msg_hdr->msg_version != req_msg_hdr->msg_version) + return -EBADMSG; + + /* + * If the message size is greater than our buffer length then return + * an error. 
+ */ + if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) + return -EBADMSG; + + /* Decrypt the payload */ + memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); + if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, + &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) + return -EBADMSG; + + return 0; +} + +static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) +{ + struct snp_guest_msg *msg = &mdesc->secret_request; + struct snp_guest_msg_hdr *hdr = &msg->hdr; + struct aesgcm_ctx *ctx = mdesc->ctx; + u8 iv[GCM_AES_IV_SIZE] = {}; + + memset(msg, 0, sizeof(*msg)); + + hdr->algo = SNP_AEAD_AES_256_GCM; + hdr->hdr_version = MSG_HDR_VER; + hdr->hdr_sz = sizeof(*hdr); + hdr->msg_type = req->msg_type; + hdr->msg_version = req->msg_version; + hdr->msg_seqno = seqno; + hdr->msg_vmpck = req->vmpck_id; + hdr->msg_sz = req->req_sz; + + /* Verify the sequence number is non-zero */ + if (!hdr->msg_seqno) + return -ENOSR; + + pr_debug("request [seqno %lld type %d version %d sz %d]\n", + hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); + + if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) + return -EBADMSG; + + memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); + aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, + AAD_LEN, iv, hdr->authtag); + + return 0; +} + +static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, + struct snp_guest_request_ioctl *rio) +{ + unsigned long req_start = jiffies; + unsigned int override_npages = 0; + u64 override_err = 0; + int rc; + +retry_request: + /* + * Call firmware to process the request. In this function the encrypted + * message enters shared memory with the host. So after this call the + * sequence number must be incremented or the VMPCK must be deleted to + * prevent reuse of the IV. + */ + rc = snp_issue_guest_request(req, &mdesc->input, rio); + switch (rc) { + case -ENOSPC: + /* + * If the extended guest request fails due to having too + * small of a certificate data buffer, retry the same + * guest request without the extended data request in + * order to increment the sequence number and thus avoid + * IV reuse. + */ + override_npages = mdesc->input.data_npages; + req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; + + /* + * Override the error to inform callers the given extended + * request buffer size was too small and give the caller the + * required buffer size. + */ + override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); + + /* + * If this call to the firmware succeeds, the sequence number can + * be incremented allowing for continued use of the VMPCK. If + * there is an error reflected in the return value, this value + * is checked further down and the result will be the deletion + * of the VMPCK and the error code being propagated back to the + * user as an ioctl() return code. + */ + goto retry_request; + + /* + * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been + * throttled. Retry in the driver to avoid returning and reusing the + * message sequence number on a different message. + */ + case -EAGAIN: + if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { + rc = -ETIMEDOUT; + break; + } + schedule_timeout_killable(SNP_REQ_RETRY_DELAY); + goto retry_request; + } + + /* + * Increment the message sequence number. 
There is no harm in doing + * this now because decryption uses the value stored in the response + * structure and any failure will wipe the VMPCK, preventing further + * use anyway. + */ + snp_inc_msg_seqno(mdesc); + + if (override_err) { + rio->exitinfo2 = override_err; + + /* + * If an extended guest request was issued and the supplied certificate + * buffer was not large enough, a standard guest request was issued to + * prevent IV reuse. If the standard request was successful, return -EIO + * back to the caller as would have originally been returned. + */ + if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) + rc = -EIO; + } + + if (override_npages) + mdesc->input.data_npages = override_npages; + + return rc; +} + +int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, + struct snp_guest_request_ioctl *rio) +{ + u64 seqno; + int rc; + + guard(mutex)(&snp_cmd_mutex); + + /* Check if the VMPCK is not empty */ + if (!mdesc->vmpck || !memchr_inv(mdesc->vmpck, 0, VMPCK_KEY_LEN)) { + pr_err_ratelimited("VMPCK is disabled\n"); + return -ENOTTY; + } + + /* Get message sequence and verify that its a non-zero */ + seqno = snp_get_msg_seqno(mdesc); + if (!seqno) + return -EIO; + + /* Clear shared memory's response for the host to populate. */ + memset(mdesc->response, 0, sizeof(struct snp_guest_msg)); + + /* Encrypt the userspace provided payload in mdesc->secret_request. */ + rc = enc_payload(mdesc, seqno, req); + if (rc) + return rc; + + /* + * Write the fully encrypted request to the shared unencrypted + * request page. + */ + memcpy(mdesc->request, &mdesc->secret_request, sizeof(mdesc->secret_request)); + + rc = __handle_guest_request(mdesc, req, rio); + if (rc) { + if (rc == -EIO && + rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) + return rc; + + pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", + rc, rio->exitinfo2); + + snp_disable_vmpck(mdesc); + return rc; + } + + rc = verify_and_dec_payload(mdesc, req); + if (rc) { + pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); + snp_disable_vmpck(mdesc); + return rc; + } + + return 0; +} +EXPORT_SYMBOL_GPL(snp_send_guest_request); + +static int __init snp_get_tsc_info(void) +{ + struct snp_guest_request_ioctl *rio; + struct snp_tsc_info_resp *tsc_resp; + struct snp_tsc_info_req *tsc_req; + struct snp_msg_desc *mdesc; + struct snp_guest_req *req; + int rc = -ENOMEM; + + tsc_req = kzalloc(sizeof(*tsc_req), GFP_KERNEL); + if (!tsc_req) + return rc; + + /* + * The intermediate response buffer is used while decrypting the + * response payload. Make sure that it has enough space to cover + * the authtag. 
+ */ + tsc_resp = kzalloc(sizeof(*tsc_resp) + AUTHTAG_LEN, GFP_KERNEL); + if (!tsc_resp) + goto e_free_tsc_req; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + goto e_free_tsc_resp; + + rio = kzalloc(sizeof(*rio), GFP_KERNEL); + if (!rio) + goto e_free_req; + + mdesc = snp_msg_alloc(); + if (IS_ERR_OR_NULL(mdesc)) + goto e_free_rio; + + rc = snp_msg_init(mdesc, snp_vmpl); + if (rc) + goto e_free_mdesc; + + req->msg_version = MSG_HDR_VER; + req->msg_type = SNP_MSG_TSC_INFO_REQ; + req->vmpck_id = snp_vmpl; + req->req_buf = tsc_req; + req->req_sz = sizeof(*tsc_req); + req->resp_buf = (void *)tsc_resp; + req->resp_sz = sizeof(*tsc_resp) + AUTHTAG_LEN; + req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; + + rc = snp_send_guest_request(mdesc, req, rio); + if (rc) + goto e_request; + + pr_debug("%s: response status 0x%x scale 0x%llx offset 0x%llx factor 0x%x\n", + __func__, tsc_resp->status, tsc_resp->tsc_scale, tsc_resp->tsc_offset, + tsc_resp->tsc_factor); + + if (!tsc_resp->status) { + snp_tsc_scale = tsc_resp->tsc_scale; + snp_tsc_offset = tsc_resp->tsc_offset; + } else { + pr_err("Failed to get TSC info, response status 0x%x\n", tsc_resp->status); + rc = -EIO; + } + +e_request: + /* The response buffer contains sensitive data, explicitly clear it. */ + memzero_explicit(tsc_resp, sizeof(*tsc_resp) + AUTHTAG_LEN); +e_free_mdesc: + snp_msg_free(mdesc); +e_free_rio: + kfree(rio); +e_free_req: + kfree(req); + e_free_tsc_resp: + kfree(tsc_resp); +e_free_tsc_req: + kfree(tsc_req); + + return rc; +} + +void __init snp_secure_tsc_prepare(void) +{ + if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) + return; + + if (snp_get_tsc_info()) { + pr_alert("Unable to retrieve Secure TSC info from ASP\n"); + sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_SECURE_TSC); + } + + pr_debug("SecureTSC enabled"); +} + +static unsigned long securetsc_get_tsc_khz(void) +{ + return snp_tsc_freq_khz; +} + +void __init snp_secure_tsc_init(void) +{ + unsigned long long tsc_freq_mhz; + + if (!cc_platform_has(CC_ATTR_GUEST_SNP_SECURE_TSC)) + return; + + setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ); + rdmsrl(MSR_AMD64_GUEST_TSC_FREQ, tsc_freq_mhz); + snp_tsc_freq_khz = (unsigned long)(tsc_freq_mhz * 1000); + + x86_platform.calibrate_cpu = securetsc_get_tsc_khz; + x86_platform.calibrate_tsc = securetsc_get_tsc_khz; +} diff --git a/arch/x86/coco/sev/shared.c b/arch/x86/coco/sev/shared.c index 71de53194089..4386f37bd31d 100644 --- a/arch/x86/coco/sev/shared.c +++ b/arch/x86/coco/sev/shared.c @@ -1140,6 +1140,16 @@ static enum es_result vc_handle_rdtsc(struct ghcb *ghcb, bool rdtscp = (exit_code == SVM_EXIT_RDTSCP); enum es_result ret; + /* + * The hypervisor should not be intercepting RDTSC/RDTSCP when Secure + * TSC is enabled. A #VC exception will be generated if the RDTSC/RDTSCP + * instructions are being intercepted. If this should occur and Secure + * TSC is enabled, guest execution should be terminated as the guest + * cannot rely on the TSC value provided by the hypervisor. 
+ */ + if (sev_status & MSR_AMD64_SNP_SECURE_TSC) + return ES_VMM_ERROR; + ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0); if (ret != ES_OK) return ret; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0e2d81763615..2cac4e18cf39 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -451,6 +451,8 @@ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ #define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 3ae84c3b8e6d..9a71880eec07 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -608,6 +608,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -644,6 +645,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -682,11 +684,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index 50f5666938c0..6ef92432a5ce 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -206,6 +206,7 @@ struct snp_psc_desc { #define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */ #define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */ #define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */ +#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */ #define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 91f08af31078..5d9685f92e5c 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -14,6 +14,7 @@ #include #include #include +#include #define GHCB_PROTOCOL_MIN 1ULL #define GHCB_PROTOCOL_MAX 2ULL @@ -124,6 +125,9 @@ struct snp_req_data { #define AAD_LEN 48 #define MSG_HDR_VER 1 +#define SNP_REQ_MAX_RETRY_DURATION (60*HZ) +#define SNP_REQ_RETRY_DELAY (2*HZ) + /* See SNP spec SNP_GUEST_REQUEST section for the structure */ enum msg_type { SNP_MSG_TYPE_INVALID = 0, @@ -142,6 +146,9 @@ 
enum msg_type { SNP_MSG_VMRK_REQ, SNP_MSG_VMRK_RSP, + SNP_MSG_TSC_INFO_REQ = 17, + SNP_MSG_TSC_INFO_RSP, + SNP_MSG_TYPE_MAX }; @@ -170,9 +177,20 @@ struct snp_guest_msg { u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)]; } __packed; -struct sev_guest_platform_data { - u64 secrets_gpa; -}; +#define SNP_TSC_INFO_REQ_SZ 128 + +struct snp_tsc_info_req { + u8 rsvd[SNP_TSC_INFO_REQ_SZ]; +} __packed; + +struct snp_tsc_info_resp { + u32 status; + u32 rsvd1; + u64 tsc_scale; + u64 tsc_offset; + u32 tsc_factor; + u8 rsvd2[100]; +} __packed; struct snp_guest_req { void *req_buf; @@ -253,6 +271,7 @@ struct snp_msg_desc { u32 *os_area_msg_seqno; u8 *vmpck; + int vmpck_id; }; /* @@ -445,8 +464,6 @@ void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); void snp_dmi_setup(void); -int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, - struct snp_guest_request_ioctl *rio); int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); @@ -458,6 +475,15 @@ void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot); void snp_kexec_finish(void); void snp_kexec_begin(void); +int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id); +struct snp_msg_desc *snp_msg_alloc(void); +void snp_msg_free(struct snp_msg_desc *mdesc); +int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, + struct snp_guest_request_ioctl *rio); + +void __init snp_secure_tsc_prepare(void); +void __init snp_secure_tsc_init(void); + #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define snp_vmpl 0 @@ -480,11 +506,6 @@ static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } static inline void snp_dmi_setup(void) { } -static inline int snp_issue_guest_request(struct snp_guest_req *req, struct snp_req_data *input, - struct snp_guest_request_ioctl *rio) -{ - return -ENOTTY; -} static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) { return -ENOTTY; @@ -498,6 +519,13 @@ static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; } static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { } static inline void snp_kexec_finish(void) { } static inline void snp_kexec_begin(void) { } +static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; } +static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; } +static inline void snp_msg_free(struct snp_msg_desc *mdesc) { } +static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, + struct snp_guest_request_ioctl *rio) { return -ENODEV; } +static inline void __init snp_secure_tsc_prepare(void) { } +static inline void __init snp_secure_tsc_init(void) { } #endif /* CONFIG_AMD_MEM_ENCRYPT */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 2b59b9951c90..92e18798f197 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -417,7 +417,9 @@ struct sev_es_save_area { u8 reserved_0x298[80]; u32 pkru; u32 tsc_aux; - u8 reserved_0x2f0[24]; + u64 tsc_scale; + u64 tsc_offset; + u8 reserved_0x300[8]; u64 rcx; u64 rdx; u64 rbx; @@ -564,7 +566,7 @@ static inline void __unused_size_checks(void) 
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0); BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248); BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298); - BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x2f0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x300); BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320); BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380); BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 79d2e17f6582..b9592c60166e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -355,10 +355,15 @@ static void bsp_determine_snp(struct cpuinfo_x86 *c) /* * RMP table entry format is not architectural and is defined by the * per-processor PPR. Restrict SNP support on the known CPU models - * for which the RMP table entry format is currently defined for. + * for which the RMP table entry format is currently defined or for + * processors which support the architecturally defined RMPREAD + * instruction. */ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && - c->x86 >= 0x19 && snp_probe_rmptable_info()) { + (cpu_feature_enabled(X86_FEATURE_ZEN3) || + cpu_feature_enabled(X86_FEATURE_ZEN4) || + cpu_feature_enabled(X86_FEATURE_RMPREAD)) && + snp_probe_rmptable_info()) { cc_platform_set(CC_ATTR_HOST_SEV_SNP); } else { setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 67aeaba4ba9c..0864b314c26a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -30,6 +30,7 @@ #include #include #include +#include unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1515,6 +1516,9 @@ void __init tsc_early_init(void) /* Don't change UV TSC multi-chassis synchronization */ if (is_early_uv_system()) return; + + snp_secure_tsc_init(); + if (!determine_cpu_tsc_frequencies(true)) return; tsc_enable_sched_clock(); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 0a120d85d7bb..95bae74fdab2 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -94,6 +94,8 @@ void __init mem_encrypt_init(void) /* Call into SWIOTLB to update the SWIOTLB DMA buffers */ swiotlb_update_mem_attributes(); + snp_secure_tsc_prepare(); + print_mem_encrypt_feature_info(); } diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 774f9677458f..b56c5c073003 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -541,6 +541,9 @@ void __init sme_early_init(void) * kernel mapped. */ snp_update_svsm_ca(); + + if (sev_status & MSR_AMD64_SNP_SECURE_TSC) + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 9a6a943d8e41..1dcc027ec77e 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -31,10 +32,29 @@ #include /* - * The RMP entry format is not architectural. The format is defined in PPR - * Family 19h Model 01h, Rev B1 processor. + * The RMP entry information as returned by the RMPREAD instruction. */ struct rmpentry { + u64 gpa; + u8 assigned :1, + rsvd1 :7; + u8 pagesize :1, + hpage_region_status :1, + rsvd2 :6; + u8 immutable :1, + rsvd3 :7; + u8 rsvd4; + u32 asid; +} __packed; + +/* + * The raw RMP entry format is not architectural. The format is defined in PPR + * Family 19h Model 01h, Rev B1 processor. 
This format represents the actual + * entry in the RMP table memory. The bitfield definitions are used for machines + * without the RMPREAD instruction (Zen3 and Zen4), otherwise the "hi" and "lo" + * fields are only used for dumping the raw data. + */ +struct rmpentry_raw { union { struct { u64 assigned : 1, @@ -58,12 +78,48 @@ struct rmpentry { */ #define RMPTABLE_CPU_BOOKKEEPING_SZ 0x4000 +/* + * For a non-segmented RMP table, use the maximum physical addressing as the + * segment size in order to always arrive at index 0 in the table. + */ +#define RMPTABLE_NON_SEGMENTED_SHIFT 52 + +struct rmp_segment_desc { + struct rmpentry_raw *rmp_entry; + u64 max_index; + u64 size; +}; + +/* + * Segmented RMP Table support. + * - The segment size is used for two purposes: + * - Identify the amount of memory covered by an RMP segment + * - Quickly locate an RMP segment table entry for a physical address + * + * - The RMP segment table contains pointers to an RMP table that covers + * a specific portion of memory. There can be up to 512 8-byte entries, + * one pages worth. + */ +#define RST_ENTRY_MAPPED_SIZE(x) ((x) & GENMASK_ULL(19, 0)) +#define RST_ENTRY_SEGMENT_BASE(x) ((x) & GENMASK_ULL(51, 20)) + +#define RST_SIZE SZ_4K +static struct rmp_segment_desc **rmp_segment_table __ro_after_init; +static unsigned int rst_max_index __ro_after_init = 512; + +static unsigned int rmp_segment_shift; +static u64 rmp_segment_size; +static u64 rmp_segment_mask; + +#define RST_ENTRY_INDEX(x) ((x) >> rmp_segment_shift) +#define RMP_ENTRY_INDEX(x) ((u64)(PHYS_PFN((x) & rmp_segment_mask))) + +static u64 rmp_cfg; + /* Mask to apply to a PFN to get the first PFN of a 2MB page */ #define PFN_PMD_MASK GENMASK_ULL(63, PMD_SHIFT - PAGE_SHIFT) static u64 probed_rmp_base, probed_rmp_size; -static struct rmpentry *rmptable __ro_after_init; -static u64 rmptable_max_pfn __ro_after_init; static LIST_HEAD(snp_leaked_pages_list); static DEFINE_SPINLOCK(snp_leaked_pages_list_lock); @@ -116,36 +172,6 @@ static __init void snp_enable(void *arg) __snp_enable(smp_processor_id()); } -#define RMP_ADDR_MASK GENMASK_ULL(51, 13) - -bool snp_probe_rmptable_info(void) -{ - u64 rmp_sz, rmp_base, rmp_end; - - rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); - rdmsrl(MSR_AMD64_RMP_END, rmp_end); - - if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) { - pr_err("Memory for the RMP table has not been reserved by BIOS\n"); - return false; - } - - if (rmp_base > rmp_end) { - pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end); - return false; - } - - rmp_sz = rmp_end - rmp_base + 1; - - probed_rmp_base = rmp_base; - probed_rmp_size = rmp_sz; - - pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", - rmp_base, rmp_end); - - return true; -} - static void __init __snp_fixup_e820_tables(u64 pa) { if (IS_ALIGNED(pa, PMD_SIZE)) @@ -178,35 +204,176 @@ static void __init __snp_fixup_e820_tables(u64 pa) } } -void __init snp_fixup_e820_tables(void) +static void __init fixup_e820_tables_for_segmented_rmp(void) +{ + u64 pa, *rst, size, mapped_size; + unsigned int i; + + __snp_fixup_e820_tables(probed_rmp_base); + + pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ; + + __snp_fixup_e820_tables(pa + RST_SIZE); + + rst = early_memremap(pa, RST_SIZE); + if (!rst) + return; + + for (i = 0; i < rst_max_index; i++) { + pa = RST_ENTRY_SEGMENT_BASE(rst[i]); + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]); + if (!mapped_size) + continue; + + __snp_fixup_e820_tables(pa); + + /* + * Mapped size in GB. 
Mapped size is allowed to exceed + * the segment coverage size, but gets reduced to the + * segment coverage size. + */ + mapped_size <<= 30; + if (mapped_size > rmp_segment_size) + mapped_size = rmp_segment_size; + + /* Calculate the RMP segment size (16 bytes/page mapped) */ + size = PHYS_PFN(mapped_size) << 4; + + __snp_fixup_e820_tables(pa + size); + } + + early_memunmap(rst, RST_SIZE); +} + +static void __init fixup_e820_tables_for_contiguous_rmp(void) { __snp_fixup_e820_tables(probed_rmp_base); __snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size); } -/* - * Do the necessary preparations which are verified by the firmware as - * described in the SNP_INIT_EX firmware command description in the SNP - * firmware ABI spec. - */ -static int __init snp_rmptable_init(void) +void __init snp_fixup_e820_tables(void) { - u64 max_rmp_pfn, calc_rmp_sz, rmptable_size, rmp_end, val; - void *rmptable_start; + if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) { + fixup_e820_tables_for_segmented_rmp(); + } else { + fixup_e820_tables_for_contiguous_rmp(); + } +} - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) - return 0; +static bool __init clear_rmptable_bookkeeping(void) +{ + void *bk; - if (!amd_iommu_snp_en) - goto nosnp; + bk = memremap(probed_rmp_base, RMPTABLE_CPU_BOOKKEEPING_SZ, MEMREMAP_WB); + if (!bk) { + pr_err("Failed to map RMP bookkeeping area\n"); + return false; + } + + memset(bk, 0, RMPTABLE_CPU_BOOKKEEPING_SZ); + + memunmap(bk); + + return true; +} + +static bool __init alloc_rmp_segment_desc(u64 segment_pa, u64 segment_size, u64 pa) +{ + u64 rst_index, rmp_segment_size_max; + struct rmp_segment_desc *desc; + void *rmp_segment; + + /* Calculate the maximum size an RMP can be (16 bytes/page mapped) */ + rmp_segment_size_max = PHYS_PFN(rmp_segment_size) << 4; + + /* Validate the RMP segment size */ + if (segment_size > rmp_segment_size_max) { + pr_err("Invalid RMP size 0x%llx for configured segment size 0x%llx\n", + segment_size, rmp_segment_size_max); + return false; + } + + /* Validate the RMP segment table index */ + rst_index = RST_ENTRY_INDEX(pa); + if (rst_index >= rst_max_index) { + pr_err("Invalid RMP segment base address 0x%llx for configured segment size 0x%llx\n", + pa, rmp_segment_size); + return false; + } + + if (rmp_segment_table[rst_index]) { + pr_err("RMP segment descriptor already exists at index %llu\n", rst_index); + return false; + } + + rmp_segment = memremap(segment_pa, segment_size, MEMREMAP_WB); + if (!rmp_segment) { + pr_err("Failed to map RMP segment addr 0x%llx size 0x%llx\n", + segment_pa, segment_size); + return false; + } + + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) { + memunmap(rmp_segment); + return false; + } + + desc->rmp_entry = rmp_segment; + desc->max_index = segment_size / sizeof(*desc->rmp_entry); + desc->size = segment_size; + + rmp_segment_table[rst_index] = desc; + + return true; +} + +static void __init free_rmp_segment_table(void) +{ + unsigned int i; + + for (i = 0; i < rst_max_index; i++) { + struct rmp_segment_desc *desc; + + desc = rmp_segment_table[i]; + if (!desc) + continue; + + memunmap(desc->rmp_entry); + + kfree(desc); + } + + free_page((unsigned long)rmp_segment_table); + + rmp_segment_table = NULL; +} + +/* Allocate the table used to index into the RMP segments */ +static bool __init alloc_rmp_segment_table(void) +{ + struct page *page; + + page = alloc_page(__GFP_ZERO); + if (!page) + return false; + + rmp_segment_table = page_address(page); + + return true; +} + +static bool __init setup_contiguous_rmptable(void) +{ + 
u64 max_rmp_pfn, calc_rmp_sz, rmptable_segment, rmptable_size, rmp_end; if (!probed_rmp_size) - goto nosnp; + return false; rmp_end = probed_rmp_base + probed_rmp_size - 1; /* - * Calculate the amount the memory that must be reserved by the BIOS to + * Calculate the amount of memory that must be reserved by the BIOS to * address the whole RAM, including the bookkeeping area. The RMP itself * must also be covered. */ @@ -218,15 +385,140 @@ static int __init snp_rmptable_init(void) if (calc_rmp_sz > probed_rmp_size) { pr_err("Memory reserved for the RMP table does not cover full system RAM (expected 0x%llx got 0x%llx)\n", calc_rmp_sz, probed_rmp_size); - goto nosnp; + return false; } - rmptable_start = memremap(probed_rmp_base, probed_rmp_size, MEMREMAP_WB); - if (!rmptable_start) { - pr_err("Failed to map RMP table\n"); - goto nosnp; + if (!alloc_rmp_segment_table()) + return false; + + /* Map only the RMP entries */ + rmptable_segment = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ; + rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ; + + if (!alloc_rmp_segment_desc(rmptable_segment, rmptable_size, 0)) { + free_rmp_segment_table(); + return false; } + return true; +} + +static bool __init setup_segmented_rmptable(void) +{ + u64 rst_pa, *rst, pa, ram_pa_end, ram_pa_max; + unsigned int i, max_index; + + if (!probed_rmp_base) + return false; + + if (!alloc_rmp_segment_table()) + return false; + + rst_pa = probed_rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ; + rst = memremap(rst_pa, RST_SIZE, MEMREMAP_WB); + if (!rst) { + pr_err("Failed to map RMP segment table addr 0x%llx\n", rst_pa); + goto e_free; + } + + pr_info("Segmented RMP using %lluGB segments\n", rmp_segment_size >> 30); + + ram_pa_max = max_pfn << PAGE_SHIFT; + + max_index = 0; + ram_pa_end = 0; + for (i = 0; i < rst_max_index; i++) { + u64 rmp_segment, rmp_size, mapped_size; + + mapped_size = RST_ENTRY_MAPPED_SIZE(rst[i]); + if (!mapped_size) + continue; + + max_index = i; + + /* + * Mapped size in GB. Mapped size is allowed to exceed the + * segment coverage size, but gets reduced to the segment + * coverage size. + */ + mapped_size <<= 30; + if (mapped_size > rmp_segment_size) { + pr_info("RMP segment %u mapped size (0x%llx) reduced to 0x%llx\n", + i, mapped_size, rmp_segment_size); + mapped_size = rmp_segment_size; + } + + rmp_segment = RST_ENTRY_SEGMENT_BASE(rst[i]); + + /* Calculate the RMP segment size (16 bytes/page mapped) */ + rmp_size = PHYS_PFN(mapped_size) << 4; + + pa = (u64)i << rmp_segment_shift; + + /* + * Some segments may be for MMIO mapped above system RAM. These + * segments are used for Trusted I/O. 
+ */ + if (pa < ram_pa_max) + ram_pa_end = pa + mapped_size; + + if (!alloc_rmp_segment_desc(rmp_segment, rmp_size, pa)) + goto e_unmap; + + pr_info("RMP segment %u physical address [0x%llx - 0x%llx] covering [0x%llx - 0x%llx]\n", + i, rmp_segment, rmp_segment + rmp_size - 1, pa, pa + mapped_size - 1); + } + + if (ram_pa_max > ram_pa_end) { + pr_err("Segmented RMP does not cover full system RAM (expected 0x%llx got 0x%llx)\n", + ram_pa_max, ram_pa_end); + goto e_unmap; + } + + /* Adjust the maximum index based on the found segments */ + rst_max_index = max_index + 1; + + memunmap(rst); + + return true; + +e_unmap: + memunmap(rst); + +e_free: + free_rmp_segment_table(); + + return false; +} + +static bool __init setup_rmptable(void) +{ + if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) { + return setup_segmented_rmptable(); + } else { + return setup_contiguous_rmptable(); + } +} + +/* + * Do the necessary preparations which are verified by the firmware as + * described in the SNP_INIT_EX firmware command description in the SNP + * firmware ABI spec. + */ +static int __init snp_rmptable_init(void) +{ + unsigned int i; + u64 val; + + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return 0; + + if (!amd_iommu_snp_en) + goto nosnp; + + if (!setup_rmptable()) + goto nosnp; + /* * Check if SEV-SNP is already enabled, this can happen in case of * kexec boot. @@ -235,7 +527,22 @@ static int __init snp_rmptable_init(void) if (val & MSR_AMD64_SYSCFG_SNP_EN) goto skip_enable; - memset(rmptable_start, 0, probed_rmp_size); + /* Zero out the RMP bookkeeping area */ + if (!clear_rmptable_bookkeeping()) { + free_rmp_segment_table(); + goto nosnp; + } + + /* Zero out the RMP entries */ + for (i = 0; i < rst_max_index; i++) { + struct rmp_segment_desc *desc; + + desc = rmp_segment_table[i]; + if (!desc) + continue; + + memset(desc->rmp_entry, 0, desc->size); + } /* Flush the caches to ensure that data is written before SNP is enabled. 
*/ wbinvd_on_all_cpus(); @@ -246,12 +553,6 @@ static int __init snp_rmptable_init(void) on_each_cpu(snp_enable, NULL, 1); skip_enable: - rmptable_start += RMPTABLE_CPU_BOOKKEEPING_SZ; - rmptable_size = probed_rmp_size - RMPTABLE_CPU_BOOKKEEPING_SZ; - - rmptable = (struct rmpentry *)rmptable_start; - rmptable_max_pfn = rmptable_size / sizeof(struct rmpentry) - 1; - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/rmptable_init:online", __snp_enable, NULL); /* @@ -272,48 +573,212 @@ static int __init snp_rmptable_init(void) */ device_initcall(snp_rmptable_init); -static struct rmpentry *get_rmpentry(u64 pfn) +static void set_rmp_segment_info(unsigned int segment_shift) { - if (WARN_ON_ONCE(pfn > rmptable_max_pfn)) - return ERR_PTR(-EFAULT); - - return &rmptable[pfn]; + rmp_segment_shift = segment_shift; + rmp_segment_size = 1ULL << rmp_segment_shift; + rmp_segment_mask = rmp_segment_size - 1; } -static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) -{ - struct rmpentry *large_entry, *entry; +#define RMP_ADDR_MASK GENMASK_ULL(51, 13) - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) +static bool probe_contiguous_rmptable_info(void) +{ + u64 rmp_sz, rmp_base, rmp_end; + + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); + rdmsrl(MSR_AMD64_RMP_END, rmp_end); + + if (!(rmp_base & RMP_ADDR_MASK) || !(rmp_end & RMP_ADDR_MASK)) { + pr_err("Memory for the RMP table has not been reserved by BIOS\n"); + return false; + } + + if (rmp_base > rmp_end) { + pr_err("RMP configuration not valid: base=%#llx, end=%#llx\n", rmp_base, rmp_end); + return false; + } + + rmp_sz = rmp_end - rmp_base + 1; + + /* Treat the contiguous RMP table as a single segment */ + rst_max_index = 1; + + set_rmp_segment_info(RMPTABLE_NON_SEGMENTED_SHIFT); + + probed_rmp_base = rmp_base; + probed_rmp_size = rmp_sz; + + pr_info("RMP table physical range [0x%016llx - 0x%016llx]\n", + rmp_base, rmp_end); + + return true; +} + +static bool probe_segmented_rmptable_info(void) +{ + unsigned int eax, ebx, segment_shift, segment_shift_min, segment_shift_max; + u64 rmp_base, rmp_end; + + rdmsrl(MSR_AMD64_RMP_BASE, rmp_base); + if (!(rmp_base & RMP_ADDR_MASK)) { + pr_err("Memory for the RMP table has not been reserved by BIOS\n"); + return false; + } + + rdmsrl(MSR_AMD64_RMP_END, rmp_end); + WARN_ONCE(rmp_end & RMP_ADDR_MASK, + "Segmented RMP enabled but RMP_END MSR is non-zero\n"); + + /* Obtain the min and max supported RMP segment size */ + eax = cpuid_eax(0x80000025); + segment_shift_min = eax & GENMASK(5, 0); + segment_shift_max = (eax & GENMASK(11, 6)) >> 6; + + /* Verify the segment size is within the supported limits */ + segment_shift = MSR_AMD64_RMP_SEGMENT_SHIFT(rmp_cfg); + if (segment_shift > segment_shift_max || segment_shift < segment_shift_min) { + pr_err("RMP segment size (%u) is not within advertised bounds (min=%u, max=%u)\n", + segment_shift, segment_shift_min, segment_shift_max); + return false; + } + + /* Override the max supported RST index if a hardware limit exists */ + ebx = cpuid_ebx(0x80000025); + if (ebx & BIT(10)) + rst_max_index = ebx & GENMASK(9, 0); + + set_rmp_segment_info(segment_shift); + + probed_rmp_base = rmp_base; + probed_rmp_size = 0; + + pr_info("Segmented RMP base table physical range [0x%016llx - 0x%016llx]\n", + rmp_base, rmp_base + RMPTABLE_CPU_BOOKKEEPING_SZ + RST_SIZE); + + return true; +} + +bool snp_probe_rmptable_info(void) +{ + if (cpu_feature_enabled(X86_FEATURE_SEGMENTED_RMP)) + rdmsrl(MSR_AMD64_RMP_CFG, rmp_cfg); + + if (rmp_cfg & MSR_AMD64_SEG_RMP_ENABLED) + return 
probe_segmented_rmptable_info(); + else + return probe_contiguous_rmptable_info(); +} + +/* + * About the array_index_nospec() usage below: + * + * This function can get called by exported functions like + * snp_lookup_rmpentry(), which is used by the KVM #PF handler, among + * others, and since the @pfn passed in cannot always be trusted, + * speculation should be stopped as a protective measure. + */ +static struct rmpentry_raw *get_raw_rmpentry(u64 pfn) +{ + u64 paddr, rst_index, segment_index; + struct rmp_segment_desc *desc; + + if (!rmp_segment_table) return ERR_PTR(-ENODEV); - entry = get_rmpentry(pfn); - if (IS_ERR(entry)) - return entry; + paddr = pfn << PAGE_SHIFT; + + rst_index = RST_ENTRY_INDEX(paddr); + if (unlikely(rst_index >= rst_max_index)) + return ERR_PTR(-EFAULT); + + rst_index = array_index_nospec(rst_index, rst_max_index); + + desc = rmp_segment_table[rst_index]; + if (unlikely(!desc)) + return ERR_PTR(-EFAULT); + + segment_index = RMP_ENTRY_INDEX(paddr); + if (unlikely(segment_index >= desc->max_index)) + return ERR_PTR(-EFAULT); + + segment_index = array_index_nospec(segment_index, desc->max_index); + + return desc->rmp_entry + segment_index; +} + +static int get_rmpentry(u64 pfn, struct rmpentry *e) +{ + struct rmpentry_raw *e_raw; + + if (cpu_feature_enabled(X86_FEATURE_RMPREAD)) { + int ret; + + /* Binutils version 2.44 supports the RMPREAD mnemonic. */ + asm volatile(".byte 0xf2, 0x0f, 0x01, 0xfd" + : "=a" (ret) + : "a" (pfn << PAGE_SHIFT), "c" (e) + : "memory", "cc"); + + return ret; + } + + e_raw = get_raw_rmpentry(pfn); + if (IS_ERR(e_raw)) + return PTR_ERR(e_raw); + + /* + * Map the raw RMP table entry onto the RMPREAD output format. + * The 2MB region status indicator (hpage_region_status field) is not + * calculated, since the overhead could be significant and the field + * is not used. + */ + memset(e, 0, sizeof(*e)); + e->gpa = e_raw->gpa << PAGE_SHIFT; + e->asid = e_raw->asid; + e->assigned = e_raw->assigned; + e->pagesize = e_raw->pagesize; + e->immutable = e_raw->immutable; + + return 0; +} + +static int __snp_lookup_rmpentry(u64 pfn, struct rmpentry *e, int *level) +{ + struct rmpentry e_large; + int ret; + + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + return -ENODEV; + + ret = get_rmpentry(pfn, e); + if (ret) + return ret; /* * Find the authoritative RMP entry for a PFN. This can be either a 4K * RMP entry or a special large RMP entry that is authoritative for a * whole 2M area. 
*/ - large_entry = get_rmpentry(pfn & PFN_PMD_MASK); - if (IS_ERR(large_entry)) - return large_entry; + ret = get_rmpentry(pfn & PFN_PMD_MASK, &e_large); + if (ret) + return ret; - *level = RMP_TO_PG_LEVEL(large_entry->pagesize); + *level = RMP_TO_PG_LEVEL(e_large.pagesize); - return entry; + return 0; } int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { - struct rmpentry *e; + struct rmpentry e; + int ret; - e = __snp_lookup_rmpentry(pfn, level); - if (IS_ERR(e)) - return PTR_ERR(e); + ret = __snp_lookup_rmpentry(pfn, &e, level); + if (ret) + return ret; - *assigned = !!e->assigned; + *assigned = !!e.assigned; return 0; } EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); @@ -326,20 +791,28 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry); */ static void dump_rmpentry(u64 pfn) { + struct rmpentry_raw *e_raw; u64 pfn_i, pfn_end; - struct rmpentry *e; - int level; + struct rmpentry e; + int level, ret; - e = __snp_lookup_rmpentry(pfn, &level); - if (IS_ERR(e)) { - pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n", - pfn, PTR_ERR(e)); + ret = __snp_lookup_rmpentry(pfn, &e, &level); + if (ret) { + pr_err("Failed to read RMP entry for PFN 0x%llx, error %d\n", + pfn, ret); return; } - if (e->assigned) { + if (e.assigned) { + e_raw = get_raw_rmpentry(pfn); + if (IS_ERR(e_raw)) { + pr_err("Failed to read RMP contents for PFN 0x%llx, error %ld\n", + pfn, PTR_ERR(e_raw)); + return; + } + pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n", - pfn, e->lo, e->hi); + pfn, e_raw->lo, e_raw->hi); return; } @@ -358,16 +831,16 @@ static void dump_rmpentry(u64 pfn) pfn, pfn_i, pfn_end); while (pfn_i < pfn_end) { - e = __snp_lookup_rmpentry(pfn_i, &level); - if (IS_ERR(e)) { - pr_err("Error %ld reading RMP entry for PFN 0x%llx\n", - PTR_ERR(e), pfn_i); + e_raw = get_raw_rmpentry(pfn_i); + if (IS_ERR(e_raw)) { + pr_err("Error %ld reading RMP contents for PFN 0x%llx\n", + PTR_ERR(e_raw), pfn_i); pfn_i++; continue; } - if (e->lo || e->hi) - pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi); + if (e_raw->lo || e_raw->hi) + pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e_raw->lo, e_raw->hi); pfn_i++; } } diff --git a/drivers/virt/coco/sev-guest/Kconfig b/drivers/virt/coco/sev-guest/Kconfig index 0b772bd921d8..a6405ab6c2c3 100644 --- a/drivers/virt/coco/sev-guest/Kconfig +++ b/drivers/virt/coco/sev-guest/Kconfig @@ -2,7 +2,6 @@ config SEV_GUEST tristate "AMD SEV Guest driver" default m depends on AMD_MEM_ENCRYPT - select CRYPTO_LIB_AESGCM select TSM_REPORTS help SEV-SNP firmware provides the guest a mechanism to communicate with diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index b699771be029..264b6523fe52 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -31,9 +31,6 @@ #define DEVICE_NAME "sev-guest" -#define SNP_REQ_MAX_RETRY_DURATION (60*HZ) -#define SNP_REQ_RETRY_DELAY (2*HZ) - #define SVSM_MAX_RETRIES 3 struct snp_guest_dev { @@ -60,86 +57,6 @@ static int vmpck_id = -1; module_param(vmpck_id, int, 0444); MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP."); -/* Mutex to serialize the shared buffer access and command handling. 
*/ -static DEFINE_MUTEX(snp_cmd_mutex); - -static bool is_vmpck_empty(struct snp_msg_desc *mdesc) -{ - char zero_key[VMPCK_KEY_LEN] = {0}; - - if (mdesc->vmpck) - return !memcmp(mdesc->vmpck, zero_key, VMPCK_KEY_LEN); - - return true; -} - -/* - * If an error is received from the host or AMD Secure Processor (ASP) there - * are two options. Either retry the exact same encrypted request or discontinue - * using the VMPCK. - * - * This is because in the current encryption scheme GHCB v2 uses AES-GCM to - * encrypt the requests. The IV for this scheme is the sequence number. GCM - * cannot tolerate IV reuse. - * - * The ASP FW v1.51 only increments the sequence numbers on a successful - * guest<->ASP back and forth and only accepts messages at its exact sequence - * number. - * - * So if the sequence number were to be reused the encryption scheme is - * vulnerable. If the sequence number were incremented for a fresh IV the ASP - * will reject the request. - */ -static void snp_disable_vmpck(struct snp_msg_desc *mdesc) -{ - pr_alert("Disabling VMPCK%d communication key to prevent IV reuse.\n", - vmpck_id); - memzero_explicit(mdesc->vmpck, VMPCK_KEY_LEN); - mdesc->vmpck = NULL; -} - -static inline u64 __snp_get_msg_seqno(struct snp_msg_desc *mdesc) -{ - u64 count; - - lockdep_assert_held(&snp_cmd_mutex); - - /* Read the current message sequence counter from secrets pages */ - count = *mdesc->os_area_msg_seqno; - - return count + 1; -} - -/* Return a non-zero on success */ -static u64 snp_get_msg_seqno(struct snp_msg_desc *mdesc) -{ - u64 count = __snp_get_msg_seqno(mdesc); - - /* - * The message sequence counter for the SNP guest request is a 64-bit - * value but the version 2 of GHCB specification defines a 32-bit storage - * for it. If the counter exceeds the 32-bit value then return zero. - * The caller should check the return value, but if the caller happens to - * not check the value and use it, then the firmware treats zero as an - * invalid number and will fail the message request. - */ - if (count >= UINT_MAX) { - pr_err("request message sequence counter overflow\n"); - return 0; - } - - return count; -} - -static void snp_inc_msg_seqno(struct snp_msg_desc *mdesc) -{ - /* - * The counter is also incremented by the PSP, so increment it by 2 - * and save in secrets page. 
- */ - *mdesc->os_area_msg_seqno += 2; -} - static inline struct snp_guest_dev *to_snp_dev(struct file *file) { struct miscdevice *dev = file->private_data; @@ -147,242 +64,6 @@ static inline struct snp_guest_dev *to_snp_dev(struct file *file) return container_of(dev, struct snp_guest_dev, misc); } -static struct aesgcm_ctx *snp_init_crypto(u8 *key, size_t keylen) -{ - struct aesgcm_ctx *ctx; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT); - if (!ctx) - return NULL; - - if (aesgcm_expandkey(ctx, key, keylen, AUTHTAG_LEN)) { - pr_err("Crypto context initialization failed\n"); - kfree(ctx); - return NULL; - } - - return ctx; -} - -static int verify_and_dec_payload(struct snp_msg_desc *mdesc, struct snp_guest_req *req) -{ - struct snp_guest_msg *resp_msg = &mdesc->secret_response; - struct snp_guest_msg *req_msg = &mdesc->secret_request; - struct snp_guest_msg_hdr *req_msg_hdr = &req_msg->hdr; - struct snp_guest_msg_hdr *resp_msg_hdr = &resp_msg->hdr; - struct aesgcm_ctx *ctx = mdesc->ctx; - u8 iv[GCM_AES_IV_SIZE] = {}; - - pr_debug("response [seqno %lld type %d version %d sz %d]\n", - resp_msg_hdr->msg_seqno, resp_msg_hdr->msg_type, resp_msg_hdr->msg_version, - resp_msg_hdr->msg_sz); - - /* Copy response from shared memory to encrypted memory. */ - memcpy(resp_msg, mdesc->response, sizeof(*resp_msg)); - - /* Verify that the sequence counter is incremented by 1 */ - if (unlikely(resp_msg_hdr->msg_seqno != (req_msg_hdr->msg_seqno + 1))) - return -EBADMSG; - - /* Verify response message type and version number. */ - if (resp_msg_hdr->msg_type != (req_msg_hdr->msg_type + 1) || - resp_msg_hdr->msg_version != req_msg_hdr->msg_version) - return -EBADMSG; - - /* - * If the message size is greater than our buffer length then return - * an error. - */ - if (unlikely((resp_msg_hdr->msg_sz + ctx->authsize) > req->resp_sz)) - return -EBADMSG; - - /* Decrypt the payload */ - memcpy(iv, &resp_msg_hdr->msg_seqno, min(sizeof(iv), sizeof(resp_msg_hdr->msg_seqno))); - if (!aesgcm_decrypt(ctx, req->resp_buf, resp_msg->payload, resp_msg_hdr->msg_sz, - &resp_msg_hdr->algo, AAD_LEN, iv, resp_msg_hdr->authtag)) - return -EBADMSG; - - return 0; -} - -static int enc_payload(struct snp_msg_desc *mdesc, u64 seqno, struct snp_guest_req *req) -{ - struct snp_guest_msg *msg = &mdesc->secret_request; - struct snp_guest_msg_hdr *hdr = &msg->hdr; - struct aesgcm_ctx *ctx = mdesc->ctx; - u8 iv[GCM_AES_IV_SIZE] = {}; - - memset(msg, 0, sizeof(*msg)); - - hdr->algo = SNP_AEAD_AES_256_GCM; - hdr->hdr_version = MSG_HDR_VER; - hdr->hdr_sz = sizeof(*hdr); - hdr->msg_type = req->msg_type; - hdr->msg_version = req->msg_version; - hdr->msg_seqno = seqno; - hdr->msg_vmpck = req->vmpck_id; - hdr->msg_sz = req->req_sz; - - /* Verify the sequence number is non-zero */ - if (!hdr->msg_seqno) - return -ENOSR; - - pr_debug("request [seqno %lld type %d version %d sz %d]\n", - hdr->msg_seqno, hdr->msg_type, hdr->msg_version, hdr->msg_sz); - - if (WARN_ON((req->req_sz + ctx->authsize) > sizeof(msg->payload))) - return -EBADMSG; - - memcpy(iv, &hdr->msg_seqno, min(sizeof(iv), sizeof(hdr->msg_seqno))); - aesgcm_encrypt(ctx, msg->payload, req->req_buf, req->req_sz, &hdr->algo, - AAD_LEN, iv, hdr->authtag); - - return 0; -} - -static int __handle_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, - struct snp_guest_request_ioctl *rio) -{ - unsigned long req_start = jiffies; - unsigned int override_npages = 0; - u64 override_err = 0; - int rc; - -retry_request: - /* - * Call firmware to process the request. 
In this function the encrypted - * message enters shared memory with the host. So after this call the - * sequence number must be incremented or the VMPCK must be deleted to - * prevent reuse of the IV. - */ - rc = snp_issue_guest_request(req, &mdesc->input, rio); - switch (rc) { - case -ENOSPC: - /* - * If the extended guest request fails due to having too - * small of a certificate data buffer, retry the same - * guest request without the extended data request in - * order to increment the sequence number and thus avoid - * IV reuse. - */ - override_npages = mdesc->input.data_npages; - req->exit_code = SVM_VMGEXIT_GUEST_REQUEST; - - /* - * Override the error to inform callers the given extended - * request buffer size was too small and give the caller the - * required buffer size. - */ - override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); - - /* - * If this call to the firmware succeeds, the sequence number can - * be incremented allowing for continued use of the VMPCK. If - * there is an error reflected in the return value, this value - * is checked further down and the result will be the deletion - * of the VMPCK and the error code being propagated back to the - * user as an ioctl() return code. - */ - goto retry_request; - - /* - * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been - * throttled. Retry in the driver to avoid returning and reusing the - * message sequence number on a different message. - */ - case -EAGAIN: - if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) { - rc = -ETIMEDOUT; - break; - } - schedule_timeout_killable(SNP_REQ_RETRY_DELAY); - goto retry_request; - } - - /* - * Increment the message sequence number. There is no harm in doing - * this now because decryption uses the value stored in the response - * structure and any failure will wipe the VMPCK, preventing further - * use anyway. - */ - snp_inc_msg_seqno(mdesc); - - if (override_err) { - rio->exitinfo2 = override_err; - - /* - * If an extended guest request was issued and the supplied certificate - * buffer was not large enough, a standard guest request was issued to - * prevent IV reuse. If the standard request was successful, return -EIO - * back to the caller as would have originally been returned. - */ - if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) - rc = -EIO; - } - - if (override_npages) - mdesc->input.data_npages = override_npages; - - return rc; -} - -static int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req, - struct snp_guest_request_ioctl *rio) -{ - u64 seqno; - int rc; - - guard(mutex)(&snp_cmd_mutex); - - /* Check if the VMPCK is not empty */ - if (is_vmpck_empty(mdesc)) { - pr_err_ratelimited("VMPCK is disabled\n"); - return -ENOTTY; - } - - /* Get message sequence and verify that its a non-zero */ - seqno = snp_get_msg_seqno(mdesc); - if (!seqno) - return -EIO; - - /* Clear shared memory's response for the host to populate. */ - memset(mdesc->response, 0, sizeof(struct snp_guest_msg)); - - /* Encrypt the userspace provided payload in mdesc->secret_request. */ - rc = enc_payload(mdesc, seqno, req); - if (rc) - return rc; - - /* - * Write the fully encrypted request to the shared unencrypted - * request page. 
- */ - memcpy(mdesc->request, &mdesc->secret_request, - sizeof(mdesc->secret_request)); - - rc = __handle_guest_request(mdesc, req, rio); - if (rc) { - if (rc == -EIO && - rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) - return rc; - - pr_alert("Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", - rc, rio->exitinfo2); - - snp_disable_vmpck(mdesc); - return rc; - } - - rc = verify_and_dec_payload(mdesc, req); - if (rc) { - pr_alert("Detected unexpected decode failure from ASP. rc: %d\n", rc); - snp_disable_vmpck(mdesc); - return rc; - } - - return 0; -} - struct snp_req_resp { sockptr_t req_data; sockptr_t resp_data; @@ -414,7 +95,7 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_REPORT_REQ; - req.vmpck_id = vmpck_id; + req.vmpck_id = mdesc->vmpck_id; req.req_buf = report_req; req.req_sz = sizeof(*report_req); req.resp_buf = report_resp->data; @@ -461,7 +142,7 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_KEY_REQ; - req.vmpck_id = vmpck_id; + req.vmpck_id = mdesc->vmpck_id; req.req_buf = derived_key_req; req.req_sz = sizeof(*derived_key_req); req.resp_buf = buf; @@ -539,7 +220,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques req.msg_version = arg->msg_version; req.msg_type = SNP_MSG_REPORT_REQ; - req.vmpck_id = vmpck_id; + req.vmpck_id = mdesc->vmpck_id; req.req_buf = &report_req->data; req.req_sz = sizeof(report_req->data); req.resp_buf = report_resp->data; @@ -616,76 +297,11 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long return ret; } -static void free_shared_pages(void *buf, size_t sz) -{ - unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - int ret; - - if (!buf) - return; - - ret = set_memory_encrypted((unsigned long)buf, npages); - if (ret) { - WARN_ONCE(ret, "failed to restore encryption mask (leak it)\n"); - return; - } - - __free_pages(virt_to_page(buf), get_order(sz)); -} - -static void *alloc_shared_pages(struct device *dev, size_t sz) -{ - unsigned int npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - struct page *page; - int ret; - - page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(sz)); - if (!page) - return NULL; - - ret = set_memory_decrypted((unsigned long)page_address(page), npages); - if (ret) { - dev_err(dev, "failed to mark page shared, ret=%d\n", ret); - __free_pages(page, get_order(sz)); - return NULL; - } - - return page_address(page); -} - static const struct file_operations snp_guest_fops = { .owner = THIS_MODULE, .unlocked_ioctl = snp_guest_ioctl, }; -static u8 *get_vmpck(int id, struct snp_secrets_page *secrets, u32 **seqno) -{ - u8 *key = NULL; - - switch (id) { - case 0: - *seqno = &secrets->os_area.msg_seqno_0; - key = secrets->vmpck0; - break; - case 1: - *seqno = &secrets->os_area.msg_seqno_1; - key = secrets->vmpck1; - break; - case 2: - *seqno = &secrets->os_area.msg_seqno_2; - key = secrets->vmpck2; - break; - case 3: - *seqno = &secrets->os_area.msg_seqno_3; - key = secrets->vmpck3; - break; - default: - break; - } - - return key; -} - struct snp_msg_report_resp_hdr { u32 status; u32 report_size; @@ -979,13 +595,10 @@ static void unregister_sev_tsm(void *data) static int __init sev_guest_probe(struct platform_device *pdev) { - struct sev_guest_platform_data *data; - struct snp_secrets_page *secrets; struct device *dev = &pdev->dev; struct snp_guest_dev *snp_dev; 
struct snp_msg_desc *mdesc; struct miscdevice *misc; - void __iomem *mapping; int ret; BUILD_BUG_ON(sizeof(struct snp_guest_msg) > PAGE_SIZE); @@ -993,115 +606,57 @@ static int __init sev_guest_probe(struct platform_device *pdev) if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return -ENODEV; - if (!dev->platform_data) - return -ENODEV; - - data = (struct sev_guest_platform_data *)dev->platform_data; - mapping = ioremap_encrypted(data->secrets_gpa, PAGE_SIZE); - if (!mapping) - return -ENODEV; - - secrets = (__force void *)mapping; - - ret = -ENOMEM; snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL); if (!snp_dev) - goto e_unmap; + return -ENOMEM; - mdesc = devm_kzalloc(&pdev->dev, sizeof(struct snp_msg_desc), GFP_KERNEL); - if (!mdesc) - goto e_unmap; + mdesc = snp_msg_alloc(); + if (IS_ERR_OR_NULL(mdesc)) + return -ENOMEM; - /* Adjust the default VMPCK key based on the executing VMPL level */ - if (vmpck_id == -1) - vmpck_id = snp_vmpl; - - ret = -EINVAL; - mdesc->vmpck = get_vmpck(vmpck_id, secrets, &mdesc->os_area_msg_seqno); - if (!mdesc->vmpck) { - dev_err(dev, "Invalid VMPCK%d communication key\n", vmpck_id); - goto e_unmap; - } - - /* Verify that VMPCK is not zero. */ - if (is_vmpck_empty(mdesc)) { - dev_err(dev, "Empty VMPCK%d communication key\n", vmpck_id); - goto e_unmap; - } + ret = snp_msg_init(mdesc, vmpck_id); + if (ret) + goto e_msg_init; platform_set_drvdata(pdev, snp_dev); snp_dev->dev = dev; - mdesc->secrets = secrets; - - /* Allocate the shared page used for the request and response message. */ - mdesc->request = alloc_shared_pages(dev, sizeof(struct snp_guest_msg)); - if (!mdesc->request) - goto e_unmap; - - mdesc->response = alloc_shared_pages(dev, sizeof(struct snp_guest_msg)); - if (!mdesc->response) - goto e_free_request; - - mdesc->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE); - if (!mdesc->certs_data) - goto e_free_response; - - ret = -EIO; - mdesc->ctx = snp_init_crypto(mdesc->vmpck, VMPCK_KEY_LEN); - if (!mdesc->ctx) - goto e_free_cert_data; misc = &snp_dev->misc; misc->minor = MISC_DYNAMIC_MINOR; misc->name = DEVICE_NAME; misc->fops = &snp_guest_fops; - /* Initialize the input addresses for guest request */ - mdesc->input.req_gpa = __pa(mdesc->request); - mdesc->input.resp_gpa = __pa(mdesc->response); - mdesc->input.data_gpa = __pa(mdesc->certs_data); - /* Set the privlevel_floor attribute based on the vmpck_id */ - sev_tsm_ops.privlevel_floor = vmpck_id; + sev_tsm_ops.privlevel_floor = mdesc->vmpck_id; ret = tsm_register(&sev_tsm_ops, snp_dev); if (ret) - goto e_free_cert_data; + goto e_msg_init; ret = devm_add_action_or_reset(&pdev->dev, unregister_sev_tsm, NULL); if (ret) - goto e_free_cert_data; + goto e_msg_init; ret = misc_register(misc); if (ret) - goto e_free_ctx; + goto e_msg_init; snp_dev->msg_desc = mdesc; - dev_info(dev, "Initialized SEV guest driver (using VMPCK%d communication key)\n", vmpck_id); + dev_info(dev, "Initialized SEV guest driver (using VMPCK%d communication key)\n", + mdesc->vmpck_id); return 0; -e_free_ctx: - kfree(mdesc->ctx); -e_free_cert_data: - free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE); -e_free_response: - free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg)); -e_free_request: - free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg)); -e_unmap: - iounmap(mapping); +e_msg_init: + snp_msg_free(mdesc); + return ret; } static void __exit sev_guest_remove(struct platform_device *pdev) { struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev); - struct 
snp_msg_desc *mdesc = snp_dev->msg_desc;
- free_shared_pages(mdesc->certs_data, SEV_FW_BLOB_MAX_SIZE);
- free_shared_pages(mdesc->response, sizeof(struct snp_guest_msg));
- free_shared_pages(mdesc->request, sizeof(struct snp_guest_msg));
- kfree(mdesc->ctx);
+ snp_msg_free(snp_dev->msg_desc);
 misc_deregister(&snp_dev->misc);
 }
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
index caa4b4430634..0bf7d33a1048 100644
--- a/include/linux/cc_platform.h
+++ b/include/linux/cc_platform.h
@@ -81,6 +81,14 @@ enum cc_attr {
 */
 CC_ATTR_GUEST_SEV_SNP,
+ /**
+ * @CC_ATTR_GUEST_SNP_SECURE_TSC: SNP Secure TSC is active.
+ *
+ * The platform/OS is running as a guest/virtual machine and actively
+ * using the AMD SEV-SNP Secure TSC feature.
+ */
+ CC_ATTR_GUEST_SNP_SECURE_TSC,
+
 /**
 * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host.
 *