commit 69cdec6c61e7866110f917f1f5bf4d3381b029a5 from: Murilo Ijanc date: Mon Mar 23 23:12:03 2026 UTC add syscall/sysret interface for ring 3 to ring 0 transition commit - 28328e7600e718a0b0b419e39f5bcdfe01cd17f4 commit + 69cdec6c61e7866110f917f1f5bf4d3381b029a5 blob - /dev/null blob + 19c5e1cb6f1699b2130d43d1e633ee098d376a8c (mode 644) --- /dev/null +++ src/syscall.rs @@ -0,0 +1,245 @@ +// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et : +// +// Copyright (c) 2025-2026 Murilo Ijanc' +// +// Permission to use, copy, modify, and/or distribute this software for any +// purpose with or without fee is hereby granted, provided that the above +// copyright notice and this permission notice appear in all copies. +// +// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +// + +//! syscall/sysret interface for x86-64. +//! +//! ABI: +//! rax = syscall number +//! rdi = arg1, rsi = arg2, rdx = arg3 +//! r10 = arg4, r8 = arg5 +//! rax = return value + +use core::arch::{asm, naked_asm}; + +use crate::gdt; +use crate::serial; + +// MSR addresses +const MSR_STAR: u32 = 0xC000_0081; +const MSR_LSTAR: u32 = 0xC000_0082; +const MSR_SFMASK: u32 = 0xC000_0084; + +// Syscall numbers +pub const SYS_DEBUG_PRINT: u64 = 14; +pub const SYS_EXIT: u64 = 5; +pub const SYS_YIELD: u64 = 6; + +// Per-CPU kernel/user stack pointers (single-core) +static mut KERNEL_RSP: u64 = 0; +static mut USER_RSP: u64 = 0; + +/// Initialize the syscall mechanism by writing MSRs. +pub fn init(kernel_stack: u64) { + unsafe { + KERNEL_RSP = kernel_stack; + } + + // STAR: syscall CS/SS in bits 47:32, sysret base in 63:48 + // syscall: CS = 0x08 (kernel code), SS = 0x08+8 = 0x10 + // sysret: SS = 0x10+8 = 0x18 (user data), CS = 0x10+16 = 0x20 (user code) + // CPU adds RPL=3 for sysret automatically + let star: u64 = (0x0010u64 << 48) | (0x0008u64 << 32); + + // LSTAR: address of syscall entry point + let lstar = syscall_entry as *const () as u64; + + // SFMASK: clear IF (bit 9) on syscall entry + let sfmask: u64 = 0x200; + + unsafe { + wrmsr(MSR_STAR, star); + wrmsr(MSR_LSTAR, lstar); + wrmsr(MSR_SFMASK, sfmask); + + // Enable SCE (System Call Extensions) in EFER + let efer = rdmsr(0xC000_0080); + wrmsr(0xC000_0080, efer | 1); + } +} + +/// The raw syscall entry point (naked function). +/// CPU state on entry: +/// RCX = user RIP +/// R11 = user RFLAGS +/// RSP = user stack (untrusted!) +#[unsafe(no_mangle)] +#[unsafe(naked)] +unsafe extern "C" fn syscall_entry() { + naked_asm!( + // Save user RSP, load kernel RSP + // (no swapgs — single-core, use absolute addresses) + "mov [{user_rsp}], rsp", + "mov rsp, [{kern_rsp}]", + + // Build a frame for the dispatcher + "push rcx", // user RIP + "push r11", // user RFLAGS + "push rbp", + "push rbx", + "push r12", + "push r13", + "push r14", + "push r15", + + // Shuffle args: rax=nr→rdi, rdi→rsi, rsi→rdx, + // rdx→rcx, r10→r8, r8→r9 + "mov r9, r8", + "mov r8, r10", + "mov rcx, rdx", + "mov rdx, rsi", + "mov rsi, rdi", + "mov rdi, rax", + "call syscall_dispatch", + // Return value is in rax + + // Restore + "pop r15", + "pop r14", + "pop r13", + "pop r12", + "pop rbx", + "pop rbp", + "pop r11", // user RFLAGS + "pop rcx", // user RIP + + // Restore user RSP + "mov rsp, [{user_rsp}]", + + "sysretq", + + user_rsp = sym USER_RSP, + kern_rsp = sym KERNEL_RSP, + ); +} + +/// Syscall dispatcher (called from assembly). +#[unsafe(no_mangle)] +extern "C" fn syscall_dispatch( + nr: u64, + arg1: u64, + arg2: u64, + _arg3: u64, + _arg4: u64, + _arg5: u64, +) -> u64 { + match nr { + SYS_DEBUG_PRINT => { + // arg1 = pointer to string, arg2 = length + // Safety: we trust the pointer for now (kernel + // tasks). A real implementation would validate. + let ptr = arg1 as *const u8; + let len = arg2 as usize; + if len <= 256 { + for i in 0..len { + let b = unsafe { *ptr.add(i) }; + if b == b'\n' { + serial::putc(b'\r'); + } + serial::putc(b); + } + } + 0 + } + SYS_EXIT => { + crate::sched::exit(); + } + SYS_YIELD => { + crate::sched::yield_now(); + 0 + } + _ => { + serial::print("syscall: unknown nr="); + print_num(nr); + serial::print("\n"); + u64::MAX // error + } + } +} + +/// Jump to Ring 3 and execute `entry` with `user_stack`. +/// Does not return. +pub fn jump_to_usermode( + entry: u64, + user_stack: u64, +) -> ! { + unsafe { + asm!( + "cli", + // Set up iretq frame on stack: + // SS, RSP, RFLAGS, CS, RIP + "push {ss}", // user SS + "push {rsp_u}", // user RSP + "push 0x202", // RFLAGS (IF=1) + "push {cs}", // user CS + "push {rip_u}", // user RIP + "iretq", + ss = in(reg) gdt::USER_DATA as u64, + rsp_u = in(reg) user_stack, + cs = in(reg) gdt::USER_CODE as u64, + rip_u = in(reg) entry, + options(noreturn), + ); + } +} + +unsafe fn wrmsr(msr: u32, val: u64) { + let lo = val as u32; + let hi = (val >> 32) as u32; + unsafe { + asm!( + "wrmsr", + in("ecx") msr, + in("eax") lo, + in("edx") hi, + options(nostack), + ); + } +} + +unsafe fn rdmsr(msr: u32) -> u64 { + let lo: u32; + let hi: u32; + unsafe { + asm!( + "rdmsr", + in("ecx") msr, + out("eax") lo, + out("edx") hi, + options(nostack), + ); + } + ((hi as u64) << 32) | lo as u64 +} + +fn print_num(val: u64) { + if val == 0 { + serial::putc(b'0'); + return; + } + let mut buf = [0u8; 20]; + let mut n = val; + let mut i = 0; + while n > 0 { + buf[i] = b'0' + (n % 10) as u8; + n /= 10; + i += 1; + } + while i > 0 { + i -= 1; + serial::putc(buf[i]); + } +}