commit - 28328e7600e718a0b0b419e39f5bcdfe01cd17f4
commit + 69cdec6c61e7866110f917f1f5bf4d3381b029a5
blob - /dev/null
blob + 19c5e1cb6f1699b2130d43d1e633ee098d376a8c (mode 644)
--- /dev/null
+++ src/syscall.rs
+// vim: set tw=79 cc=80 ts=4 sw=4 sts=4 et :
+//
+// Copyright (c) 2025-2026 Murilo Ijanc' <murilo@ijanc.org>
+//
+// Permission to use, copy, modify, and/or distribute this software for any
+// purpose with or without fee is hereby granted, provided that the above
+// copyright notice and this permission notice appear in all copies.
+//
+// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+//
+
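+//! syscall/sysret interface for x86-64.
+//!
+//! ABI:
+//! rax = syscall number
+//! rdi = arg1, rsi = arg2, rdx = arg3
+//! r10 = arg4, r8 = arg5
+//! rax = return value
+//! rcx, r11 = clobbered (the CPU stores user RIP / RFLAGS there)
+//! rdi, rsi, rdx, r8, r9, r10 = not preserved across the call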
+
+use core::arch::{asm, naked_asm};
+
+use crate::gdt;
+use crate::serial;
+
+// MSR addresses
+// STAR holds the segment selector bases used by syscall/sysret, LSTAR the
+// 64-bit entry point address, and SFMASK the RFLAGS bits cleared on entry.
+const MSR_STAR: u32 = 0xC000_0081;
+const MSR_LSTAR: u32 = 0xC000_0082;
+const MSR_SFMASK: u32 = 0xC000_0084;
+
+// Syscall numbers
+// These are the values user code places in rax; `syscall_dispatch` matches
+// on them.
+// SYS_DEBUG_PRINT: arg1 = pointer to bytes, arg2 = length; the bytes are
+// written to the serial port. Requests longer than 256 bytes print nothing.
+pub const SYS_DEBUG_PRINT: u64 = 14;
+// SYS_EXIT: calls `crate::sched::exit()`.
+pub const SYS_EXIT: u64 = 5;
+// SYS_YIELD: calls `crate::sched::yield_now()` and returns 0.
+pub const SYS_YIELD: u64 = 6;
+
+// Per-CPU kernel/user stack pointers (single-core)
+// KERNEL_RSP is set by `init` and loaded into RSP by `syscall_entry`.
+// USER_RSP is where `syscall_entry` parks the user RSP for the duration of
+// a syscall. Both are accessed by symbol from the assembly stub.
+static mut KERNEL_RSP: u64 = 0;
+static mut USER_RSP: u64 = 0;
+
+/// Initialize the syscall mechanism by writing MSRs.
+///
+/// Records `kernel_stack` as the stack pointer `syscall_entry` switches to,
+/// programs STAR, LSTAR and SFMASK, and only then sets EFER.SCE so the
+/// `syscall` instruction becomes usable once everything else is in place.
+///
+/// `kernel_stack` should be 16-byte aligned: the entry stub pushes eight
+/// 8-byte registers and then calls an `extern "C"` function.
+pub fn init(kernel_stack: u64) {
+    // Extended Feature Enable Register and its System Call Extensions bit.
+    const MSR_EFER: u32 = 0xC000_0080;
+    const EFER_SCE: u64 = 1 << 0;
+
+    // RFLAGS bits that must not leak from user mode into the entry path.
+    const RFLAGS_TF: u64 = 1 << 8; // single-step trap
+    const RFLAGS_IF: u64 = 1 << 9; // maskable interrupts
+    const RFLAGS_DF: u64 = 1 << 10; // string direction
+    const RFLAGS_NT: u64 = 1 << 14; // nested task (iretq faults if set)
+    const RFLAGS_AC: u64 = 1 << 18; // alignment check / SMAP override
+
+    // SAFETY: single-core, and `syscall` is not enabled until EFER.SCE is
+    // set at the end of this function, so the entry stub cannot be reading
+    // KERNEL_RSP concurrently. Assumes `init` is called once during boot.
+    unsafe {
+        KERNEL_RSP = kernel_stack;
+    }
+
+    // STAR: syscall CS/SS in bits 47:32, sysret base in 63:48
+    //   syscall: CS = 0x08 (kernel code), SS = 0x08+8 = 0x10
+    //   sysret:  SS = 0x10+8 = 0x18 (user data),
+    //            CS = 0x10+16 = 0x20 (user code)
+    //   CPU adds RPL=3 for sysret automatically
+    let star: u64 = (0x0010u64 << 48) | (0x0008u64 << 32);
+
+    // LSTAR: address of syscall entry point
+    let lstar = syscall_entry as *const () as u64;
+
+    // SFMASK: every bit set here is cleared in RFLAGS on syscall entry.
+    // IF keeps interrupts off while RSP still points at the user stack.
+    // TF would otherwise raise #DB on the first kernel instruction, and DF
+    // must be clear for compiled code (SysV ABI). NT and AC are cleared so
+    // user mode cannot influence a later iretq or SMAP checks.
+    // sysretq restores the user's original RFLAGS from r11.
+    let sfmask: u64 =
+        RFLAGS_TF | RFLAGS_IF | RFLAGS_DF | RFLAGS_NT | RFLAGS_AC;
+
+    // SAFETY: these are the architectural syscall MSRs; the values written
+    // are the selector layout described above and the address of
+    // `syscall_entry`.
+    unsafe {
+        wrmsr(MSR_STAR, star);
+        wrmsr(MSR_LSTAR, lstar);
+        wrmsr(MSR_SFMASK, sfmask);
+
+        // Enable SCE (System Call Extensions) in EFER, preserving the
+        // other EFER bits.
+        let efer = rdmsr(MSR_EFER);
+        wrmsr(MSR_EFER, efer | EFER_SCE);
+    }
+}
+
+/// The raw syscall entry point (naked function).
+/// CPU state on entry:
+/// RCX = user RIP
+/// R11 = user RFLAGS
+/// RSP = user stack (untrusted!)
+///
+/// Switches to the kernel stack, saves RCX, R11 and the callee-saved
+/// registers, moves the syscall ABI registers into the C argument
+/// registers, calls `syscall_dispatch`, restores what it saved and returns
+/// to user mode with `sysretq`. The dispatcher's result travels back in RAX.
+/// RDI, RSI, RDX, R8, R9 and R10 are not saved or restored.
+#[unsafe(no_mangle)]
+#[unsafe(naked)]
+unsafe extern "C" fn syscall_entry() {
+ naked_asm!(
+ // Save user RSP, load kernel RSP
+ // (no swapgs — single-core, use absolute addresses)
+ // NOTE(review): USER_RSP is one global slot. If the dispatcher switches
+ // to another task that enters here before this call returns, the saved
+ // value would be overwritten — confirm the scheduler accounts for it.
+ "mov [{user_rsp}], rsp",
+ "mov rsp, [{kern_rsp}]",
+
+ // Build a frame for the dispatcher
+ // Eight 8-byte pushes (64 bytes) leave RSP's 16-byte alignment the same
+ // as that of the value loaded from KERNEL_RSP.
+ "push rcx", // user RIP
+ "push r11", // user RFLAGS
+ "push rbp",
+ "push rbx",
+ "push r12",
+ "push r13",
+ "push r14",
+ "push r15",
+
+ // Shuffle args: rax=nr→rdi, rdi→rsi, rsi→rdx,
+ // rdx→rcx, r10→r8, r8→r9
+ // The moves run from the last argument to the first so every source
+ // register is read before it is overwritten.
+ "mov r9, r8",
+ "mov r8, r10",
+ "mov rcx, rdx",
+ "mov rdx, rsi",
+ "mov rsi, rdi",
+ "mov rdi, rax",
+ "call syscall_dispatch",
+ // Return value is in rax
+
+ // Restore
+ // Pops mirror the pushes above in reverse order.
+ "pop r15",
+ "pop r14",
+ "pop r13",
+ "pop r12",
+ "pop rbx",
+ "pop rbp",
+ "pop r11", // user RFLAGS
+ "pop rcx", // user RIP
+
+ // Restore user RSP
+ "mov rsp, [{user_rsp}]",
+
+ // sysretq resumes user code at RCX with RFLAGS taken from R11.
+ "sysretq",
+
+ user_rsp = sym USER_RSP,
+ kern_rsp = sym KERNEL_RSP,
+ );
+}
+
+/// Syscall dispatcher (called from assembly).
+///
+/// `nr` is the syscall number (user rax); `arg1`..`_arg5` are the user's
+/// rdi, rsi, rdx, r10 and r8 as shuffled by `syscall_entry`. The returned
+/// value is handed back to the caller in rax.
+///
+/// Returns 0 on success and `u64::MAX` for an unknown syscall number or a
+/// rejected request. `SYS_EXIT` does not return.
+#[unsafe(no_mangle)]
+extern "C" fn syscall_dispatch(
+    nr: u64,
+    arg1: u64,
+    arg2: u64,
+    _arg3: u64,
+    _arg4: u64,
+    _arg5: u64,
+) -> u64 {
+    // Largest buffer SYS_DEBUG_PRINT emits in one call.
+    const DEBUG_PRINT_MAX: u64 = 256;
+    // Value reported to the caller when a request is rejected.
+    const ERR: u64 = u64::MAX;
+
+    match nr {
+        SYS_DEBUG_PRINT => {
+            // arg1 = pointer to string, arg2 = length
+            if arg2 > DEBUG_PRINT_MAX {
+                // Report the refusal instead of claiming success.
+                return ERR;
+            }
+            if arg2 == 0 {
+                // Nothing to print; the pointer is never touched.
+                return 0;
+            }
+            if arg1 == 0 || arg1.checked_add(arg2).is_none() {
+                // Null buffer, or a range that wraps the address space.
+                return ERR;
+            }
+
+            let ptr = arg1 as *const u8;
+            let len = arg2 as usize;
+            for i in 0..len {
+                // SAFETY: `ptr` is non-null and `ptr + len` does not wrap.
+                // Beyond that the pointer is still trusted (kernel tasks);
+                // a real implementation would validate the mapping.
+                let b = unsafe { ptr.add(i).read() };
+                if b == b'\n' {
+                    serial::putc(b'\r');
+                }
+                serial::putc(b);
+            }
+            0
+        }
+        SYS_EXIT => {
+            crate::sched::exit();
+        }
+        SYS_YIELD => {
+            crate::sched::yield_now();
+            0
+        }
+        _ => {
+            serial::print("syscall: unknown nr=");
+            print_num(nr);
+            serial::print("\n");
+            ERR
+        }
+    }
+}
+
+/// Drop to Ring 3: start executing `entry` with `user_stack` as RSP.
+///
+/// Builds the five-word frame `iretq` consumes (SS, RSP, RFLAGS, CS, RIP)
+/// on the current stack and executes `iretq`. Never returns.
+///
+/// NOTE(review): assumes `gdt::USER_CODE` / `gdt::USER_DATA` already carry
+/// RPL 3 — confirm against the GDT module.
+pub fn jump_to_usermode(entry: u64, user_stack: u64) -> ! {
+    let user_cs = gdt::USER_CODE as u64;
+    let user_ss = gdt::USER_DATA as u64;
+
+    // SAFETY: control leaves this function for good (`noreturn`), so the
+    // pushes never have to be unwound. The caller must pass an entry point
+    // and stack that are valid for user mode.
+    unsafe {
+        asm!(
+            // Keep interrupts off while the frame is being built; the
+            // pushed RFLAGS image turns them back on in user mode.
+            "cli",
+            "push {sel_data}", // user SS
+            "push {stack}",    // user RSP
+            "push 0x202",      // RFLAGS (IF=1)
+            "push {sel_code}", // user CS
+            "push {target}",   // user RIP
+            "iretq",
+            sel_data = in(reg) user_ss,
+            stack = in(reg) user_stack,
+            sel_code = in(reg) user_cs,
+            target = in(reg) entry,
+            options(noreturn),
+        );
+    }
+}
+
+/// Write the 64-bit value `val` to the model-specific register `msr`.
+///
+/// The `wrmsr` instruction takes the register index in ECX and the value
+/// split across EDX:EAX (high:low).
+///
+/// # Safety
+///
+/// The caller must ensure that `msr` names a register that exists and that
+/// `val` is a valid setting for it; this executes a raw `wrmsr`.
+unsafe fn wrmsr(msr: u32, val: u64) {
+    // SAFETY: the requirements are forwarded to the caller (see above).
+    unsafe {
+        asm!(
+            "wrmsr",
+            in("ecx") msr,
+            in("eax") val as u32,         // low 32 bits (truncating)
+            in("edx") (val >> 32) as u32, // high 32 bits
+            options(nostack),
+        );
+    }
+}
+
+/// Read the model-specific register `msr` and return its 64-bit value.
+///
+/// The `rdmsr` instruction takes the register index in ECX and delivers
+/// the value split across EDX:EAX (high:low).
+///
+/// # Safety
+///
+/// The caller must ensure that `msr` names a register that exists; this
+/// executes a raw `rdmsr`.
+unsafe fn rdmsr(msr: u32) -> u64 {
+    let low_half: u32;
+    let high_half: u32;
+    // SAFETY: the requirements are forwarded to the caller (see above).
+    unsafe {
+        asm!(
+            "rdmsr",
+            in("ecx") msr,
+            out("eax") low_half,
+            out("edx") high_half,
+            options(nostack),
+        );
+    }
+    (u64::from(high_half) << 32) | u64::from(low_half)
+}
+
+/// Write `val` to the serial port as an unsigned decimal number, without
+/// padding or a trailing newline.
+fn print_num(val: u64) {
+    // u64::MAX has 20 decimal digits, so this buffer always suffices.
+    let mut digits = [b'0'; 20];
+    let mut start = digits.len();
+    let mut rest = val;
+
+    // Fill the buffer from the back, least significant digit first. The
+    // loop body runs at least once, so zero yields a single '0'.
+    loop {
+        start -= 1;
+        digits[start] = b'0' + (rest % 10) as u8;
+        rest /= 10;
+        if rest == 0 {
+            break;
+        }
+    }
+
+    for &digit in &digits[start..] {
+        serial::putc(digit);
+    }
+}