You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

176 lines
6.1 KiB
ArmAsm

#include "csr.h"
// Kernel stack: 4096 bytes between stack_bottom and stack_top.
// stack_top is what gets loaded into sp, so it has to satisfy the 16-byte
// stack alignment of the standard RISC-V calling convention. Aligning
// stack_bottom is sufficient because the size (4096) is a multiple of 16.
.section .stack
.balign 16
stack_bottom:
    .space 4096
stack_top:
// Boot entry point. It gets its own section so that the linker can place it
// at the very start of the binary.
.section .text._start
// init is a function defined in another file
.extern init
.type init, @function
.global _start
_start:
    // mie: allow machine timer (MTIE, bit 7) and machine software
    // (MSIE, bit 3) interrupts
    li      a0, (1 << 7) | (1 << 3)
    csrw    CSR_MIE, a0

    // mtvec: every trap enters at trap_vector
    la      a0, trap_vector
    csrw    CSR_MTVEC, a0

    // mstatus: set only MPIE (bit 7), so that interrupts become enabled
    // by an mret rather than immediately
    li      a0, (1 << 7)
    csrw    CSR_MSTATUS, a0

    // set up stack pointer and global pointer; relaxation is switched off
    // for these two loads
.option push
.option norelax
    la      sp, stack_top
    la      gp, __global_pointer$
.option pop

    // zero the kernel bss: memset(a0 = fill word, a1 = start, a2 = end)
    li      a0, 0
    la      a1, _bss_start
    la      a2, _bss_end
    jal     ra, memset

    // hand over to init
    jal     ra, init

    // init returned: request a machine halt through the halt CSR
    li      t0, -1
    csrw    CSR_HALT, t0

    // in case the halt CSR write did not stop execution, park here forever
.Lstart_park:
    j       .Lstart_park
.extern trap_handle
.type trap_handle, @function
// Machine-mode trap entry (installed in mtvec by _start).
//
// On entry mscratch must hold the address of the regs field of the current
// PCB. Registers x1..x31 are stored there at offset 4 * (n - 1), and mepc is
// stored in the word directly below the regs field (offset -4).
// Afterwards trap_handle is called with:
//   a0 = mcause bit 31 (1 for interrupts, 0 for exceptions)
//   a1 = mcause with bit 31 cleared (the cause code)
//   a2 = mtval
.align 4
trap_vector:
    // swap t6 with mscratch: t6 now points at the PCB regs field and the
    // interrupted t6 value is parked in mscratch
    csrrw   t6, CSR_MSCRATCH, t6
    sw      ra,    0(t6)
    sw      sp,    4(t6)
    sw      gp,    8(t6)
    sw      tp,   12(t6)
    sw      t0,   16(t6)
    sw      t1,   20(t6)
    sw      t2,   24(t6)
    sw      s0,   28(t6)
    sw      s1,   32(t6)
    sw      a0,   36(t6)
    sw      a1,   40(t6)
    sw      a2,   44(t6)
    sw      a3,   48(t6)
    sw      a4,   52(t6)
    sw      a5,   56(t6)
    sw      a6,   60(t6)
    sw      a7,   64(t6)
    sw      s2,   68(t6)
    sw      s3,   72(t6)
    sw      s4,   76(t6)
    sw      s5,   80(t6)
    sw      s6,   84(t6)
    sw      s7,   88(t6)
    sw      s8,   92(t6)
    sw      s9,   96(t6)
    sw      s10, 100(t6)
    sw      s11, 104(t6)
    sw      t3,  108(t6)
    sw      t4,  112(t6)
    sw      t5,  116(t6)
    // a0 is already saved, so it can carry the PCB regs address from here on
    mv      a0, t6
    // swap back: t6 gets its interrupted value again and mscratch once more
    // holds the PCB regs address
    csrrw   t6, CSR_MSCRATCH, t6
    sw      t6,  120(a0)
    // save mepc into the pc field of the PCB (the word just below regs)
    csrr    t6, CSR_MEPC
    sw      t6,   -4(a0)
    // split mcause into the arguments for trap_handle
    csrr    a1, CSR_MCAUSE
    srli    a0, a1, 31              // a0 = interrupt flag (mcause bit 31)
    slli    a1, a1, 1               // shift bit 31 out ...
    srli    a1, a1, 1               // ... a1 = cause code without the flag
    csrr    a2, CSR_MTVAL
    // switch to the kernel stack and global pointer. gp is loaded from
    // __global_pointer$, the same symbol _start uses and the one the linker
    // resolves gp-relative accesses against.
.option push
.option norelax
    la      sp, stack_top
    la      gp, __global_pointer$
.option pop
    jal     trap_handle
    // NOTE(review): trap_handle presumably resumes a thread itself and never
    // comes back here - confirm. If it does return, spin instead of falling
    // through into whatever code follows this routine.
.Ltrap_vector_park:
    j       .Ltrap_vector_park
// make memset global
.global memset
.type memset, @function
// memset: store the 32-bit word in a0 to every word in [a1, a2).
//   a0 = fill word
//   a1 = start address (four byte aligned)
//   a2 = end address, exclusive (four byte aligned)
// Clobbers a1 (both variants) and t1, t2 (unrolled variant). Returns nothing.
// An empty or inverted range (a1 >= a2, signed compare) writes nothing.
// NOTE(review): this is NOT the C library signature (ptr, value, count). If
// the C compiler emits implicit calls to memset they would end up here with
// the wrong argument meaning - verify the build flags.
#ifdef __risc_no_ext
// "dumb" memset, one word per iteration. since memset is currently only used
// at startup, the performance implications should be minimal.
memset:
    bge     a1, a2, 2f              // nothing to do for an empty range
1:
    sw      a0, 0(a1)
    addi    a1, a1, 4
    blt     a1, a2, 1b
2:
    ret
#else
// "smart" memset, writing 32 (numbytes) bytes per loop iteration.
// To prevent overshooting the end, the first iteration is entered part way
// through the unrolled stores, so that (a2 - a1) is a multiple of numbytes
// when the blt is reached for the first time.
// This works because every store is 4 bytes of instruction that writes 4
// bytes of memory, therefore
//   instruction bytes to skip = write bytes to skip
//                             = numbytes - ((a2 - a1) % numbytes)
// The computed jump depends on that 4-byte encoding, so instruction
// compression is switched off for this routine (sw a0, imm(a1) would
// otherwise be eligible for a 2-byte encoding when RVC is enabled).
// The remainder is taken with andi, so the M extension is not needed.
.option push
.option norvc
memset:
    bge     a1, a2, 2f              // empty range: without this guard a
                                    // full 32 byte block would be written
    sub     t1, a2, a1              // t1 = a2 - a1
    andi    t1, t1, 31              // t1 = (a2 - a1) % numbytes
    beq     zero, t1, 1f            // skip 0 bytes? => begin loop
    li      t2, 32                  // = numbytes
    sub     t2, t2, t1              // t2 = bytes to skip
    sub     a1, a1, t2              // move a1 back by the skipped bytes so
                                    // the first executed store hits the
                                    // original a1, and the addi of 32 below
                                    // lands on the right address
    auipc   t1, 0                   // t1 = address of this auipc
    add     t1, t2, t1              // add the calculated offset
    jalr    zero, t1, 12            // forward jump into the unrolled stores;
                                    // the 12 compensates for the three
                                    // instructions auipc, add, jalr
1:
    sw      a0,  0(a1)
    sw      a0,  4(a1)
    sw      a0,  8(a1)
    sw      a0, 12(a1)
    sw      a0, 16(a1)
    sw      a0, 20(a1)
    sw      a0, 24(a1)
    sw      a0, 28(a1)
    addi    a1, a1, 32
    blt     a1, a2, 1b
2:
    ret
.option pop
#endif
// this is where instantiated threads return to once they are finished
.section .thread_fini
.global thread_finalizer
thread_finalizer:
    // NOTE(review): the purpose of this leading nop is not visible from
    // this file; it is kept unchanged.
    nop
.Lthread_exit_retry:
    // just a simple exit syscall: the syscall number goes in a7
    li      a7, 5
    ecall
    // the exit syscall is not expected to return. If it does, issue it
    // again instead of running off the end of this section.
    j       .Lthread_exit_retry