// RISC-V kernel startup code: stack reservation, the _start entry point,
// the trap_vector trap entry, memset and thread_finalizer.
// This file is run through the C preprocessor (it uses #include and #ifdef).

#include "csr.h"
// Boot/trap stack: 4096 bytes. The stack grows downwards, so stack_top (the
// address just past the reserved space) is the value loaded into sp.
// The section is aligned to 16 bytes so that stack_top (stack_bottom + 4096)
// satisfies the 16-byte stack alignment of the standard RISC-V calling
// convention, independent of where the linker places the section.
.section .stack
.balign 16
stack_bottom:
    .space  4096
stack_top:
// put the startup code in a special section so that the linker can position it at the start of the binary
.section .text._start

// tell the linker that init is a function located elsewhere
.extern init
.type init, @function

// mie bits enabled at boot:
//   mie[7] MTIE = 1 - enable timer interrupts
//   mie[3] MSIE = 1 - enable software interrupts
#define BOOT_MIE_BITS       ((1 << 7) | (1 << 3))
// mstatus bits set at boot:
//   mstatus[7] MPIE = 1 - interrupts get enabled by a later mret
#define BOOT_MSTATUS_BITS   (1 << 7)

// _start: entry point of the binary.
//   1. enables the machine timer and software interrupt sources in mie
//   2. installs trap_vector as the trap handler address in mtvec
//   3. writes mstatus with only MPIE set
//   4. initialises sp (stack_top) and gp (__global_pointer$)
//   5. fills [_bss_start, _bss_end) with zero using memset
//   6. calls init; if init returns, writes -1 to CSR_HALT and then spins
.global _start
_start:
    // interrupt sources targeting machine mode
    li      a0, BOOT_MIE_BITS
    csrw    CSR_MIE, a0

    // trap vector address
    la      a0, trap_vector
    csrw    CSR_MTVEC, a0

    // this overwrites the whole mstatus register: every bit except MPIE is
    // written as zero
    li      a0, BOOT_MSTATUS_BITS
    csrw    CSR_MSTATUS, a0

    // init sp and gp
    // relaxation is switched off here because gp does not hold its final
    // value yet while these two address loads execute
.option push
.option norelax
    la      sp, stack_top
    la      gp, __global_pointer$
.option pop

    // clear kernel bss section: memset(value = 0, start, end)
    mv      a0, zero
    la      a1, _bss_start
    la      a2, _bss_end
    jal     memset

    // jump to init
    jal     init

    // halt machine after returning from init
    li      t0, -1
    csrw    CSR_HALT, t0

    // if the halt CSR somehow did not exit immediately, trap execution in
    // this infinite loop
1:
    j       1b
.extern trap_handle
.type trap_handle, @function

// trap_vector: machine-mode trap entry (its address is written to mtvec by
// _start).
//
// On entry mscratch must hold the address of the regs field of the current
// PCB. Layout used by this code (32-bit words):
//   regs -   4 : pc (saved mepc)
//   regs +   0 : ra, sp, gp, tp, t0, t1, t2, s0, s1,
//                a0..a7, s2..s11, t3, t4, t5, t6   (last slot: regs + 120)
//
// After the register file is saved, trap_handle is called with
//   a0 = mcause[31]   (1 = interrupt, 0 = exception)
//   a1 = mcause with bit 31 cleared
//   a2 = mtval
// on a freshly initialised stack (sp = stack_top). mscratch again holds the
// regs address at that point.
.p2align 4
trap_vector:
    // switch contents of t6 with contents of mscratch:
    // t6 = regs address, mscratch = interrupted t6
    csrrw   t6, CSR_MSCRATCH, t6

    sw      ra,    0(t6)
    sw      sp,    4(t6)
    sw      gp,    8(t6)
    sw      tp,   12(t6)
    sw      t0,   16(t6)
    sw      t1,   20(t6)
    sw      t2,   24(t6)
    sw      s0,   28(t6)
    sw      s1,   32(t6)
    sw      a0,   36(t6)
    sw      a1,   40(t6)
    sw      a2,   44(t6)
    sw      a3,   48(t6)
    sw      a4,   52(t6)
    sw      a5,   56(t6)
    sw      a6,   60(t6)
    sw      a7,   64(t6)
    sw      s2,   68(t6)
    sw      s3,   72(t6)
    sw      s4,   76(t6)
    sw      s5,   80(t6)
    sw      s6,   84(t6)
    sw      s7,   88(t6)
    sw      s8,   92(t6)
    sw      s9,   96(t6)
    sw      s10, 100(t6)
    sw      s11, 104(t6)
    sw      t3,  108(t6)
    sw      t4,  112(t6)
    sw      t5,  116(t6)

    // a0 is already saved, so it can carry the regs address while t6 is
    // swapped back: t6 = interrupted t6, mscratch = regs address
    mv      a0, t6
    csrrw   t6, CSR_MSCRATCH, t6
    sw      t6, 120(a0)

    // save mepc to the pc field, which sits directly in front of regs
    csrr    t6, CSR_MEPC
    sw      t6, -4(a0)

    // split mcause into the interrupt flag (a0) and the cause code (a1)
    csrr    a1, CSR_MCAUSE
    srli    a0, a1, 31          // a0 = mcause[31]
    slli    a1, a1, 1           // shift bit 31 out ...
    srli    a1, a1, 1           // ... and back: a1 = mcause[30:0]
    csrr    a2, CSR_MTVAL

    // reinit sp and gp
    // gp is loaded from __global_pointer$, the same symbol _start uses, so
    // that both entry paths agree on the value gp-relative accesses rely on
.option push
.option norelax
    la      sp, stack_top
    la      gp, __global_pointer$
.option pop

    jal     trap_handle

    // nothing after this call restores the saved context or executes mret,
    // so a return from trap_handle must not fall through into the code that
    // follows in this section; park the hart instead
1:
    j       1b
// make memset global
.global memset
.type memset, @function

// memset: store the 32-bit word in a0 to every word of [a1, a2).
//   a0 = value to store (left unchanged)
//   a1 = start address, four byte aligned
//   a2 = end address (exclusive), four byte aligned
//   clobbers: a1 (both variants), t1 and t2 (unrolled variant)
// Addresses are compared unsigned. An empty or inverted range (a1 >= a2)
// stores nothing.
#ifdef __risc_no_ext
// simple variant, selected by defining __risc_no_ext: one word per iteration.
// since memset is currently only used at startup, the performance
// implications should be minimal.
memset:
    bgeu    a1, a2, 2f          // empty range: nothing to do
1:
    sw      a0, 0(a1)
    addi    a1, a1, 4
    bltu    a1, a2, 1b
2:
    ret
#else
// unrolled variant, writing 32 (numbytes) bytes per loop iteration.
// to prevent overshooting the end, we first calculate how many store
// instructions of the first iteration have to be skipped. this way,
// (a2 - a1) is a multiple of numbytes when the loop branch is reached for
// the first time.
// this works because each store writes 4 bytes of memory and occupies
// 4 bytes of instruction memory, therefore
//   instruction bytes to skip = write bytes to skip
//                             = numbytes - ((a2 - a1) % numbytes)
// the stores are assembled with compression disabled (.option norvc) to
// guarantee the 4-byte encodings this calculation depends on.
// numbytes is a power of two, so the remainder is taken with a mask.
memset:
    bgeu    a1, a2, 2f          // empty range: nothing to do
    sub     t1, a2, a1          // t1 = a2 - a1
    andi    t1, t1, 31          // t1 = (a2 - a1) % numbytes
    beqz    t1, 1f              // skip 0 bytes? => begin loop
    li      t2, 32              // = numbytes
    sub     t2, t2, t1          // t2 = bytes to skip
    sub     a1, a1, t2          // pre-subtract the skipped bytes from a1 so
                                // that the store offsets and the
                                // addi a1, a1, 32 below line up
.option push
.option norvc
    auipc   t1, 0               // t1 = address of this auipc
    add     t1, t2, t1          // add calculated offset
    jalr    zero, t1, 12        // forward jump into the store sequence; the
                                // 12 compensates for the three instructions
                                // auipc, add, jalr
1:
    sw      a0,  0(a1)
    sw      a0,  4(a1)
    sw      a0,  8(a1)
    sw      a0, 12(a1)
    sw      a0, 16(a1)
    sw      a0, 20(a1)
    sw      a0, 24(a1)
    sw      a0, 28(a1)
.option pop
    addi    a1, a1, 32
    bltu    a1, a2, 1b
2:
    ret
#endif
// thread_finalizer: landing address for instantiated threads once they have
// finished. It lives in its own section (.thread_fini) and does nothing but
// issue the exit syscall.
.section .thread_fini
.global thread_finalizer
thread_finalizer:
    nop                         // NOTE(review): purpose of this leading nop is
                                // not evident from this file - confirm before
                                // removing it
    li      a7, 5               // syscall number 5 (the exit syscall)
    ecall