#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "kernel_int.h"
#include "ral_export.h"
#include "printf.h"
#include "kernel.h"
#include "timers.h"
#include "entry.h"
#include "heap.h"
#include "irqs.h"
#include "emu.h"
#include "mpu.h"
#include "dal.h"
#include "ral.h"


//stack top (provided by linker)
extern void __stack_top();


//exception & irq priority assignments used by this file. a lower value is a higher (more urgent) priority
#define IRQPRIO_HWSVCS					0	//hardware services. cannot make syscalls or expect to read accurate time! never disabled
#define IRQPRIO_SYSTICK					1	//never disabled, should not do any syscalls or affect the OS in any way. timekeeping only please
#define IRQPRIO_USAGE_FAULT				2
#define IRQPRIO_BUS_FAULT				2
#define IRQPRIO_MEMMANGE_FAULT			2
#define IRQPRIO_SVCALL					3
#define IRQPRIO_SCHED_TIMER				4	//this irq level and higher are actually disabled by irqsAllOff()
#define IRQPRIO_HW_IRQS_LOW_LIMIT		5
#define IRQPRIO_HW_IRQS_HIGH_LIMIT		14
#define IRQPRIO_PENDSV					15

//the values above span 0..15, so at least 4 priority bits must be implemented
#if __NVIC_PRIO_BITS < 4
	#error "not enough prio bits"
#endif


#ifdef CUSTOM_MMU_FAULT_HANDLER

	extern bool CUSTOM_MMU_FAULT_HANDLER(struct CortexExcFrame *exc, uint32_t addr);	//true to continue execution as if no fault happened

#endif


static void hwrInit(void)
{
	const struct MachInitDataInterrupts irqInfo = {
		.schedulingTimerPrio = IRQPRIO_SCHED_TIMER,
		.lowestAllowablePrio = IRQPRIO_HW_IRQS_LOW_LIMIT,
		.highestAllowablePrio = IRQPRIO_HW_IRQS_HIGH_LIMIT,
		.hardwareServicesPrio = IRQPRIO_HWSVCS,
	};
	uint32_t i;
	
	//fuck priority grouping
	NVIC_SetPriorityGrouping(0);
	
	//we need systick to be the highest prio thing possible
	NVIC_SetPriority(SysTick_IRQn, IRQPRIO_SYSTICK);
	
	//usage fault needs to be high because kernel may cause it using "T" memory accesses
	NVIC_SetPriority(UsageFault_IRQn, IRQPRIO_USAGE_FAULT);
	NVIC_SetPriority(BusFault_IRQn, IRQPRIO_BUS_FAULT);
	NVIC_SetPriority(MemoryManagement_IRQn, IRQPRIO_MEMMANGE_FAULT);
	
	//syscalls are high prio (so real irq cannot preempt them) - they are fast and need to be atomic
	NVIC_SetPriority(SVCall_IRQn, IRQPRIO_SVCALL);
	
	//hole: lowest for timer used for scheduling, the rest are for hard irqs
	
	//pendsv is lowest prio and does actual rescheduling (it assumes it saves PSP, so it MUST be lowest prio)
	NVIC_SetPriority(PendSV_IRQn, IRQPRIO_PENDSV);
	
	//set all HW ints we do not know about to low prio (highest prio val)
	for (i = 0; i < CPU_NUM_IRQS; i++)
		NVIC_SetPriority((IRQn_Type)i, IRQPRIO_HW_IRQS_HIGH_LIMIT);
	
	//export funcs
	if (!ralSetRePalmTabFunc(REPALM_FUNC_IDX_GET_CLOCK_RATE, cpuGetClockRate) ||
			!ralSetRePalmTabFunc(REPALM_FUNC_IDX_GET_TIMER_VAL, timerGetTime))
		fatal("cannot export DAL funcs\n");
	
	//let mach driver set prios it cares about higher
	machInit(STAGE_INIT_INTERRUPTS, &irqInfo);
	
	//ints on
	(void)irqsAllOn();
}

//log (via loge) "nwords" 32-bit words starting at "words", one per line, each
//with its byte offset from the start and its address
static void generalFaultHandlerShowSpWords(uint32_t *words, uint32_t nwords)
{
	uint32_t idx = 0;
	
	while (idx < nwords) {
		uint32_t *cur = words + idx;
		
		loge("    [SP + 0x%03x ( == 0x%08x) ] = 0x%08x\n", idx * 4, cur, *cur);
		idx++;
	}
}

//append words to the string in "line", three per text line. a trailing group
//of fewer than three words is not written
static void generalFaultHandlerLogSpWords(char *line, uint32_t *words, uint32_t nwords)
{
	for (; nwords >= 3; nwords -= 3, words += 3) {
		char *dst = line + strlen(line);	//current end of the text
		
		spr(dst, "%08x %08x %08x\n", words[0], words[1], words[2]);
	}
}

//try to quietly recover from a fault. only a mem manage or bus fault whose
//faulting pc is exactly one of the user_access_N labels is recovered: pc is
//redirected to userAccessFaultReturnPath and fault status is cleared.
//returns true if the fault was handled, false otherwise. pushedRegs is unused
static bool handleFaultSilently(struct CortexExcFrame *exc, struct CortexPushedRegs *pushedRegs, uint32_t excNum)
{
	extern uint8_t user_access_0[], user_access_1[], user_access_2[], user_access_3[];
	extern bool userAccessFaultReturnPath(void);
	
	//only these two fault kinds are candidates
	if (excNum != CORTEX_EXC_NUM_MEM_MANAGE_FAULT && excNum != CORTEX_EXC_NUM_BUS_FAULT)
		return false;
	
	//fault must have happened at one of the known user-access instructions
	if ((exc->pc != (uintptr_t)&user_access_0) &&
			(exc->pc != (uintptr_t)&user_access_1) &&
			(exc->pc != (uintptr_t)&user_access_2) &&
			(exc->pc != (uintptr_t)&user_access_3))
		return false;
	
	//resume at the failure path (low bit of the func address is stripped)
	exc->pc = ((uint32_t)&userAccessFaultReturnPath) &~ 1;
	SCB->CFSR = SCB->CFSR;	//clear state
	
	return true;
}


//append the MMFSR field of cfsr to the string in "line". MMFAR is included
//only when the 0x80 (MMARVALID) bit of MMFSR is set
static void faultLogMMFSR(char *line, uint32_t cfsr, uint32_t mmfar)
{
	const uint32_t mmfsr = (cfsr & SCB_CFSR_MEMFAULTSR_Msk) >> SCB_CFSR_MEMFAULTSR_Pos;
	char *dst = line + strlen(line);
	
	if (!(mmfsr & 0x80)) {
		spr(dst, "MMFSR %02x\n", mmfsr);
		return;
	}
	
	spr(dst, "MMFSR %02x MMFAR %08x\n", mmfsr, mmfar);
}

//log (via loge) the MMFSR field of cfsr with the names of its set bits, plus
//MMFAR and the whole CFSR. "subtitle" is printed first on the line
static void faultShowMMFSR(const char* subtitle, uint32_t cfsr, uint32_t mmfar)
{
	const uint32_t mmfsr = (cfsr & SCB_CFSR_MEMFAULTSR_Msk) >> SCB_CFSR_MEMFAULTSR_Pos;
	const char *mmarvalid = (mmfsr & 0x80) ? "MMARVALID," : "";
	const char *mlsperr = (mmfsr & 0x20) ? "MLSPERR," : "";
	const char *mstkerr = (mmfsr & 0x10) ? "MSTKERR," : "";
	const char *munstkerr = (mmfsr & 0x08) ? "MUNSTKERR," : "";
	const char *daccviol = (mmfsr & 0x02) ? "DACCVIOL," : "";
	const char *iaccviol = (mmfsr & 0x01) ? "IACCVIOL," : "";
	
	loge("%sMMFSR=0x%02x (%s%s%s%s%s%s) MMFAR=0x%08x CFSR=0x%08x\n", subtitle, mmfsr,
			mmarvalid, mlsperr, mstkerr, munstkerr, daccviol, iaccviol,
			mmfar, cfsr);
}

//append the BFSR field of cfsr to the string in "line". BFAR is included
//only when the 0x80 (BFARVALID) bit of BFSR is set
static void faultLogBFSR(char *line, uint32_t cfsr, uint32_t bfar)
{
	const uint32_t bfsr = (cfsr & SCB_CFSR_BUSFAULTSR_Msk) >> SCB_CFSR_BUSFAULTSR_Pos;
	char *dst = line + strlen(line);
	
	if (!(bfsr & 0x80)) {
		spr(dst, "BFSR %02x\n", bfsr);
		return;
	}
	
	spr(dst, "BFSR %02x BFAR %08x\n", bfsr, bfar);
}

//log (via loge) the BFSR field of cfsr with the names of its set bits, plus
//BFAR and the whole CFSR. "subtitle" is printed first on the line
static void faultShowBFSR(const char* subtitle, uint32_t cfsr, uint32_t bfar)
{
	const uint32_t bfsr = (cfsr & SCB_CFSR_BUSFAULTSR_Msk) >> SCB_CFSR_BUSFAULTSR_Pos;
	const char *bfarvalid = (bfsr & 0x80) ? "BFARVALID," : "";
	const char *lsperr = (bfsr & 0x20) ? "LSPERR," : "";
	const char *stkerr = (bfsr & 0x10) ? "STKERR," : "";
	const char *unstkerr = (bfsr & 0x08) ? "UNSTKERR," : "";
	const char *impreciserr = (bfsr & 0x04) ? "IMPRECISERR," : "";
	const char *preciserr = (bfsr & 0x02) ? "PRECISERR," : "";
	const char *ibuserr = (bfsr & 0x01) ? "IBUSERR," : "";
	
	loge("%sBFSR=%02x (%s%s%s%s%s%s%s) BFAR=0x%08x CFSR=0x%08x\n", subtitle, bfsr,
			bfarvalid, lsperr, stkerr, unstkerr, impreciserr, preciserr, ibuserr,
			bfar, cfsr);
}

//append the UFSR field of cfsr to the string in "line"
static void faultLogUFSR(char *line, uint32_t cfsr)
{
	const uint32_t ufsr = (cfsr & SCB_CFSR_USGFAULTSR_Msk) >> SCB_CFSR_USGFAULTSR_Pos;
	char *dst = line + strlen(line);
	
	spr(dst, "UFSR %04x\n", ufsr);
}

//log (via loge) the UFSR field of cfsr with the names of its set bits, plus
//the whole CFSR. "subtitle" is printed first on the line
static void faultShowUFSR(const char* subtitle, uint32_t cfsr)
{
	const uint32_t ufsr = (cfsr & SCB_CFSR_USGFAULTSR_Msk) >> SCB_CFSR_USGFAULTSR_Pos;
	const char *divbyzero = (ufsr & 0x0200) ? "DIVBYZERO," : "";
	const char *unaligned = (ufsr & 0x0100) ? "UNALIGNED," : "";
	const char *nocp = (ufsr & 0x0008) ? "NOCP," : "";
	const char *invpc = (ufsr & 0x0004) ? "INVPC," : "";
	const char *invstate = (ufsr & 0x0002) ? "INVSTATE," : "";
	const char *undefinstr = (ufsr & 0x0001) ? "UNDEFINSTR," : "";
	
	loge("%sUFSR=0x%04x (%s%s%s%s%s%s) CFSR=0x%08x\n", subtitle, ufsr,
			divbyzero, unaligned, nocp, invpc, invstate, undefinstr,
			cfsr);
}

//common C-level fault handler, reached from faultHandlerWithExcFrame.
//  exc        - the exception frame of the faulting context (r0-r3, r12, lr, pc, sr)
//  pushedRegs - r4..r11 of the faulting context, as saved by the asm entry code
//  ipsr       - IPSR value, that is: the number of the exception being handled
//
//returns only if the fault was recovered from (custom MMU fault handler, if
//one is configured, or handleFaultSilently()). otherwise the fault is logged
//via loge(), a text report is built in the HAL error buffer and displayed, and
//the function then spins forever with interrupts masked.
//
//the report in the HAL error buffer is two back-to-back strings: a short title
//at "msg", its NUL terminator, and then the detail text starting at "line"
static void __attribute__((used)) generalFaultHandler(struct CortexExcFrame *exc, struct CortexPushedRegs *pushedRegs, uint32_t ipsr)
{
	uint32_t mmfar, bfar, hfsr, cfsr;
	struct EmuCpuState* ctx = NULL;
	char *msg, *line;

	//snapshot the fault registers before clearing them
	mmfar = SCB->MMFAR;
	bfar = SCB->BFAR;
	cfsr = SCB->CFSR;
	hfsr = SCB->HFSR;
	SCB->CFSR = cfsr;	//clear status
	SCB->HFSR = hfsr;	//clear status
		
	#ifdef CUSTOM_MMU_FAULT_HANDLER
	
		//0x82 = MMARVALID + DACCVIOL
		if (ipsr == CORTEX_EXC_NUM_MEM_MANAGE_FAULT && (cfsr & 0xbb) == 0x82 && CUSTOM_MMU_FAULT_HANDLER(exc, mmfar))
			return;
	
	#endif
	
	if (handleFaultSilently(exc, pushedRegs, ipsr))
		return;
	
	asm volatile("cpsid i");	//XXX: let us report....
	
	msg = halErrorGetBuffer();
	
	//per-fault-type: log the cause, write the title to msg, start the detail text at line
	switch (ipsr) {
		case CORTEX_EXC_NUM_NMI:
			loge("NMI FAULT\n");
			spr(msg, "FAULT: %s", "NMI");
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			break;
		
		case CORTEX_EXC_NUM_HARD_FAULT:
			loge("HARD FAULT HFSR=0x%08x (%s%s%s)\n", hfsr,
				(hfsr & 0x80000000) ? "DEBUGEVT," : "",
				(hfsr & 0x40000000) ? "FORCED," : "",
				(hfsr & 0x00000002) ? "VECTTBL," : "");
			spr(msg, "HARD:%08x", hfsr);
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			
			switch (cfsr & 0x00008080) {	//have A VALID ADDR? LOG THE HARD WAY (assume only one addr)
				
				case 0x8080:	//both: unlikely
					spr(line + strlen(line), "%08x %08x %08x\n", cfsr, mmfar, bfar);
					break;
				
				case 0x8000:	//have bfar
					spr(line + strlen(line), "%08x ba=%08x\n", cfsr, bfar);
					break;
				
				case 0x0080:	//have mmfar
					spr(line + strlen(line), "%08x ma=%08x\n", cfsr, mmfar);
					break;
				
				default:
					spr(line + strlen(line), "CFSR: %08x\n", cfsr);
			}
			
			faultShowMMFSR("\t\t", cfsr, mmfar);
			faultShowBFSR("\t\t", cfsr, bfar);
			faultShowUFSR("\t\t", cfsr);
			break;

		case CORTEX_EXC_NUM_MEM_MANAGE_FAULT:
			faultShowMMFSR("MEM MANAGE FAULT ", cfsr, mmfar);
			spr(msg, "MEM: %08x", cfsr);	//the %08x needs its argument, same as the BUS & USG titles
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			faultLogMMFSR(line, cfsr, mmfar);
			break;

		case CORTEX_EXC_NUM_BUS_FAULT:
			faultShowBFSR("BUS FAULT ", cfsr, bfar);
			spr(msg, "BUS: %08x", cfsr);
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			faultLogBFSR(line, cfsr, bfar);
			break;
		
		case CORTEX_EXC_NUM_USAGE_FAULT:
			faultShowUFSR("USAGE FAULT UFSR", cfsr);
			spr(msg, "USG: %08x", cfsr);
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			break;
		
		case CORTEX_EXC_NUM_DEBUG_MON:
			loge("DEBUG EXCEPTION\n");
			spr(msg, "FAULT: %s", "DBG");
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			break;
		
		default:
			loge("UNKNOWN FAULT (ipsr = 0x0%x)\n", ipsr);
			spr(msg, "FAULT: %xh", ipsr);
			line = msg + strlen(msg) + 1;
			line[0] = 0;
			break;
	}
	
	//register dump to the log. SP is shown as the address just past the exception frame
	loge("  SR  = 0x%08x\n", exc->sr);
	loge("  R0  = 0x%08x    R8  = 0x%08x\n", exc->r0, pushedRegs->regs8_11[8  - 8]);
	loge("  R1  = 0x%08x    R9  = 0x%08x\n", exc->r1, pushedRegs->regs8_11[9  - 8]);
	loge("  R2  = 0x%08x    R10 = 0x%08x\n", exc->r2, pushedRegs->regs8_11[10 - 8]);
	loge("  R3  = 0x%08x    R11 = 0x%08x\n", exc->r3, pushedRegs->regs8_11[11 - 8]);
	loge("  R4  = 0x%08x    R12 = 0x%08x\n", pushedRegs->regs4_7[4 - 4], exc->r12);
	loge("  R5  = 0x%08x    SP  = 0x%08x\n", pushedRegs->regs4_7[5 - 4], exc + 1);
	loge("  R6  = 0x%08x    LR  = 0x%08x\n", pushedRegs->regs4_7[6 - 4], exc->lr);
	loge("  R7  = 0x%08x    PC  = 0x%08x\n", pushedRegs->regs4_7[7 - 4], exc->pc);
	
	//same registers into the on-screen report, in order: sr, r0..r12, then sp, lr, pc
	spr(line + strlen(line), "sr %08x regs: %08x\n", exc->sr, exc->r0);
	spr(line + strlen(line), "%08x %08x %08x\n", exc->r1, exc->r2, exc->r3);
	spr(line + strlen(line), "%08x %08x %08x\n", pushedRegs->regs4_7[4 - 4], pushedRegs->regs4_7[5 - 4], pushedRegs->regs4_7[6 - 4]);
	spr(line + strlen(line), "%08x %08x %08x\n", pushedRegs->regs4_7[7 - 4], pushedRegs->regs8_11[8  - 8], pushedRegs->regs8_11[9  - 8]);
	spr(line + strlen(line), "%08x %08x %08x\n", pushedRegs->regs8_11[10 - 8], pushedRegs->regs8_11[11 - 8], exc->r12);	//r10, r11, r12
	spr(line + strlen(line), "%08x_%08x_%08x\n", exc + 1, exc->lr, exc->pc);	//underscores show where regs end and SP dump begins
	
	loge("  some words at SP:\n");
	generalFaultHandlerShowSpWords((uint32_t*)(exc + 1), 128);
	generalFaultHandlerLogSpWords(line, (uint32_t*)(exc + 1), 96);
	
	impl_HALErrDisplay(msg, false, NULL, false);	//let it draw
	
	//if an emulated cpu context can be found, dump it too
	#ifdef EXPLICIT_EMU_CTX
	
		ctx = schedGetCurEmuContextFromFaultContext();
		
	#endif
	#ifdef IMPLICIT_EMU_CTX
	
		//fault inside the emulator's run code: R4 is taken to be the emulated cpu state pointer
		extern uint8_t emuCpuRunCodeStart[], emuCpuRunCodeEnd[];
		if (exc->pc >= (uintptr_t)emuCpuRunCodeStart && exc->pc < (uintptr_t)emuCpuRunCodeEnd) 
			ctx = (struct EmuCpuState*)pushedRegs->regs4_7[4 - 4];
		
	#endif
	
	if (ctx) {
		loge("IN ARM EMULATION MODE. CONTEXT:\n");
		loge("  SR  = 0x%08x\n", ctx->sr);
		loge("  R0  = 0x%08x    R8  = 0x%08x\n", ctx->regs[0], ctx->regs[ 8]);
		loge("  R1  = 0x%08x    R9  = 0x%08x\n", ctx->regs[1], ctx->regs[ 9]);
		loge("  R2  = 0x%08x    R10 = 0x%08x\n", ctx->regs[2], ctx->regs[10]);
		loge("  R3  = 0x%08x    R11 = 0x%08x\n", ctx->regs[3], ctx->regs[11]);
		loge("  R4  = 0x%08x    R12 = 0x%08x\n", ctx->regs[4], ctx->regs[12]);
		loge("  R5  = 0x%08x    SP  = 0x%08x\n", ctx->regs[5], ctx->regs[13]);
		loge("  R6  = 0x%08x    LR  = 0x%08x\n", ctx->regs[6], ctx->regs[14]);
		loge("  R7  = 0x%08x    PC  = 0x%08x (may be advanced past instr)\n", ctx->regs[7], ctx->regs[15]);
		loge("  some words at emulated SP:\n");
		generalFaultHandlerShowSpWords((uint32_t*)ctx->regs[13], 64);
	}
	
	kernelLogCurTaskForExc();
	
	//mask everything and stop here for good
	asm volatile ("cpsid if	\n\t");
	while(1); //asm volatile ("wfi");
}


//these handlers all share MemManage_Handler's code. the actual exception number is read from IPSR later, so one entry point can serve them all
void BusFault_Handler(void) __attribute__ ((alias ("MemManage_Handler")));		//it is the same so why waste the bytes copying it
void NMI_Handler(void) __attribute__ ((alias ("MemManage_Handler")));			//it is the same so why waste the bytes copying it
void DebugMonitor_Handler(void) __attribute__ ((alias ("MemManage_Handler")));	//it is the same so why waste the bytes copying it
void HardFault_Handler(void) __attribute__ ((alias ("MemManage_Handler")));		//it is the same so why waste the bytes copying it

//called from asm and given an exc frame. cause and extraData unused and need not be passed through
//saves r4-r11 on the current stack and calls generalFaultHandler(exc, pointer to those saved regs, IPSR).
//r0 (exc) is not touched here, so it reaches generalFaultHandler as its first param
void __attribute__((used,naked)) faultHandlerWithExcFrame(struct CortexExcFrame *exc, uint32_t cause, uint32_t extraData)
{
	asm volatile(
		"	push  {r4-r11,lr}							\n\t"	//save callee-saved regs and our return address
		"	mov   r1, sp								\n\t"	//param 2: pointer to the regs we just pushed
		"	mrs   r2, IPSR								\n\t"	//param 3: number of the exception being handled
		"	bl    generalFaultHandler					\n\t"
		"	pop   {r4-r11,pc}							\n\t"	//only reached if the fault was handled: restore regs and return
	);
}

//fault exception entry point (other fault handlers in this file are aliases of it).
//picks MSP or PSP based on bit 2 of lr, places it in r0, and tail-jumps to faultHandlerWithExcFrame
void __attribute__((used,naked)) MemManage_Handler(void)
{
	asm volatile(
		"	tst   lr, #4								\n\t"	//see which stack fault was on
		"	ite   eq									\n\t"
		"	mrseq r0, msp								\n\t"	//grab the appropriate SP
		"	mrsne r0, psp								\n\t"
		"	b     faultHandlerWithExcFrame				\n\t"	//plain branch: lr is left as is for the eventual return
	);
}

//body of the very first task. "param" is the entry function to run, passed as
//a void*. the entry function is not expected to return
static void kernelFirstTask(void *param)
{
	//we are on PSP now, so it is safe to hand the supervisor mode the entire initial stack (it had only some of it so far)
	asm("msr MSP, %0\n":: "r"(__stack_top));
	
	((void (*)(void))param)();
	__builtin_unreachable();
}

//bring the kernel up: machine init stages, fault/trap config, irq priorities,
//timers, MPU, scheduler, and then start the first task, which will call
//entryFunc. any failure along the way ends in fatal(). hyperFuncIfAny is
//not used here
void kernelInit(void (*entryFunc)(void), void* hyperFuncIfAny)
{
	//CCR value: always trap div-by-0 and unaligned accesses; other bits only if this core's headers define them
	uint32_t ccr = SCB_CCR_DIV_0_TRP_Msk | SCB_CCR_UNALIGN_TRP_Msk;
	kstatus_t status;
	void* firstStack;
	tid_t firstTid;
	
	#ifdef SCB_CCR_BP_Msk
		ccr |= SCB_CCR_BP_Msk;
	#endif
	#ifdef SCB_CCR_IC_Msk
		ccr |= SCB_CCR_IC_Msk;
	#endif
	#ifdef SCB_CCR_DC_Msk
		ccr |= SCB_CCR_DC_Msk;
	#endif
	
	machInit(STAGE_INIT_SET_VTOR, NULL);
	machInit(STAGE_SETUP_HEAPS, NULL);
	
	SCB->CCR = ccr;
	
	//usage, bus, and mem manage faults each get their own handler enabled
	SCB->SHCSR = SCB_SHCSR_USGFAULTENA_Msk | SCB_SHCSR_BUSFAULTENA_Msk | SCB_SHCSR_MEMFAULTENA_Msk;
	
	hwrInit();
	timersInit();
	
	if (!mpuInit()) {
		fatal("MPU init failed\n");
	}
	
	machInit(STAGE_INIT_MPU, NULL);
	
	machInit(STAGE_INIT_PRE_SCHED, NULL);
	if (schedInit() != KERN_STATUS_OK) {
		fatal("SCHED init failed\n");
	}
	
	//stack for the zeroth task comes from the kernel heap
	firstStack = kheapAlloc(ZEROTH_TASK_STACK_SZ);
	if (!firstStack) {
		fatal("cannot allocate initial stack\n");
	}
	
	status = KTaskCreate(CREATE_4CC('t','s','k','0'), (void*)kernelFirstTask, firstStack, ZEROTH_TASK_STACK_SZ, NULL, SCHED_DEFAULT_PRIO, false, &firstTid);
	if (status != KERN_STATUS_OK) {
		fatal("cannot create first task with code %u\n", status);
	}

	//entryFunc is handed to the first task as its param
	status = schedStart(firstTid, (void*)entryFunc);
	if (status != KERN_STATUS_OK) {
		fatal("sched start failed with code %u\n", status);
	}
	
	//getting here at all is treated as a failure
	fatal("kernel sched failed to init\n");
}

//late kernel init hook. there is no late init work at present, so this
//always reports success
kstatus_t kernelInitLate(void)
{
	return KERN_STATUS_OK;
}
