#ifndef _M0_FAULT_DISPATCH_
#define _M0_FAULT_DISPATCH_

/*

	comes in a few pieces so you can insert your own things in the middle
	do not include more than once per program: this file defines non-static functions, so a second copy will not link

	define the following macros to hook in more processing
	
	CODE_TO_GO_TO_ARM
	
		code to jump to your own way to do arm
		 * r0 contains exc frame
		 * r2 contains the ARM pc
		 * r1, r3, r12 are all yours
		 * LR is whatever you need to return from this exception
		 * numeric labels 5 and later are yours
	
	CODE_GOTO_ARM_PC_IN_EXC_VALID
		same as the above, except exc->pc already has the arm PC
		 * BUT, r2 does not!!!
	
	CODE_FOR_UNDEF_32BIT_INSTR
		
		code to run in case of an undefined 32-bit instr
		 * r0 contains exc frame
		 * r1 has first halfword
		 * r2 has second halfword
		 * r3, r12 are all yours
		 * LR is whatever you need to return from this exception
		 * numeric labels 5 and later are yours
	
	CODE_FOR_POSSIBLY_UNDEF_16BIT_INSTR (could also be defined, up to you to check!)
		 * r0 contains exc frame
		 * r1 has the instr (do not clobber it)
		 * r2 has pc
		 * r2, r3, r12 are all yours
		 * LR is whatever you need to return from this exception
		 * numeric labels 5 and later are yours

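	EXTRA_ASM_INPUTS
		if you want to use asm inputs, start at 0 and use this variable to define them

	CODE_AT_END_EXTRA
		extra asm text that is emitted at the very end of the handler, just before its literal pool (.ltorg)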

*/



//two-level stringification: STR(x) macro-expands x first and then turns the result into a string literal,
// STR2(x) stringifies its argument as written. Used below to paste numeric constants into asm text
#define STR2(x)			#x
#define STR(x)			STR2(x)



//Trampoline into faultHandlerWithExcFrame that first reloads r4..r11 from pushedRegs.
// The function is naked, so the arguments are still in their incoming registers when the asm runs:
// the asm reads pushedRegs from r3 and never touches r0..r2, so frm, cause and extraData are passed
// through unchanged (presumably as the three parameters of faultHandlerWithExcFrame - confirm against
// its prototype).
// Eight words are read from pushedRegs: the first four go to r8..r11, the next four to r4..r7.
// Control is transferred with "bx", so LR is left as the caller set it and faultHandlerWithExcFrame
// returns directly to our caller.
static void __attribute__((naked)) callFaultHandlerWithSetHiregs(struct CortexExcFrame* frm, uint32_t cause, uint32_t extraData, struct CortexPushedRegs *pushedRegs)
{
	asm volatile(
		"	ldmia r3!, {r4-r7}						\n\t"	//first four words of pushedRegs -> r4..r7 (staging for r8..r11)
		"	mov   r8, r4							\n\t"
		"	mov   r9, r5							\n\t"
		"	mov   r10, r6							\n\t"
		"	mov   r11, r7							\n\t"
		"	ldmia r3!, {r4-r7}						\n\t"	//next four words are the real r4..r7
		"	ldr   r3, =faultHandlerWithExcFrame		\n\t"	//r3 is free now - use it for the branch target
		"	bx    r3								\n\t"	//tail call: LR is not modified
		".ltorg										\n\t"	//literal pool for the "ldr =" above
		:::"memory"
	);
}

//Fetch the value register number regNo had in the faulting context.
// r0..r3, r12, lr and pc are taken from the exception frame, r4..r11 from the separately stashed
// register block. SP is reported as the address immediately after the exception frame and PC as
// the stacked pc plus 4. Any regNo above 15 produces 0xffffffff.
// NOTE(review): the SP value does not account for a possible alignment padding word after the
// frame (stacked xPSR bit 9) - confirm whether that matters for the callers.
static uint32_t analyzeInstr16PrvUtilGetReg(const struct CortexExcFrame* frm, struct CortexPushedRegs *moreRegs, uint32_t regNo)
{
	//the two stashed banks are plain arrays, so handle them by range
	if (regNo >= 4 && regNo <= 7)
		return moreRegs->regs4_7[regNo - 4];
	
	if (regNo >= 8 && regNo <= 11)
		return moreRegs->regs8_11[regNo - 8];
	
	//everything else lives in (or is derived from) the exception frame
	if (regNo == 0)
		return frm->r0;
	if (regNo == 1)
		return frm->r1;
	if (regNo == 2)
		return frm->r2;
	if (regNo == 3)
		return frm->r3;
	if (regNo == 12)
		return frm->r12;
	if (regNo == 13)
		return (uintptr_t)(frm + 1);	//stack pointer = first byte past the frame
	if (regNo == 14)
		return frm->lr;
	if (regNo == 15)
		return frm->pc + 4;				//Thumb reads of PC see the instruction address + 4
	
	return 0xffffffff;					//not a register number
}

//Probe a byte read from addr.
// Returns true if the read completed; in that case the byte (zero-extended by ldrb) is stored to *dstP.
// Returns false if the read faulted. *dstP is still written in that case (the final str is executed on
// both paths), but with whatever happened to be in the scratch register - do not use it.
// The instruction order "access; b 1f; movs %0, #0; 1:" must not be changed: the re-entry path of
// HardFault_Handler recognises a faulting access by the two halfwords that follow it and resumes
// execution at the "movs %0, #0".
static bool load8(uint32_t addr, uint32_t *dstP)
{
	uint32_t tmp;
	bool ret;
	
	asm volatile(
		".syntax unified		\n\t"
		"	movs	%0, #1		\n\t"	//special formulation for our fault handler
		"	ldrb	%1, [%2]	\n\t"	//the access being probed
		"	b		1f			\n\t"	//success: skip the failure marker
		"	movs	%0, #0		\n\t"	//only reached when the fault handler redirects us here
		"1:						\n\t"
		"	str		%1, [%3]	\n\t"	//runs on both paths
	:"=&l"(ret), "=&l"(tmp)
	:"l"(addr), "l"(dstP)
	:"cc", "memory");
	
	return ret;
}

//Probe a byte write of val to addr.
// Returns true if the write completed and false if it faulted.
// The instruction order "access; b 1f; movs %0, #0; 1:" must not be changed: the re-entry path of
// HardFault_Handler recognises a faulting access by the two halfwords that follow it and resumes
// execution at the "movs %0, #0".
static bool store8(uint32_t addr, uint32_t val)
{
	bool ret;
	
	asm volatile(
		".syntax unified		\n\t"
		"	movs	%0, #1		\n\t"	//special formulation for our fault handler
		"	strb	%2, [%1]	\n\t"	//the access being probed
		"	b		1f			\n\t"	//success: skip the failure marker
		"	movs	%0, #0		\n\t"	//only reached when the fault handler redirects us here
		"1:						\n\t"
	:"=&l"(ret)
	:"l"(addr), "l"(val)
	:"cc", "memory");
	
	return ret;
}

//Probe a halfword read from addr.
// Returns true if the read completed; in that case the halfword (zero-extended by ldrh) is stored to *dstP.
// Returns false if the read faulted. *dstP is still written in that case (the final str is executed on
// both paths), but with whatever happened to be in the scratch register - do not use it.
// The instruction order "access; b 1f; movs %0, #0; 1:" must not be changed: the re-entry path of
// HardFault_Handler recognises a faulting access by the two halfwords that follow it and resumes
// execution at the "movs %0, #0".
static bool load16(uint32_t addr, uint32_t *dstP)
{
	uint32_t tmp;
	bool ret;
	
	asm volatile(
		".syntax unified		\n\t"
		"	movs	%0, #1		\n\t"	//special formulation for our fault handler
		"	ldrh	%1, [%2]	\n\t"	//the access being probed
		"	b		1f			\n\t"	//success: skip the failure marker
		"	movs	%0, #0		\n\t"	//only reached when the fault handler redirects us here
		"1:						\n\t"
		"	str		%1, [%3]	\n\t"	//runs on both paths
	:"=&l"(ret), "=&l"(tmp)
	:"l"(addr), "l"(dstP)
	:"cc", "memory");
	
	return ret;
}

//Probe a halfword write of val to addr.
// Returns true if the write completed and false if it faulted.
// The instruction order "access; b 1f; movs %0, #0; 1:" must not be changed: the re-entry path of
// HardFault_Handler recognises a faulting access by the two halfwords that follow it and resumes
// execution at the "movs %0, #0".
static bool store16(uint32_t addr, uint32_t val)
{
	bool ret;
	
	asm volatile(
		".syntax unified		\n\t"
		"	movs	%0, #1		\n\t"	//special formulation for our fault handler
		"	strh	%2, [%1]	\n\t"	//the access being probed
		"	b		1f			\n\t"	//success: skip the failure marker
		"	movs	%0, #0		\n\t"	//only reached when the fault handler redirects us here
		"1:						\n\t"
	:"=&l"(ret)
	:"l"(addr), "l"(val)
	:"cc", "memory");
	
	return ret;
}

//Probe a word read from addr.
// Returns true if the read completed; in that case the word is stored to *dstP.
// Returns false if the read faulted. *dstP is still written in that case (the final str is executed on
// both paths), but with whatever happened to be in the scratch register - do not use it.
// The instruction order "access; b 1f; movs %0, #0; 1:" must not be changed: the re-entry path of
// HardFault_Handler recognises a faulting access by the two halfwords that follow it and resumes
// execution at the "movs %0, #0".
static bool load32(uint32_t addr, uint32_t *dstP)
{
	uint32_t tmp;
	bool ret;
	
	asm volatile(
		".syntax unified		\n\t"
		"	movs	%0, #1		\n\t"	//special formulation for our fault handler
		"	ldr		%1, [%2]	\n\t"	//the access being probed
		"	b		1f			\n\t"	//success: skip the failure marker
		"	movs	%0, #0		\n\t"	//only reached when the fault handler redirects us here
		"1:						\n\t"
		"	str		%1, [%3]	\n\t"	//runs on both paths
	:"=&l"(ret), "=&l"(tmp)
	:"l"(addr), "l"(dstP)
	:"cc", "memory");
	
	return ret;
}

//Probe a word write of val to addr.
// Returns true if the write completed and false if it faulted.
// The instruction order "access; b 1f; movs %0, #0; 1:" must not be changed: the re-entry path of
// HardFault_Handler recognises a faulting access by the two halfwords that follow it and resumes
// execution at the "movs %0, #0".
static bool store32(uint32_t addr, uint32_t val)
{
	bool ret;
	
	asm volatile(
		".syntax unified		\n\t"
		"	movs	%0, #1		\n\t"	//special formulation for our fault handler
		"	str		%2, [%1]	\n\t"	//the access being probed
		"	b		1f			\n\t"	//success: skip the failure marker
		"	movs	%0, #0		\n\t"	//only reached when the fault handler redirects us here
		"1:						\n\t"
	:"=&l"(ret)
	:"l"(addr), "l"(val)
	:"cc", "memory");
	
	return ret;
}

//Second-stage triage of a faulting 16-bit Thumb instruction, called from HardFault_Handler.
// frm        - exception frame of the faulting context
// instr      - the 16-bit instruction that was read from frm->pc
// pushedRegs - r8..r11 and r4..r7 of the faulting context. They are needed to look up register
//              operands, and the registers are reset from them before faultHandlerWithExcFrame()
//              is entered (see callFaultHandlerWithSetHiregs())
// The instruction is decoded far enough to re-try its memory accesses with the fault-tolerant
// load*/store* probes. The outcome is reported as a cause code plus extra data (the offending
// address where one is known). Anything that cannot be explained is reported as
// EXC_m0_CAUSE_UNCLASSIFIABLE.
// Note: a store is probed by reading the location and writing the same value back.
void __attribute__((used)) analyzeInstr16(struct CortexExcFrame* frm, uint16_t instr, struct CortexPushedRegs *pushedRegs)
{
	static const uint8_t popCntTab[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};	//in a nibble
	//probe tables, indexed by log2 of the access size in bytes (0, 1 or 2 ONLY)
	static bool (*const accessR[])(uint32_t addr, uint32_t *dstP) = {load8, load16, load32};
	static bool (*const accessW[])(uint32_t addr, uint32_t val) = {store8, store16, store32};
	uint_fast8_t excCause = EXC_m0_CAUSE_UNCLASSIFIABLE, accessSzLog = 0;
	uint32_t addr, val, excExtraData = 0, ofst = 0, base = 0, i;
	bool testStore = false;
	
	switch (instr >> 11) {
		case 0b01000:
			//cmp.hi (0x45xx) with both lo regs: bits 10..8 = 0b101, bits 7..6 = 0.
			//bit 9 must be part of the mask, or else "BX <lo reg>" (0x47xx) would match too
			if ((instr & 0x07c0) == 0x0500)
				excCause = EXC_m0_CAUSE_UNDEFINSTR;
			break;
		
		case 0b01001:	//load from literal pool (alignment guaranteed by instr)
			addr = ((frm->pc + 4) &~ 3) + ((instr & 0xff) << 2);
			if (!load32(addr, &val)) {
				
				excCause = EXC_m0_CAUSE_MEM_ACCESS_FAIL;
				excExtraData = addr;
			}
			break;
		
		case 0b01010:	//load/store register
		case 0b01011:
			switch ((instr >> 9) & 0x07) {
				case 0b000:		//STR
					testStore = true;
					//fallthrough
				case 0b100:		//LDR
					accessSzLog = 2;
					break;
				case 0b001:		//STRH
					testStore = true;
					//fallthrough
				case 0b101:		//LDRH
				case 0b111:		//LDRSH
					accessSzLog = 1;
					break;
				case 0b010:		//STRB
					testStore = true;
					//fallthrough
				case 0b110:		//LDRB
				case 0b011:		//LDRSB
					accessSzLog = 0;
					break;
			}
			ofst = analyzeInstr16PrvUtilGetReg(frm, pushedRegs, (instr >> 6) & 0x07);
			goto load_store_check;
			
		case 0b01100:	//str imm
			testStore = true;
			//fallthrough
		case 0b01101:	//ldr imm
			accessSzLog = 2;
			goto load_store_check_get_imm;

		case 0b01110:	//strb imm
			testStore = true;
			//fallthrough
		case 0b01111:	//ldrb imm
			accessSzLog = 0;
			goto load_store_check_get_imm;

		case 0b10000:	//strh imm
			testStore = true;
			//fallthrough
		
		case 0b10001:	//ldrh imm
			accessSzLog = 1;
			goto load_store_check_get_imm;
		
load_store_check_get_imm:
			//imm5 is scaled by the access size
			ofst = ((instr >> 6) & 0x1f) << accessSzLog;

load_store_check:
			base = analyzeInstr16PrvUtilGetReg(frm, pushedRegs, (instr >> 3) & 0x07);

load_store_check_have_base:
			addr = base + ofst;
			
			//alignment test by mask. (a shift by "32 - accessSzLog" would be a shift by the full
			//width of the type for byte accesses, which C leaves undefined)
			if (addr & (((uint32_t)1 << accessSzLog) - 1)) {
				
				excCause = EXC_m0_CAUSE_UNALIGNED;
				excExtraData = addr;
			}
			else if (!accessR[accessSzLog](addr, &val)) {
				
				excCause = EXC_m0_CAUSE_MEM_ACCESS_FAIL;
				excExtraData = addr;
			}
			else if (testStore && !accessW[accessSzLog](addr, val)) {
				
				excCause = EXC_m0_CAUSE_MEM_ACCESS_FAIL;
				excExtraData = addr;
			}
			//if we have not faulted by now, we do not know why this load/store faulted
			break;
		
		case 0b10010:	//sp-based store
			testStore = true;
			//fallthrough
		
		case 0b10011:	//sp-based load
			ofst = (instr & 0xff) * 4;
			accessSzLog = 2;	//word access. must stay a valid index into accessR[]/accessW[]
			base = analyzeInstr16PrvUtilGetReg(frm, pushedRegs, 13);
			goto load_store_check_have_base;
		
		case 0b10111:
			if ((instr & 0x0700) == 0x0600)
				excCause = EXC_m0_CAUSE_BKPT_HIT;
			//push/pop are here too, but if they fail, we'd fail to stash and not ever get here
			break;
		
		case 0b11000:	//STMIA
			testStore = true;
			//fallthrough
		case 0b11001:	//LDMIA
			base = analyzeInstr16PrvUtilGetReg(frm, pushedRegs, (instr >> 8) & 0x07);
			if (base & 3)							//unaligned base
				excCause = EXC_m0_CAUSE_UNALIGNED;
			else if (!(instr & 0xff))				//LDM/STM with empty reg set
				excCause = EXC_m0_CAUSE_UNDEFINSTR;
			else {
				//number of registers in the list = number of words to probe (at least 1 here)
				i = popCntTab[instr & 0x0f] + popCntTab[(instr >> 4) & 0x0f];
				
				do {
					
					addr = base;
					base += 4;
					
					if (!load32(addr, &val)) {
						
						excCause = EXC_m0_CAUSE_MEM_ACCESS_FAIL;
						excExtraData = addr;
					}
					else if (testStore && !store32(addr, val)) {
						
						excCause = EXC_m0_CAUSE_MEM_ACCESS_FAIL;
						excExtraData = addr;
					}
					else
						continue;	//this word is fine - go on to the loop condition
					
					break;	//one fault found is enough
					
				} while (--i);
				//if we have not faulted by now, we do not know why this ldm/stm faulted
			}
			break;
		
		case 0b11011:
			if ((instr & 0x0700) == 0x0600)			//UDF
				excCause = EXC_m0_CAUSE_UNDEFINSTR;
			break;
		
		default:	//nothing we can analyze - stays unclassifiable
			break;
	}
	
	callFaultHandlerWithSetHiregs(frm, excCause, excExtraData, pushedRegs);
}

//using "4" works, except the prioritization is wrong and other IRQs interrupt the code, even PendSV does.
//Changing SHPR regs does nothing to help, but using an actual irq and setting its prio high does, so we do that.
//It must be an irq not used for anything else on the system.
//The value is an exception number: external irq N is exception number 0x10 + N
#define EXC_LVL_USED					(0x10 + M0_FAULT_DISP_IRQ_USED)

//HardFault entry point: first-stage fault triage, entirely in assembly.
// Outline:
//  1. r0 = exception frame (on MSP or PSP, chosen by bit 2 of the EXC_RETURN value in LR)
//  2. stacked xPSR has T clear -> set T in the frame and run CODE_GOTO_ARM_PC_IN_EXC_VALID
//  3. stacked exception number == EXC_LVL_USED -> the fault came from our own triage code. If the
//     faulting instruction is one of the specially formed probes (followed by "b .+4" and
//     "movs Rx, #imm"), resume at that movs. Otherwise report EXC_m0_CAUSE_UNCLASSIFIABLE
//  4. otherwise an 8-word frame is built on the current stack and "returned" to, which continues at
//     check_more_in_custom_mode with exception number EXC_LVL_USED, r0 = frame, r2 = frame->pc and
//     LR = the original EXC_RETURN
//  5. there the instruction is fetched with probes and dispatched: BLX imm (including a fast path
//     for OS calls), other 32-bit instructions (CODE_FOR_UNDEF_32BIT_INSTR), "bx pc"
//     (CODE_TO_GO_TO_ARM), and other 16-bit instructions (CODE_FOR_POSSIBLY_UNDEF_16BIT_INSTR, then
//     analyzeInstr16() in C)
// Fatal results are passed to faultHandlerWithExcFrame with r0 = frame, r1 = cause, r2 = extra data.
void __attribute__((used,naked)) HardFault_Handler(void)
{
	asm volatile(
		
		//grab the appropriate SP
		".syntax unified									\n\t"
		"	mov   r0, lr									\n\t"
		"	lsrs  r0, #3									\n\t"	//bit 2 of EXC_RETURN -> carry
		"	bcs   1f										\n\t"
		"	mov   r0, sp									\n\t"
		"	b     2f										\n\t"
		"1:													\n\t"
		"	mrs   r0, psp									\n\t"
		"2:													\n\t"
		
		//check for ARM mode (do this first as it is speed critical)
		"	ldr   r1, [r0, #4 * 7]							\n\t"	//load pushed flags
		"	movs  r3, #1									\n\t"
		"	lsls  r3, #24									\n\t"	//T flag
		"	tst   r1, r3									\n\t"	//check for T bit
		"	bne   not_arm_mode								\n\t"	//if it is set, further testing to be done here
		"	orrs  r1, r3									\n\t"
		"	str   r1, [r0, #4 * 7]							\n\t"	//store into pushed flags
		
		CODE_GOTO_ARM_PC_IN_EXC_VALID
		
		//check if re-entry
		"not_arm_mode:										\n\t"
		"	ldr   r2, [r0, #4 * 7]							\n\t"
		"	lsls  r2, #32 - 6								\n\t"
		"	lsrs  r2, #32 - 6								\n\t"	//low 6 bits of pushed flags = exception number
		"	cmp   r2, #" STR(EXC_LVL_USED) "				\n\t"
		"	bne   not_reentry								\n\t"
		
		//is re-entry from our own code only (should only be memory access issues)
		//our memory accesses are crafted specially and we detect that
		//we also assume here that PC in our exclusive mode is valid
		"	ldr   r3, [r0, #4 * 6]							\n\t"	//exc.PC
		"	ldrh  r1, [r3, #2]								\n\t"	//should be a "B . +4" = 0xe000
		"	movs  r2, #0xe0									\n\t"
		"   lsls  r2, #8									\n\t"
		"	cmp   r1, r2									\n\t"
		"	bne   bug_in_classifier							\n\t"
		"	ldrh  r2, [r3, #4]								\n\t"	//should be a "MOVS R?, #??" (top 5 bits = 0b00100)
		"	lsrs  r2, #11									\n\t"
		"	cmp   r2, #0x04									\n\t"
		"	bne   bug_in_classifier							\n\t"
		
		//re-entry from our own code - skip the load/store and the next instruction
		"	adds  r3, #4									\n\t"
		"	str   r3, [r0, #4 * 6]							\n\t"	//exc.PC
		"	bx    lr										\n\t"
		
		//re-entry from our code but not a specially-instrumented load/store instr
		"bug_in_classifier:									\n\t"
		"	movs  r1, #" STR(EXC_m0_CAUSE_UNCLASSIFIABLE) "	\n\t"
		"	b     report_fatal								\n\t"
		
		//pc alignment exception
		"pc_align_exc:										\n\t"
		"	movs  r1, #" STR(EXC_m0_CAUSE_UNALIGNED) "		\n\t"	//expects address in r2
		"	b     report_fatal								\n\t"
		
		"access_fail_plus_4:								\n\t"
		"	adds  r2, #2									\n\t"
		"access_fail_plus_2:								\n\t"
		"	adds  r2, #2									\n\t"
		"access_fail:										\n\t"	//expects address in r2
		"	movs  r1, #" STR(EXC_m0_CAUSE_MEM_ACCESS_FAIL) "\n\t"
		"report_fatal:										\n\t"	//r0 = exc frame, r1 = cause, r2 = extra data
		"	ldr   r3, =faultHandlerWithExcFrame				\n\t"
		"	bx    r3										\n\t"
		
		//not re-entry - check pc alignment (r3 still has T bit)
		"not_reentry:										\n\t"
		"	ldr   r2, [r0, #4 * 6]							\n\t"	//exc.PC
		"	lsrs  r1, r2, #1								\n\t"	//check pc align
		"	bcs   pc_align_exc								\n\t"
		
		//further checks will take place in another context - go there now
		//we build an exception frame by hand (r0..r3, r12, lr, pc, sr) and "return" into it
		"   adr   r1, check_more_in_custom_mode				\n\t"	//stashed PC
		"	adds  r3, #" STR(EXC_LVL_USED) "				\n\t"	//desired SR
		"	push  {r1, r3}									\n\t"
		"	mov   r1, r12									\n\t"	//stashed r12
		"	push  {r1, lr}									\n\t"	//and stashed lr
		"	push  {r0-r3}									\n\t"	//stashed r0..r3
		"	movs  r2, #0x0e									\n\t"
		"	mvns  r2, r2									\n\t"	//get an lr to go to (handler mode, main stack) -> 0xfffffff1
		"	bx    r2										\n\t"
		
		//check more in a safer space (r0 = exc; r2 = exc->pc; lr & sp set for direct return to original exc cause)
		
		".balign 4											\n\t"	//"adr" above needs a word-aligned target
		"check_more_in_custom_mode:							\n\t"
		"	cmp   r0, r0									\n\t"	//set Z
		"	ldrh  r1, [r2]									\n\t"	//load instr, on failure, branch is skipped too
		"	b     1f										\n\t"
		"	movs  r1, #1									\n\t"	//clear Z. only executed on access failure
		"1:													\n\t"	//Z flag is clear on failure, due to the mov above, which is skipped on a successful read
		"	bne   access_fail								\n\t"
		"	lsrs  r3, r1, #11								\n\t"
		"	cmp   r3, #0x1C									\n\t"	//top 5 bits above 0b11100 mean a 32-bit instr
		"	bls   instr_is_16bits_long						\n\t"
		
		//let's read the second half of a 32-bit instr
		"instr_is_32_bits_long:								\n\t"
		"	mov   r12, r2									\n\t"	//save exc.pc
		"	cmp   r0, r0									\n\t"	//set Z
		"	ldrh  r2, [r2, #2]								\n\t"	//load instr part 2, on failure, branch is skipped too
		"	b     1f										\n\t"
		"	movs  r1, #1									\n\t"	//clear Z. only executed on access failure
		"1:													\n\t"	//Z flag is clear on failure, due to the mov above, which is skipped on a successful read
		"	bne   access_fail_plus_2						\n\t"
		
		//is it a blx.imm? (r3 still has first part >> 11)
		"	cmp   r3, #0x1e									\n\t"
		"	bne   instr32_not_blx							\n\t"
		"	lsrs  r3, r2, #11								\n\t"	//BLX in ARMv6 is longer, but this is for v5
		"	cmp   r3, #0x1d									\n\t"
		"	bne   instr32_not_blx							\n\t"
		
		"instr32_is_blx:									\n\t"
		"	lsls  r1, #21									\n\t"
		"	asrs  r1, #9									\n\t"	//sign-extended imm11 of first half, << 12
		"	lsls  r2, #21									\n\t"
		"	lsrs  r2, #20									\n\t"	//imm11 of second half, << 1
		"	adds  r2, r1									\n\t"	//offset
		"	mov   r1, r12									\n\t"	//the PC at call site
		"	adds  r1, #5									\n\t"	//lr
		"	str   r1, [r0, #4 * 5]							\n\t"	//set exc frame's LR
		"	adds  r2, r1									\n\t"
		"	lsrs  r2, #2									\n\t"
		"	lsls  r2, #2									\n\t"	//target address of the BLX
		
		//check if this is an OsCall (a speed optimization)
		"blx_check_for_oscall:								\n\t"
		"	cmp   r0, r0									\n\t"	//set Z
		"	ldr   r1, [r2]									\n\t"	//load instr, on failure, branch is skipped too
		"	b     1f										\n\t"
		"	movs  r1, #1									\n\t"	//clear Z. only executed on access failure. (must be nonzero: "movs Rx, #0" would SET Z)
		"1:													\n\t"
		"	bne   access_fail								\n\t"
		"	movs  r3, #0x0c									\n\t"
		"	ands  r3, r1									\n\t"
		"	beq   go_to_raw_arm_addr						\n\t"	//not oscall
		"	negs  r3, r3									\n\t"	//offset is negative
		"	mov   r12, r3									\n\t"	//save the offset
		"	add   r1, r12									\n\t"	//remove the offset to check instr
		"	ldr   r3, =0xe519c000							\n\t"
		"	cmp   r1, r3									\n\t"
		"	bne   go_to_raw_arm_addr						\n\t"
		"	mov   r3, r9									\n\t"
		"	add   r3, r12									\n\t"
		"	ldr   r3, [r3]									\n\t"
		"	mov   r12, r3									\n\t"	//r12 = [r9, #-table]
		
		//check word #2
		"	cmp   r0, r0									\n\t"	//set Z
		"	ldr   r1, [r2, #4]								\n\t"	//load instr, on failure, branch is skipped too
		"	b     1f										\n\t"
		"	movs  r1, #1									\n\t"	//clear Z. only executed on access failure. (must be nonzero: "movs Rx, #0" would SET Z)
		"1:													\n\t"
		"	bne   access_fail_plus_4						\n\t"
		#ifdef HAVE_v8M_BASE
			"	movw  r3, #0x00000ffc						\n\t"
		#else	
			"	movs  r3, #0x0f								\n\t"
			"	lsls  r3, #8								\n\t"
			"	adds  r3, #0xfc								\n\t"
		#endif
		"	ands  r3, r1									\n\t"	//funcOffset
		"	add   r12, r3									\n\t"	//pointer to func addr
		"	subs  r1, r3									\n\t"
		"	ldr   r3, =0xe59cf000							\n\t"
		"	cmp   r1, r3									\n\t"
		"	bne   go_to_raw_arm_addr						\n\t"
		
		"blx_is_to_os_call:									\n\t"
		"	mov   r2, r12									\n\t"
		"	ldr   r2, [r2]									\n\t"	//os call func addr
		"	lsrs  r1, r2, #1								\n\t"
		"	bcc   go_to_raw_arm_addr						\n\t"	//ok, go interp there
		
		"blx_is_to_oscall_impl_in_thumb:					\n\t"
		"	lsls  r1, #1									\n\t"	//set it to PC and go!
		"	str   r1, [r0, #4 * 6]							\n\t"
		"	bx    lr										\n\t"
		
		//32-bit instr that is not a BLX.imm
		"instr32_not_blx:									\n\t"
		CODE_FOR_UNDEF_32BIT_INSTR
		
		"undef_instr:										\n\t"
		"	movs  r1, #" STR(EXC_m0_CAUSE_UNDEFINSTR) "		\n\t"
		"	b     report_fatal								\n\t"
		
		//it was a 16 bit instr. instr is in r1, pc is in r2, remember, "BX PC" is not executed by c-m0 so we need to do that here
		"instr_is_16bits_long:								\n\t"
		"	movs  r3, #0x47									\n\t"
		"	lsls  r3, #8									\n\t"
		"	adds  r3, #0x78									\n\t"	//r3 = 0x4778 = "bx pc"
		"	cmp   r3, r1									\n\t"
		"	bne   instr16_is_not_bx_pc						\n\t"
		"	lsrs  r2, #2									\n\t"
		"	adds  r2, #1									\n\t"
		"	lsls  r2, #2									\n\t"	//r2 = (pc &~ 3) + 4
		"go_to_raw_arm_addr:								\n\t"
		CODE_TO_GO_TO_ARM
		
		//16 bit instr that is not a "bx pc"
		"instr16_is_not_bx_pc:								\n\t"
		CODE_FOR_POSSIBLY_UNDEF_16BIT_INSTR
		
		//more triage will be done in C (r0 = excFrame, r1 = instr16, r2 = pointer to the r8..r11, r4..r7 we push here)
		"	push  {r4-r7, lr}								\n\t"
		"	mov   r4, r8									\n\t"
		"	mov   r5, r9									\n\t"
		"	mov   r6, r10									\n\t"
		"	mov   r7, r11									\n\t"
		"	push  {r4-r7}									\n\t"
		"	mov   r2, sp									\n\t"
		"	bl    analyzeInstr16							\n\t"
		"	pop   {r4-r7}									\n\t"
		"	mov   r8, r4									\n\t"
		"	mov   r9, r5									\n\t"
		"	mov   r10, r6									\n\t"
		"	mov   r11, r7									\n\t"
		"	pop   {r4-r7, pc}								\n\t"	//pc gets the lr pushed above
		
		CODE_AT_END_EXTRA
		
		".ltorg												\n\t"
		:
		: EXTRA_ASM_INPUTS
		: "cc", "memory", "r0", "r1", "r2", "r3", "r12" //yes gcc needs this list...
	);
}



#endif
