#include <exported.h>
#include "common.h"
#include "printf.h"
#include "rp2040.h"
#include <boot.h>
#include "sdHw.h"
#include <dal.h>
#include <kal.h>

//DMA channel numbers (indexes into dma_hw->ch[]) used by this driver
#define DMA_UNIT_DO		10		//data out: memory -> PIO1 TX FIFO
#define DMA_UNIT_DI		11		//data in: PIO1 RX FIFO -> memory


//GPIO numbers. Of these, only PIN_SDIO_D3_CS is referenced by the code in this file,
// where it is driven through sio_hw->gpio_set / gpio_clr as the card's chip select.
#define PIN_SDIO_CLK	5		//sdio mode only
#define PIN_SDIO_D3_CS	22		//sdio mode only as D3, active low as SD nCS


//Driver-private state. Functions in this file that need it cast their struct SdHwData* argument to this type.
//NOTE(review): that presumes struct SdHwData offers at least this much suitably aligned storage - confirm against sdHw.h
struct MyHwData {
	uint32_t rdTimeoutTicks;	//extra wait allowed for a read data token once the byte-count polls run out (compared with TimGetTicks() deltas)
	uint32_t wrTimeoutTicks;	//same idea, used for write data responses and for the programming-busy wait
	uint16_t timeoutBytes;		//number of bytes polled before the waits switch over to tick-based timing
	uint8_t smNo;				//PIO1 state machine index, as obtained from repalmGetSdioPioInfo()
};

//CRC-7 lookup table for SD command frames, 256 entries.
//Usage (see sdPrvSendCmd): crc = mCrcTab7[(crc * 2) ^ nextByte], starting from crc = 0.
//Every entry fits in 7 bits; entry [1] is 0x09, which matches the x^7 + x^3 + 1 polynomial.
static const uint8_t mCrcTab7[] = {		//generated from the iterative func :)
	0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
	0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26, 0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
	0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d, 0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
	0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14, 0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
	0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b, 0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
	0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42, 0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
	0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69, 0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
	0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70, 0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
	0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e, 0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
	0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67, 0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
	0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
	0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55, 0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
	0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a, 0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
	0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03, 0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
	0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28, 0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
	0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31, 0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79,
};

//Request a bus width. Only the narrow bus is supported here, so the call
// succeeds (returns true) exactly when the caller does NOT ask for four-wide.
bool sdHwSetBusWidth(struct SdHwData *hwData, bool useFourWide)
{
	(void)hwData;

	if (useFourWide)
		return false;

	return true;
}

//Told the card's relative address; this backend has no use for it
void sdHwNotifyRCA(struct SdHwData *hwData, uint_fast16_t rca)
{
	(void)hwData;
	(void)rca;
}

//Store the timeout settings that the data/busy wait loops in this file read back later:
// timeoutBytes   - how many bytes to poll before switching to tick-based timing
// rdTimeoutTicks - tick budget used after that by the read data wait
// wrTimeoutTicks - tick budget used after that by the write reply and busy waits
void sdHwSetTimeouts(struct SdHwData *hwData, uint_fast16_t timeoutBytes, uint32_t rdTimeoutTicks, uint32_t wrTimeoutTicks)
{
	struct MyHwData *state = (struct MyHwData*)hwData;

	state->wrTimeoutTicks = wrTimeoutTicks;
	state->rdTimeoutTicks = rdTimeoutTicks;
	state->timeoutBytes = timeoutBytes;
}

//Prepare the driver: look up the PIO state machine to use, load default timeouts
// and a 400KHz starting clock, then program the fixed half of each DMA channel.
//Returns SD_HW_FLAG_INITED on success, or 0 when no state machine is reported.
uint32_t sdHwInit(struct SdHwData *hwData)
{
	struct MyHwData *state = (struct MyHwData*)hwData;
	int8_t pioSm = repalmGetSdioPioInfo();
	uint32_t txCtrl, rxCtrl;

	//negative means there is nothing for us to drive
	if (pioSm < 0)
		return 0;

	//must be stored before sdHwSetSpeed() runs, as that reads it back
	state->smNo = pioSm;

	//default timeouts
	sdHwSetTimeouts(hwData, 10000, 0, 0);
	sdHwSetSpeed(hwData, 400000, false);

	//request lines 8+n / 12+n pace the channels (PIO1 TX / RX for state machine n per the
	// RP2040 DREQ table); CHAIN_TO is set to the channel's own number
	txCtrl = ((8 + state->smNo) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) + (DMA_UNIT_DO << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB);
	rxCtrl = ((12 + state->smNo) << DMA_CH0_CTRL_TRIG_TREQ_SEL_LSB) + (DMA_UNIT_DI << DMA_CH0_CTRL_TRIG_CHAIN_TO_LSB);

	//outgoing channel always writes into the TX FIFO
	dma_hw->ch[DMA_UNIT_DO].write_addr = (uintptr_t)&pio1_hw->txf[state->smNo];
	dma_hw->ch[DMA_UNIT_DO].al1_ctrl = txCtrl;

	//incoming channel always reads from the RX FIFO
	dma_hw->ch[DMA_UNIT_DI].read_addr = (uintptr_t)&pio1_hw->rxf[state->smNo];
	dma_hw->ch[DMA_UNIT_DI].al1_ctrl = rxCtrl;

	return SD_HW_FLAG_INITED;
}

//Run one full-duplex transfer of len units through the two DMA channels and block until it is done.
// src / autoIncrSrc - where outgoing data is read from, and whether that address advances per unit
// dst / autoIncrDst - where incoming data is written to, and whether that address advances per unit
// hwData is not used here. The FIFO-side addresses and request pacing were set up once in sdHwInit().
//Callers in this file pass a single-byte variable with the matching autoIncr flag false when they
// want to send a constant byte or throw away what is received.
static void sdHwPrvSpiTransact(struct SdHwData *hwData, const void *src, bool autoIncrSrc, void *dst, bool autoIncrDst, uint32_t len)
{
	//arm the receiving channel first (presumably so it is ready before any byte gets clocked out)
	dma_hw->ch[DMA_UNIT_DI].write_addr = (uintptr_t)dst;
	dma_hw->ch[DMA_UNIT_DI].transfer_count = len;
	//keep the existing control bits, replace only INCR_WRITE, and set EN; this goes through the ctrl_trig alias
	dma_hw->ch[DMA_UNIT_DI].ctrl_trig = (dma_hw->ch[DMA_UNIT_DI].ctrl_trig &~ DMA_CH0_CTRL_TRIG_INCR_WRITE_BITS) | (autoIncrDst ? DMA_CH0_CTRL_TRIG_INCR_WRITE_BITS : 0) | DMA_CH0_CTRL_TRIG_EN_BITS;
	
	//then the sending channel, same pattern but for INCR_READ
	dma_hw->ch[DMA_UNIT_DO].read_addr = (uintptr_t)src;
	dma_hw->ch[DMA_UNIT_DO].transfer_count = len;
	dma_hw->ch[DMA_UNIT_DO].ctrl_trig = (dma_hw->ch[DMA_UNIT_DO].ctrl_trig &~ DMA_CH0_CTRL_TRIG_INCR_READ_BITS) | (autoIncrSrc ? DMA_CH0_CTRL_TRIG_INCR_READ_BITS : 0) | DMA_CH0_CTRL_TRIG_EN_BITS;
	
	//busy-wait (no timeout) until both channels show a zero count and neither reports BUSY
	while (dma_hw->ch[DMA_UNIT_DI].transfer_count || dma_hw->ch[DMA_UNIT_DO].transfer_count || (dma_hw->ch[DMA_UNIT_DI].ctrl_trig & DMA_CH0_CTRL_TRIG_BUSY_BITS) || (dma_hw->ch[DMA_UNIT_DO].ctrl_trig & DMA_CH0_CTRL_TRIG_BUSY_BITS));
	
	//disable both channels again, this time through the al1_ctrl alias
	dma_hw->ch[DMA_UNIT_DI].al1_ctrl &=~ DMA_CH0_CTRL_TRIG_EN_BITS;
	dma_hw->ch[DMA_UNIT_DO].al1_ctrl &=~ DMA_CH0_CTRL_TRIG_EN_BITS;
}

//Exchange exactly one byte with the card: val goes out, the byte that came in is returned
static uint8_t sdSpiByte(struct SdHwData *restrict hwData, uint_fast8_t val)
{
	uint8_t outByte = val;
	uint8_t inByte;

	sdHwPrvSpiTransact(hwData, &outByte, false, &inByte, false, 1);

	return inByte;
}

//Clock out 16 bytes of 0xff and discard whatever comes back.
//Chip select is not touched here.
void sdHwGiveInitClocks(struct SdHwData *hwData)
{
	uint8_t idleHigh = 0xff;
	uint8_t discard;

	sdHwPrvSpiTransact(hwData, &idleHigh, false, &discard, false, 16);
}

//Assert chip select: the line is active low, so drive the pin to 0
static void sdChipSelect(void)
{
	const uint32_t csPinMask = 1 << PIN_SDIO_D3_CS;

	sio_hw->gpio_clr = csPinMask;
}

//Release chip select, but only once the state machine has stalled on an empty TX FIFO
// (taken here as the sign that everything queued has been shifted out).
void sdHwChipDeselect(struct SdHwData *hwData)
{
	struct MyHwData *state = (struct MyHwData*)hwData;
	const uint32_t txStallFlag = 1 << (PIO_FDEBUG_TXSTALL_LSB + state->smNo);

	//write the flag bit (FDEBUG flags are write-one-to-clear on RP2040), then wait for it to be raised afresh
	pio1_hw->fdebug = txStallFlag;
	for (;;) {
		if (pio1_hw->fdebug & txStallFlag)
			break;
		asm("dsb sy");
	}

	//chip select is active low: drive the pin to 1
	sio_hw->gpio_set = 1 << PIN_SDIO_D3_CS;
}

//Build and transmit the 6-byte command frame:
// [0]    0x40 + cmd (truncated to one byte)
// [1..4] param, most significant byte first
// [5]    (crc7 << 1) | 1, where crc7 is computed over bytes 0..4 when cmdCrcRequired, else 0
//Bytes received while the frame is clocked out are discarded.
static void sdPrvSendCmd(struct SdHwData *hwData, uint8_t cmd, uint32_t param, bool cmdCrcRequired)
{
	uint8_t frame[6], rxDiscard;
	uint_fast8_t crc = 0, i;

	frame[0] = 0x40 + cmd;
	frame[1] = param >> 24;
	frame[2] = param >> 16;
	frame[3] = param >> 8;
	frame[4] = param;

	if (cmdCrcRequired) {

		//run the CRC over the bytes exactly as they will be sent. Using the already-truncated
		// frame bytes keeps the table index within 0..255 even for an out-of-range cmd
		// (previously "0x40 + cmd" was used untruncated for the first lookup)
		for (i = 0; i < sizeof(frame) - 1; i++)
			crc = mCrcTab7[(uint8_t)(crc * 2) ^ frame[i]];
	}
	frame[5] = crc * 2 + 1;

	sdHwPrvSpiTransact(hwData, frame, true, &rxDiscard, false, sizeof(frame));
}

enum SdHwCmdResult sdHwCmd(struct SdHwData *hwData, uint_fast8_t cmd, uint32_t param, bool cmdCrcRequired, enum SdHwRespType respTyp, void *respBufOut, enum SdHwDataDir dataDir, uint_fast16_t blockSz, uint32_t numBlocks)
{
	uint8_t *rsp = (uint8_t*)respBufOut;
	uint_fast8_t ret, i = 0;
	
	(void)cmdCrcRequired;
	
	sdChipSelect();
	
	sdPrvSendCmd(hwData, cmd, param, cmdCrcRequired);
	
	if (cmd == 12)		//do not ask!
		sdSpiByte(hwData, 0xff);
	
	while ((ret = sdSpiByte(hwData, 0xff)) == 0xff) {
		
		if (++i == 128) {
			sdHwChipDeselect(hwData);
			return SdHwCmdResultRespTimeout;
		}
	}
	
	switch (respTyp) {
		case SdRespTypeNone:
			break;
		
		case SdRespTypeR1:
		case SdRespTypeR1withBusy:
			*rsp++ = ret;
			break;
		
		case SdRespTypeR3:
		case SdRespTypeR7:
			if (ret & FLAG_ILLEGAL_CMD) {
				sdHwChipDeselect(hwData);
				return SdCmdInvalid;
			}
			if (ret &~ FLAG_IN_IDLE_MODE) {
				sdHwChipDeselect(hwData);
				return SdCmdInternalError;
			}
			for (i = 0; i < 4; i++)
				*rsp++ = sdSpiByte(hwData, 0xff);
			break;
		
		case SdRespTypeSpiR2:
			if (sdSpiByte(hwData, 0xff))
				ret |= FLAG_MISC_ERR;
			*rsp = ret;
			if (ret &~ FLAG_IN_IDLE_MODE) {
				sdHwChipDeselect(hwData);
				return SdCmdInternalError;
			}
			break;
		
		default:
			sdHwChipDeselect(hwData);
			return SdCmdInternalError;
	}
	
	if (dataDir == SdHwDataNone)
		sdHwChipDeselect(hwData);
		
	return SdHwCmdResultOK;
}

//Classify one byte polled while waiting for read data to begin:
// <0 - upper nibble is all zero: treated as an error token
// >0 - 0xfe: the data start token
//  0 - anything else: keep polling
static int_fast8_t sdHwPrvClassifyReadToken(uint_fast8_t byte)
{
	if (!(byte & 0xf0))
		return -1;

	return (byte == 0xfe) ? 1 : 0;
}

//Poll the card until read data is about to start.
//First up to timeoutBytes bytes are polled; if that is not enough, polling continues
// (at least one more byte) until rdTimeoutTicks have elapsed per TimGetTicks().
//Returns true once the start token is seen; false on an error token or when time runs out.
static bool sdHwPrvDataWait(struct SdHwData *hwData)
{
	struct MyHwData *hw = (struct MyHwData*)hwData;
	const uint_fast16_t timeoutBytes = hw->timeoutBytes;
	const uint32_t timeoutTicks = hw->rdTimeoutTicks;
	uint_fast16_t tries;
	int_fast8_t verdict;
	uint_fast8_t byte;
	uint32_t time;

	//phase 1: bounded by byte count
	for (tries = 0; tries < timeoutBytes; tries++) {

		byte = sdSpiByte(hwData, 0xff);
		verdict = sdHwPrvClassifyReadToken(byte);

		if (verdict < 0) {
			logt("sdHwPrvDataWait: 1 %02xh\n", byte);
			return false;
		}

		if (verdict > 0)
			return true;
	}

	//phase 2: bounded by elapsed ticks
	time = TimGetTicks();
	do {
		byte = sdSpiByte(hwData, 0xff);
		verdict = sdHwPrvClassifyReadToken(byte);

		if (verdict < 0) {
			logt("sdHwPrvDataWait: 2 %02xh\n", byte);
			return false;
		}

		if (verdict > 0)
			return true;

	} while (TimGetTicks() - time < timeoutTicks);

	//the last byte seen was passed to logt here without a conversion specifier to print it;
	// the format now matches the two messages above
	logt("sdHwPrvDataWait: 3 %02xh\n", byte);
	return false;
}

//Receive one data block of sz bytes into data, once the card signals that data is starting.
//The two bytes that follow the block (its CRC) are clocked in and dropped.
//Returns SdHwReadTimeout if the wait for the start of data fails, else SdHwReadOK.
enum SdHwReadResult sdHwReadData(struct SdHwData *hwData, uint8_t *data, uint_fast16_t sz)	//length must be even, pointer must be halfword aligned
{
	uint8_t idleHigh = 0xff;
	uint8_t crcSink;

	if (sdHwPrvDataWait(hwData)) {

		//payload lands in the caller's buffer
		sdHwPrvSpiTransact(hwData, &idleHigh, false, data, true, sz);

		//CRC bytes: both overwrite the same throwaway location
		sdHwPrvSpiTransact(hwData, &idleHigh, false, &crcSink, false, 2);

		//todo: consider crc
		return SdHwReadOK;
	}

	return SdHwReadTimeout;
}

//Send one data block and wait for the card's verdict on it.
//Sequence on the wire: start marker (0xFC for multi-block, 0xFE otherwise), sz data bytes,
// two zero bytes in place of a CRC, then polling for the data response.
//Polling runs for up to timeoutBytes bytes, then (at least once more) until wrTimeoutTicks elapse.
//A byte counts as a data response when bit 0 is set and bit 4 is clear; its low five bits pick the result.
enum SdHwWriteReply sdHwWriteData(struct SdHwData *hwData, const uint8_t *data, uint_fast16_t sz, bool isMultiblock)
{
	struct MyHwData *state = (struct MyHwData*)hwData;
	const uint_fast16_t maxPolls = state->timeoutBytes;
	const uint32_t tickBudget = state->wrTimeoutTicks;
	uint8_t startMarker, zeroByte = 0, rxDiscard;
	uint_fast16_t polls;
	uint_fast8_t reply;
	uint32_t startTicks;

	if (isMultiblock)
		startMarker = 0xFC;
	else
		startMarker = 0xFE;

	sdHwPrvSpiTransact(hwData, &startMarker, false, &rxDiscard, false, 1);
	sdHwPrvSpiTransact(hwData, data, true, &rxDiscard, false, sz);
	sdHwPrvSpiTransact(hwData, &zeroByte, false, &rxDiscard, false, 2);	//crc

	//phase 1: bounded by byte count
	for (polls = 0; polls < maxPolls; polls++) {

		reply = sdSpiByte(hwData, 0xFF);
		if ((reply & 0x11) == 0x01)
			goto decode;
	}

	//phase 2: bounded by elapsed ticks
	startTicks = TimGetTicks();
	do {
		reply = sdSpiByte(hwData, 0xFF);
		if ((reply & 0x11) == 0x01)
			goto decode;

	} while (TimGetTicks() - startTicks < tickBudget);

	return SdHwTimeout;

decode:
	switch (reply & 0x1f) {
		case 0x05:
			return SdHwWriteAccepted;

		case 0x0b:
			return SdHwWriteCrcErr;

		case 0x0d:
			return SdHwWriteError;

		default:
			return SdHwCommErr;
	}
}

//Wait for the card to stop signalling busy, i.e. until a polled byte reads back as 0xff.
//Polls up to timeoutBytes bytes first, then (at least once more) until wrTimeoutTicks elapse.
//Returns true when 0xff was seen, false if both limits ran out.
bool sdHwPrgBusyWait(struct SdHwData *hwData)
{
	struct MyHwData *state = (struct MyHwData*)hwData;
	const uint_fast16_t maxPolls = state->timeoutBytes;
	const uint32_t tickBudget = state->wrTimeoutTicks;
	uint_fast16_t polls = 0;
	uint32_t startTicks;

	//phase 1: bounded by byte count
	while (polls < maxPolls) {

		if (sdSpiByte(hwData, 0xFF) == 0xff)
			return true;

		polls++;
	}

	//phase 2: bounded by elapsed ticks
	startTicks = TimGetTicks();
	for (;;) {

		if (sdSpiByte(hwData, 0xFF) == 0xff)
			return true;

		if (TimGetTicks() - startTicks >= tickBudget)
			break;
	}

	return false;
}

//Program the state machine's clock divider so that it runs no faster than maxHz.
// maxHz               - requested ceiling; capped at 10MHz here. Zero is ignored (nothing is changed)
// highSpeedSignalling - not used by this backend
//Nothing is changed either if the base clock rate cannot be obtained (reported as <= 0).
void sdHwSetSpeed(struct SdHwData *hwData, uint32_t maxHz, bool highSpeedSignalling)
{
	//largest values the divider's integer and fractional fields can hold
	const uint32_t maxDivI = PIO_SM0_CLKDIV_INT_BITS >> PIO_SM0_CLKDIV_INT_LSB;
	const uint32_t maxDivF = PIO_SM0_CLKDIV_FRAC_BITS >> PIO_SM0_CLKDIV_FRAC_LSB;
	int32_t baseRate = repalmDalGetClockRate(SdioUnitClockRate);
	struct MyHwData *hw = (struct MyHwData*)hwData;
	uint32_t divI, divF, srcHz;

	(void)highSpeedSignalling;

	//a zero request would divide by zero below
	if (!maxHz)
		return;

	if (baseRate <= 0)
		return;
	srcHz = (uint32_t)baseRate;

	//no need for more on this system
	if (maxHz > 10000000)
		maxHz = 10000000;

	if (maxHz > srcHz) {
		divI = 1;
		divF = 0;
	}
	else {
		//cause we need to round the divisor up
		//NOTE(review): this bumps the divisor by almost a whole integer step rather than by 1/256,
		// so the result errs on the slow side. Kept as it was.
		uint32_t modifiedSpeed = srcHz + maxHz - 1;

		divI = modifiedSpeed / maxHz;
		divF = modifiedSpeed % maxHz * 256 / maxHz;
	}

	//do not let an oversized divisor spill out of its register field; use the slowest setting instead
	if (divI > maxDivI) {
		divI = maxDivI;
		divF = maxDivF;
	}

	pio1_hw->sm[hw->smNo].clkdiv = (pio1_hw->sm[hw->smNo].clkdiv &~ (PIO_SM0_CLKDIV_INT_BITS | PIO_SM0_CLKDIV_FRAC_BITS)) | (divI << PIO_SM0_CLKDIV_INT_LSB) | (divF << PIO_SM0_CLKDIV_FRAC_LSB);
}

//Clock in numBytes bytes (sending 0xff for each). They are stored to dstP in order,
// or simply dropped when dstP is NULL.
void sdHwRxRawBytes(struct SdHwData *hwData, void *dstP /* can be NULL*/, uint_fast16_t numBytes)
{
	uint8_t *out = (uint8_t*)dstP;
	uint_fast16_t idx;

	for (idx = 0; idx < numBytes; idx++) {

		uint8_t got = sdSpiByte(hwData, 0xff);

		if (out)
			out[idx] = got;
	}
}

//Finish a multi-block write by sending the stop-transmission token (0xFD).
//The byte received meanwhile is ignored. Always reports success.
bool sdHwMultiBlockWriteSignalEnd(struct SdHwData *hwData)
{
	const uint_fast8_t stopTranToken = 0xFD;

	(void)sdSpiByte(hwData, stopTranToken);

	return true;
}

//Nothing is sent at the end of a multi-block read in this backend. Always reports success.
bool sdHwMultiBlockReadSignalEnd(struct SdHwData *hwData)
{
	(void)hwData;

	return true;
}

//No limit on the block count per transfer is imposed here: reports the largest 32-bit value
uint32_t sdHwGetMaxBlocksAtOnce(struct SdHwData *hwData)
{
	(void)hwData;

	return ~(uint32_t)0;
}

//No limit on the block size is imposed here: reports the largest 32-bit value
uint32_t sdHwGetMaxBlockSize(struct SdHwData *hwData)
{
	(void)hwData;

	return ~(uint32_t)0;
}

//Card power switching is not implemented in this backend; the request is ignored
void sdHwCardPower(struct SdHwData *hwData, bool on)
{
	(void)hwData;
	(void)on;
}

//No card-detect is consulted here: a card is always reported as present
bool sdHwIsCardInserted(struct SdHwData *hwData)
{
	(void)hwData;

	return true;
}

//No write-protect switch is consulted here: the card is always reported as unlocked
bool sdHwIsCardLockSwitchOn(struct SdHwData *hwData)
{
	(void)hwData;

	return false;
}

//Shutdown hook; this backend has nothing to tear down
void sdHwShutdown(struct SdHwData *hwData)
{
	(void)hwData;
}

//Sleep hook; currently does nothing
void sdHwSleep(struct SdHwData *hwData)
{
	//really should do this
	(void)hwData;
}

//Wake hook; currently does nothing
void sdHwWake(struct SdHwData *hwData)
{
	//really should do this
	(void)hwData;
}
