#include "printf.h"
#include "audio.h"
#include "emit.h"


//Emit code that loads one frame (one sample per channel) in the given format from the source pointer, converts each
// sample to our signed 8.24 format, and advances the source pointer by the number of bytes consumed.
//	dest    - buffer the code is emitted into
//	dstRegL - receives the left (or the only) sample. must be a loReg
//	dstRegR - receives the right sample (stereo only). must be a loReg numbered higher than dstRegL
//	tmpReg  - scratch reg. clobbered for unsigned 16-bit mono, unsigned 32-bit, and float input
//	typ     - sample format of the input
//	chCfg   - AudioMono or AudioStereo
//Produced code assumes the source pointer is in r1 and updates it. Flags may be clobbered. Stack is used for float input.
//For float input r0 and r1 are used as scratch (saved and restored via the stack), so dstRegL, dstRegR and tmpReg
// must not be r0 or r1 in that case.
//Returns EmitErrNotEncodeable for a bad register choice, an unknown channel config or an unknown sample type, and
// EmitErrNone on success. NOTE(review): EMIT is defined elsewhere; presumably it returns early with the emitter's error.
static enum EmitStatus audioPrvEmitLoadSample(struct EmitBuf *dest, uint32_t dstRegL, uint32_t dstRegR, uint32_t tmpReg, enum AudioSampleType typ, enum AudioChannelConfig chCfg)
{
	uint32_t i, nCh, srcReg = 1;
	bool stereo;
	
	//validate the destination registers for the requested channel layout
	switch (chCfg) {
		case AudioMono:
			stereo = false;
			nCh = 1;
			
			if (dstRegL >= 8) {
				loge("%s: left reg must be loreg and %u >= 8\n", __func__, dstRegL);
				return EmitErrNotEncodeable;
			}
			break;
		
		case AudioStereo:
			stereo = true;
			nCh = 2;
			
			//L < R < 8 is required: both are loRegs and LDMIA loads them in ascending register order
			if (dstRegL >= dstRegR) {
				loge("%s: left reg must be smaller than right and %u >= %u\n", __func__, dstRegL, dstRegR);
				return EmitErrNotEncodeable;
			}
			if (dstRegR >= 8) {
				loge("%s: right reg must be loreg and %u >= 8\n", __func__, dstRegR);
				return EmitErrNotEncodeable;
			}
			break;
		
		default:
			return EmitErrNotEncodeable;
	}
	
	(void)nCh;	//shut GCC up
	
	//first: load, second: shift and adjust pointers
	//try to keep loads in order so that they can be pipelined and thus we save a cycle
	switch (typ) {
		case AudioSampleU8:
		case AudioSampleS8:
			
			//LDRB dstRegL, [src, #0]
			EMIT(LLloadImm, dstRegL, srcReg, 0, EmitSzByte, false, EmitAdrModeIndex);
			
			if (stereo) {
			
				//LDRB dstRegR, [src, #1]
				EMIT(LLloadImm, dstRegR, srcReg, 1, EmitSzByte, false, EmitAdrModeIndex);
			}
			
			//ADDS src, #numChannels * sizeof(uint8_t)
			EMIT(LLaddImm, srcReg, srcReg, sizeof(uint8_t) * nCh, EmitFlagsDoNotCare, false);
			
			if (typ == AudioSampleU8) {
				
				//unsigned -> signed: remove the 0x80 bias
				//dstRegL -= 0x80;
				EMIT(LLsubImm, dstRegL, dstRegL, 0x80, EmitFlagsDoNotCare, false);
				
				if (stereo) {
			
					//dstRegR -= 0x80;
					EMIT(LLsubImm, dstRegR, dstRegR, 0x80, EmitFlagsDoNotCare, false);
				}
			}
			else {
				
				//SXTB dstRegL, dstRegL
				EMIT(LLextend, dstRegL, dstRegL, 0, true, false);
				
				if (stereo) {
			
					//SXTB dstRegR, dstRegR
					EMIT(LLextend, dstRegR, dstRegR, 0, true, false);
				}
			}
			break;
		
		case AudioSampleU16LE:
		case AudioSampleS16LE:
		case AudioSampleU16BE:
		case AudioSampleS16BE:
		
			if (stereo) {
				//both 16-bit samples are fetched as one word and then split apart
				
				//LDMIA src!, {dstRegL}
				EMIT(LLldmia, srcReg, 1 << dstRegL, true);
				
				if (typ == AudioSampleU16BE || typ == AudioSampleS16BE) {
					
					//REV16 dstRegL, dstRegL
					EMIT(LLrev16, dstRegL, dstRegL);
				}
				
				if (typ == AudioSampleU16LE || typ == AudioSampleU16BE) {
					
					//flip the top bit of both halves to convert unsigned to signed. dstRegR is free to be used as a temp here
					
					//MOVL dstRegR, #0x80008000
					EMIT(HLloadImmToReg, dstRegR, 0x80008000, true, true, false);

					//EOR dstRegL, dstRegL, dstRegR
					EMIT(LLeorReg, dstRegL, dstRegL, dstRegR, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				}
				
				//right sample is the top half of the word
				//ASRS dstRegR, dstRegL, #16
				EMIT(LLmov, dstRegR, dstRegL, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
				
				//left sample is the bottom half of the word
				//SXTH dstRegL, dstRegL
				EMIT(LLextend, dstRegL, dstRegL, 0, false, false);
			}
			else {
				
				//LDRH dstRegL, [src]
				EMIT(LLloadImm, dstRegL, srcReg, 0, EmitSzHalfword, false, EmitAdrModeIndex);
				
				//ADDS src, #sizeof(uint16_t)
				EMIT(LLaddImm, srcReg, srcReg, sizeof(uint16_t), EmitFlagsDoNotCare, false);
				
				if (typ == AudioSampleS16LE) {
					
					//SXTH dstRegL, dstRegL
					EMIT(LLextend, dstRegL, dstRegL, 0, false, false);
				}
				else if (typ == AudioSampleS16BE) {
						
					//byteswap and sign extend in one go
					//REVSH dstRegL
					EMIT(LLrevsh, dstRegL, dstRegL);
				}
				else {
					
					if (typ == AudioSampleU16BE) {
						
						//REV16 dstRegL, dstRegL
						EMIT(LLrev16, dstRegL, dstRegL);
					}
					
					//unsigned -> signed: remove the 0x8000 bias
					
					//MOVL tmpReg, #0x8000
					EMIT(HLloadImmToReg, tmpReg, 0x8000, true, true, false);
					
					//SUB dstRegL, dstRegL, tmpReg
					EMIT(LLsubReg, dstRegL, dstRegL, tmpReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				}
			}
			break;

		case AudioSampleU32LE:
		case AudioSampleU32BE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
		
			//ldmia src!, {dstRegL, [dstRegR]}
			EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);
		
			if (typ == AudioSampleU32BE || typ == AudioSampleS32BE) {
				
				//REV dstRegL, dstRegL
				EMIT(LLrev, dstRegL, dstRegL);

				if (stereo) {
					
					//REV dstRegR, dstRegR
					EMIT(LLrev, dstRegR, dstRegR);
				}
			}
		
			if (typ == AudioSampleU32LE || typ == AudioSampleU32BE) {
				
				//unsigned -> signed: remove the 0x80000000 bias
				
				//MOVL tmpReg, #0x80000000
				EMIT(HLloadImmToReg, tmpReg, 0x80000000, true, true, false);
				
				//SUB dstRegL, dstRegL, tmpReg
				EMIT(LLsubReg, dstRegL, dstRegL, tmpReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				
				if (stereo) {
					
					//SUB dstRegR, dstRegR, tmpReg
					EMIT(LLsubReg, dstRegR, dstRegR, tmpReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				}
			}
			break;
		
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
		
			// abbreviated float reading code that only does what we care for. we need two temp regs and do this for each value
			//  inf, nan, and all out of bounds values become zero (this might introduce clipping if your samples are too big)
			//  only values whose biased exponent is in 104..126 are converted, all else produce zero
		
			//ldmia src!, {dstRegL, [dstRegR]}
			//this MUST be emitted before the push below: r1 is both the source pointer and one of our scratch regs, so
			// the value that gets saved (and later popped) has to be the already-advanced pointer, else the writeback is lost
			EMIT(LLldmia, srcReg, (1 << dstRegL) | (stereo ? (1 << dstRegR) : 0), true);
			
			//push {r0-r1}
			EMIT(HLpush, 0x0003);
			
			//MOVL tmpReg, #0x80000000		//the implicit leading one of the mantissa, once mantissa is shifted to the top
			EMIT(HLloadImmToReg, tmpReg, 0x80000000, true, true, false);
			
			for (i = 0; i < nCh; i++) {
				
				uint32_t rvNo = i ? dstRegR : dstRegL;
				struct EmitBuf bccOverJumpToOut, branchToOut, skipNegate;
		
				if (typ == AudioSampleFloatBE) {
					
					//rev rvNo, rvNo
					EMIT(LLrev, rvNo, rvNo);
				}
			
				//extract the biased exponent
				//ubfx r0, rV, #23, #8
				EMIT(LLbfx, 0, rvNo, 23, 8, true, true);
				
				//subs r0, #104
				EMIT(LLsubImm, 0, 0, 104, EmitSetFlags, false);
				
				//cmp r0, #127 - 104
				EMIT(LLcmpImm, 0, 127 - 104);
				
				//bcc skipThisPart		//taken when exponent is in the range we handle
				EMIT(SaveSpace, &bccOverJumpToOut, 1);
				
				//mov rV, #0
				EMIT(HLloadImmToReg, rvNo, 0, false, false, true);
				
				//b out
				EMIT(SaveSpace, &branchToOut, 1);
				
				//skipThisPart is here
				EMIT_TO(LLbranch, &bccOverJumpToOut, emitGetPtrToJumpHere(dest), EmitCcCc);
				
				//move mantissa to the top of the word
				//lsl r1, rV, #8
				EMIT(LLmov, 1, rvNo, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
				
				//orr r1, r1, tmpReg
				EMIT(LLorrReg, 1, 1, tmpReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
				
				//calculate how far right the mantissa needs to be shifted
				//sub r0, r0, #(127 - 104) + 9
				EMIT(LLsubImm, 0, 0, (127 - 104) + 9, EmitFlagsDoNotCare, false);
				
				//neg r0, r0
				EMIT(LLrsbImm, 0, 0, 0, EmitLeaveFlags, true);
				
				//lsrs r1, r0
				EMIT(LLshiftByReg, 1, 1, 0, EmitShiftLsr, EmitFlagsDoNotCare, false);
				
				//shift the sign bit out into the carry flag
				//lsls rV, #1
				EMIT(LLmov, rvNo, rvNo, EmitShiftLsl, 1, EmitSetFlags, false);
				
				//mov rV, r1		//must not touch flags, carry is consumed below
				EMIT(LLmov, rvNo, 1, EmitShiftLsl, 0, EmitLeaveFlags, false);
				
				//bcc skipNegate
				EMIT(SaveSpace, &skipNegate, 1);
				
				//neg[cs] rV, r1
				EMIT(LLrsbImm, rvNo, 1, 0, EmitLeaveFlags, true);
				
				//skipNegate is here
				EMIT_TO(LLbranch, &skipNegate, emitGetPtrToJumpHere(dest), EmitCcCc);
				
				//"out" label is here
				EMIT_TO(LLbranch, &branchToOut, emitGetPtrToJumpHere(dest), EmitCcAl);
			}
			
			//pop {r0-r1}		//r1 comes back as the advanced source pointer
			EMIT(HLpop, 0x0003);
		
			break;
		default:
			return EmitErrNotEncodeable;
	}
	
	//shift sample into place
	switch (typ) {
		case AudioSampleU8:
		case AudioSampleS8:
			//lsls dstRegL, 16
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//lsls dstRegR, 16
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleU16LE:
		case AudioSampleU16BE:
		case AudioSampleS16LE:
		case AudioSampleS16BE:
			//lsl dstRegL, 8
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//lsls dstRegR, 8
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftLsl, 8, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleU32LE:
		case AudioSampleU32BE:
		case AudioSampleS32LE:
		case AudioSampleS32BE:
			//asr dstRegL, 8
			EMIT(LLmov, dstRegL, dstRegL, EmitShiftAsr, 8, EmitFlagsDoNotCare, false);
			
			if (stereo) {
				//asr dstRegR, 8
				EMIT(LLmov, dstRegR, dstRegR, EmitShiftAsr, 8, EmitFlagsDoNotCare, false);
			}
			break;
		case AudioSampleFloatLE:
		case AudioSampleFloatBE:
			//float conversion above already produced the final value
			break;
		default:
			//nothing
			return EmitErrNotEncodeable;
	}
	
	return EmitErrNone;
}

//Emit code computing dstReg = (sampleReg * volumeReg) >> 10 using only 32-bit multiplies: the sample is split into a
// signed top half and an unsigned bottom half, each half is multiplied by the volume, and the two partial products
// are shifted into position and summed.
//tmpReg is clobbered and must differ from dstReg. Volume is expected to satisfy 0 <= volume <= 0x7fff.
//Callers in this file pass either dstReg == sampleReg or two distinct sample regs.
static enum EmitStatus audioPrvEmitVolumeScale(struct EmitBuf *dest, uint32_t dstReg, uint32_t sampleReg, uint32_t volumeReg, uint32_t tmpReg)
{
	const uint32_t halfWordBits = 16;								//width of each half of the sample
	const uint32_t loProductDownShift = 10;							//how far the low partial product moves down
	const uint32_t hiProductUpShift = halfWordBits - loProductDownShift;	//how far the high partial product moves up (6)
	
	//tmpReg = signed top half of the sample
	//asr tmpReg, sampleReg, #16
	EMIT(LLmov, tmpReg, sampleReg, EmitShiftAsr, halfWordBits, EmitFlagsDoNotCare, false);
	
	//dstReg = unsigned bottom half of the sample
	//uxth dstReg, sampleReg
	EMIT(LLextend, dstReg, sampleReg, 0, false, true);

	//both halves get multiplied by the volume
	//mul tmpReg, volumeReg
	EMIT(LLmulReg, tmpReg, tmpReg, volumeReg, EmitFlagsDoNotCare, false);

	//mul dstReg, volumeReg
	EMIT(LLmulReg, dstReg, dstReg, volumeReg, EmitFlagsDoNotCare, false);

	//line the partial products up with each other
	//lsr[s] dstReg, #10
	EMIT(LLmov, dstReg, dstReg, EmitShiftLsr, loProductDownShift, EmitFlagsDoNotCare, false);
	
	//lsl tmpReg, #6
	EMIT(LLmov, tmpReg, tmpReg, EmitShiftLsl, hiProductUpShift, EmitFlagsDoNotCare, false);
	
	//and combine them
	//add dstReg, tmpReg
	EMIT(LLaddReg, dstReg, dstReg, tmpReg, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	return EmitErrNone;
}

//Emit code that applies volume to the freshly loaded sample(s), taking input and output channel layout into account:
//	mono in,   mono out   - regL is scaled by the volume in regVolL (expected to be the mono volume)
//	mono in,   stereo out - regR = regL scaled by regVolR, then regL is scaled by regVolL
//	stereo in, stereo out - regL and regR are each scaled by their own volume
//	stereo in, mono out   - as above, then regL = (regL + regR) >> 1
//regL & regR are both input and output and MUST be loRegs. A volume reg numbered 8 or above is first copied into
// regTmpB and used from there. regTmpA is handed to the scaling code as its scratch reg.
static enum EmitStatus audioPrvMixInFuncScaleChansByVolume(struct EmitBuf *dest, uint32_t regL, uint32_t regR, uint32_t regVolL, uint32_t regVolR, uint32_t regTmpA, uint32_t regTmpB, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	
	//mono in and out: just scale by the (already averaged) volume
	if (chans != AudioStereo && !nativeFmtIsStereo) {
		
		if (regVolL >= 8) {
			
			//mov regTmpB, regVolL
			EMIT(LLmov, regTmpB, regVolL, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
			regVolL = regTmpB;
		}
		
		return audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmpA);
	}
	
	//mono in, stereo out: derive both outputs from the one input. right goes first since it does not destroy regL
	if (chans != AudioStereo) {
		
		if (regVolR >= 8) {
			
			//mov regTmpB, regVolR
			EMIT(LLmov, regTmpB, regVolR, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
			regVolR = regTmpB;
		}
		
		now = audioPrvEmitVolumeScale(dest, regR, regL, regVolR, regTmpA);
		if (now != EmitErrNone)
			return now;
		
		if (regVolL >= 8) {
			
			//mov regTmpB, regVolL
			EMIT(LLmov, regTmpB, regVolL, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
			regVolL = regTmpB;
		}
		
		return audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmpA);
	}
	
	//stereo in: each channel gets its own volume
	if (regVolL >= 8) {
		
		//mov regTmpB, regVolL
		EMIT(LLmov, regTmpB, regVolL, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
		regVolL = regTmpB;
	}
	
	now = audioPrvEmitVolumeScale(dest, regL, regL, regVolL, regTmpA);
	if (now != EmitErrNone)
		return now;
	
	if (regVolR >= 8) {
		
		//mov regTmpB, regVolR
		EMIT(LLmov, regTmpB, regVolR, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
		regVolR = regTmpB;
	}
	
	now = audioPrvEmitVolumeScale(dest, regR, regR, regVolR, regTmpA);
	if (now != EmitErrNone)
		return now;
	
	//stereo out needs nothing more
	if (nativeFmtIsStereo)
		return EmitErrNone;
	
	//mono out: average the two scaled channels into regL
	
	//add regL, regR
	EMIT(LLaddReg, regL, regL, regR, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//asrs regL, #1
	EMIT(LLmov, regL, regL, EmitShiftAsr, 1, EmitFlagsDoNotCare, false);
	
	return EmitErrNone;
}

static enum EmitStatus audioPrvMixInAddToOutSampleProperly(struct EmitBuf *dest, uint32_t regDst, uint32_t regSrc)
{
	if (MIXER_BE_RECKLESS_WITH_MIXING) {
		
		//ADD regDst, regSrc
		EMIT(LLaddReg, regDst, regDst, regSrc, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	else {
		struct EmitBuf bvcSpace, bcsSatNeg;
		
		//this is the fastest way i found to saturate in v7. it works, i promise. proof is up to the reader
		
		//ADDS regDst, regSrc
		EMIT(LLaddReg, regDst, regDst, regSrc, EmitShiftLsl, 0, EmitSetFlags, false);

		//"BVC skipSat"		
		EMIT(SaveSpace, &bvcSpace, 1);
		
		//sat: (need to produce 0x80000000 or 0x7fffffff). common path here produces 0x80000000
		
		//mov regDst, #0x80	//important: this does not affect carry
		EMIT(LLmovImm, regDst, 0x80, 0, EmitFlagsDoNotCare, false);

		//rev regDst, regDst	//important: this does not affect carry
		EMIT(LLrev, regDst, regDst);
		
		//"BCS satNegative"		
		EMIT(SaveSpace, &bcsSatNeg, 1);
		
		//sat: positive path: need to convert 0x80000000 to 0x7fffffff. easy
		EMIT(LLsubImm, regDst, regDst, 1, EmitFlagsDoNotCare, false);
		
		//satNegative sat is here
		EMIT_TO(LLbranch, &bcsSatNeg, emitGetPtrToJumpHere(dest), EmitCcCs);
		
		//skipSat sat is here
		EMIT_TO(LLbranch, &bvcSpace, emitGetPtrToJumpHere(dest), EmitCcVc);
	}
	
	return EmitErrNone;
}

//Emit code that mixes the produced sample(s) into the output buffer pointed to by r0 and advances r0 past them.
//	regL, regR       - produced samples (regR only used for stereo output). For stereo they MUST be distinct and in order (regL < regR)
//	tmpRegL, tmpRegR - scratch regs that receive the current contents of the output buffer (tmpRegR only used for stereo output)
//	nativeFmtIsStereo - whether the output buffer holds two words per frame or one
//The sums are produced in the sample regs themselves, so the samples passed in are corrupted. Flags may be corrupted.
static enum EmitStatus audioPrvMixInFuncEmitSampleExport(struct EmitBuf *dest, uint32_t regL, uint32_t regR, uint32_t tmpRegL, uint32_t tmpRegR, bool nativeFmtIsStereo)
{
	enum EmitStatus now;
	
	if (nativeFmtIsStereo) { //add two samples into the output buffer
	
		if (regL >= regR)
			fatal("sample regs must be distincs and in order\n");
	
		//ldr tmpRegL, [r0]
		EMIT(LLloadImm, tmpRegL, 0, 0, EmitSzWord, false, EmitAdrModeIndex);
		
		//ldr tmpRegR, [r0, #4]		//the right channel's current value must land in tmpRegR, it is what gets added to regR below
		EMIT(LLloadImm, tmpRegR, 0, 4, EmitSzWord, false, EmitAdrModeIndex);
		
		//NOTE: we could add to temp and leave sample regs uncorrupted, but sometimes sample regs are loregs in order and we save on our store using a STMIA.N
		now = audioPrvMixInAddToOutSampleProperly(dest, regL, tmpRegL);
		if (now != EmitErrNone)
			return now;
		
		now = audioPrvMixInAddToOutSampleProperly(dest, regR, tmpRegR);
		if (now != EmitErrNone)
			return now;

		//sample regs are in order (checked above), so a single stmia with writeback stores both and advances dst
		
		//stmia r0!, {regL, regR}	//order is proper for sure!
		EMIT(HLstmia, 0, (1 << regL) + (1 << regR), true);
	}
	else {				//add one sample into the output buffer
		
		//ldr tmpRegL, [r0]
		EMIT(LLloadImm, tmpRegL, 0, 0, EmitSzWord, false, EmitAdrModeIndex);

		//NOTE: we could add to temp and leave sample reg uncorrupted, but sometimes sample reg is a loreg and we save on our store using a STMIA.N

		//add regL, tmpRegL 				// add LEFT
		now = audioPrvMixInAddToOutSampleProperly(dest, regL, tmpRegL);
		if (now != EmitErrNone)
			return now;

		//stmia r0!, {regL}
		EMIT(HLstmia, 0, (1 << regL), true);
	}
	
	return EmitErrNone;
}

enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsNoResamp(struct EmitBuf *dest, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	uint32_t sampleSzShift = (mSampleShifts[sampTyp] + (chans == AudioStereo ? 1 : 0));
	struct EmitBuf space;
	enum EmitStatus now;
	uintptr_t loopPos;
	
	//int32_t* MixInF(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples);
	//r0 is dst
	//r1 is src
	//r2 is volumeL
	//r3 is volumeR
	//r4 is sampleL
	//r5 is sampleR
	//r6 is local temp in a few places
	//r7 is local temp for mixing in
	//r12 is src end pointer
	
	//RESAMP STATE not used
	
	//push {r1, r4-r7}
	EMIT(HLpush, 0x00f2);

	//ldr r1, [r1]	//get source pointer
	EMIT(LLloadImm, 1, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//up front decide how many samples we'll produce
	
	//ldr r4, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, 4, EMIT_REG_NO_SP, 7 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//cmp r2, r4
	EMIT(LLcmpReg, 2, 4, EmitShiftLsl, 0);
	
	//bmi skip	//taken if r2 < r4		(less or same space in output as we'll produce samples)
	EMIT(SaveSpace, &space, 1);
	
	//mov[pl] r2, r4	//so we simply just have to produce numSamples == r2
	EMIT(LLmov, 2, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
	
	//"skip" is here
	EMIT_TO(LLbranch, &space, emitGetPtrToJumpHere(dest), EmitCcMi);
	
	//lsl r2, sampleSzShift		//calc how many BYTES we'll use from input
	EMIT(LLmov, 2, 2, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, true);
	
	//mov r12, r2				//num bytes to produce
	EMIT(LLmov, 12, 2, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
	
	//add r12, r1				//end pointer for input	(hi reg is ok since we'll be comparing to it)
	EMIT(LLaddReg, 12, 12, 1, EmitShiftLsl, 0, EmitFlagsDoNotCare, true);
	
	//ldr r2, [sp, #ofst_to_volume_L]
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 5 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//ldr r3, [sp, #ofst_to_volume_R]
	EMIT(LLloadImm, 3, EMIT_REG_NO_SP, 6 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);

	//if input & output are both mono, we need to calculate average volume (we'll store it in r2)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		//add r2, r3
		EMIT(LLaddReg, 2, 2, 3, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

		//lsrs r2, #1
		EMIT(LLmov, 2, 2, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	
	loopPos = emitGetPtrToJumpHere(dest);
	
	//load sample(s) into r4 (and r5 if stereo)
	now = audioPrvEmitLoadSample(dest, 4, 5, 6, sampTyp, chans);
	if (now != EmitErrNone)
		return now;
	
	//handle volume scaling
	now = audioPrvMixInFuncScaleChansByVolume(dest, 4, 5, 2, 3, 6, 7, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//store it
	now = audioPrvMixInFuncEmitSampleExport(dest, 4, 5, 6, 7, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//cmp r12, r1  //see if we're done
	EMIT(LLcmpReg, 12, 1, EmitShiftLsl, 0);
	
	//loop back if there are more samples to work on
	EMIT(LLbranch, loopPos, EmitCcNe);

	//ldr r2, [sp]   //we need to store r1
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//str r1, [r2]
	EMIT(LLstoreImm, 1, 2, 0, EmitSzWord, EmitAdrModeIndex);

	//pop {r1, r4-r7}
	EMIT(HLpop, 0x00f2);
	
	//bx lr
	EMIT(LLbx, EMIT_REG_NO_LR);

	return EmitErrNone;
}

//Emit the complete body of a mix-in function that upsamples its input using linear interpolation between the "current"
// and the "next" input sample. Each output sample is (cur * tabEntry + next * (0x10000 - tabEntry)) >> 16, where
// tabEntry is a u16 read from resampTab. A zero entry marks the end of the table (we then wrap to its start), and the
// bottom bit of an entry says whether a new input sample must be loaded after this output sample is produced.
//Volume is applied to each input sample right after it is loaded, before interpolation.
//	dest              - buffer the code is emitted into
//	resampTab         - the coefficient table. its address and its first entry get inlined into the emitted code
//	sampTyp           - input sample format
//	chans             - input channel layout
//	nativeFmtIsStereo - whether the output buffer is stereo
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsUpsample(struct EmitBuf *dest, const uint16_t* resampTab, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	const uint8_t regDst = 0, regSrc = 1, regCurSampL = 2, regCurSampR = 3, regNextSampL = 4, regNextSampR = 5, regTmpA = 6, regTmpB = 7, regVolL = 8;
	const uint8_t regVolR = 9, regSrcEndPtr = 10, regNeedSampleLoad = 11, regTabPtr = 12, regDstEndPtr = 14;
	uint32_t sampleSzShift = (mSampleShifts[sampTyp] + (chans == AudioStereo ? 1 : 0)), sampleSz = 1 << sampleSzShift;
	struct EmitBuf beqOutOfInput, skipTableReloadSpot, bneSpot, jumpToExitCodeSpot;
	//NOTE(review): regSrcEnd is declared here but does not appear to be used anywhere in this function
	uint32_t i, numOutputChannels = nativeFmtIsStereo ? 2 : 1, regSrcEnd;
	uintptr_t loopLoadSample, loopPostLoadSample;
	enum EmitStatus now;
	
	//sample regs must be in ascending L,R order (ldmia/stmia/export need it) and below the temp regs
	if (regCurSampL >= regCurSampR || regNextSampL >= regNextSampR || regCurSampR >= regTmpA || regNextSampR >= regTmpA)	
		fatal("required order violated\n");
	
	//int32_t* upsample(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples)

	//r0 is dst
	//r1 is src
	//r2 is current sample L (or mono)
	//r3 is current sample R
	//r4 is "next" sample L (or mono)
	//r5 is "next" sample R
	//r6 is temp value
	//r7 is temp value
	//r8 is volume L (or mono volume)
	//r9 is volume R
	//r10 is source end pointer
	//r11 is bool_load_a_sample flag
	//r12 is current table pointer
	//r14 is dst end ptr
	
	//RESAMP STATE is:
	// [0] - previously read "now" L sample
	// [1] - previously read "now" R sample
	// [2] - table pointer to current entry
	
	//we use each sample more than once, so volume scaling is better done BEFORE interpolation than after. we do that
	
	//push {r1, r3-r7, lr}		//seven words. r1 (srcP) and r3 (resampleStateP) are read back from the stack at exit
	EMIT(HLpush, 0x40fa);

	//mov r4..r7, r8..r11		//hi regs get saved by way of lo regs
	for (i = 0; i < 4; i++)
		EMIT(LLmov, 4 + i, 8 + i, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//push {r4-r7}	//this pushes the values of r8..r11 (four more words, 11 words pushed in total)
	EMIT(HLpush, 0x00f0);

	//ldr regSrc, [r1]	//get source pointer
	EMIT(LLloadImm, regSrc, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//use up r2 ("maxOutSamples") to calculate regDstEndPtr
	
	//calculate destination end pointer. each output sample is a 32-bit word, stereo has two per frame
	//lsl regTmpA, r2, #2 + isStereo
	EMIT(LLmov, regTmpA, 2, EmitShiftLsl, nativeFmtIsStereo ? 3 : 2, EmitFlagsDoNotCare, false);
	
	//add regTmpA, regDst
	EMIT(LLaddReg, regTmpA, regTmpA, regDst, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//mov regDstEndPtr, regTmpA
	EMIT(LLmov, regDstEndPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//use up r3 ("resampleStateP") to load state 

	//load resamp state
	//ldmia r3!, {regNextSampL, regNextSampR, regTmpB}				//load current table pointer to regTmpB, and "cur L" and "cur R" from resamp state into new sample regs (which normally store next sample), but we're about to move it into "cur" sample space
	EMIT(LLldmia, 3, (1 << regTmpB) | (1 << regNextSampL) | (1 << regNextSampR), true);
	
	//mov regTabPtr, regTmpB
	EMIT(LLmov, regTabPtr, regTmpB, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//calculate source end pointer
	//ldr regTmpA, [sp, #ofst_to_numInSamples]		//11 pushed words + 2 stack args before it
	EMIT(LLloadImm, regTmpA, EMIT_REG_NO_SP, 13 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//lsl regTmpA, in_sample_sz_shift
	EMIT(LLmov, regTmpA, regTmpA, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, false);
	
	//add regTmpA, regSrc
	EMIT(LLaddReg, regTmpA, regTmpA, regSrc, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//move regSrcEndPtr, regTmpA
	EMIT(LLmov, regSrcEndPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//ldr regTmpA, [sp, #ofst_to_volume_L]
	EMIT(LLloadImm, regTmpA, EMIT_REG_NO_SP, 11 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//ldr regTmpB, [sp, #ofst_to_volume_R]
	EMIT(LLloadImm, regTmpB, EMIT_REG_NO_SP, 12 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//if input & output are both mono, we need to calculate average volume (we'll store it in r8)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		//add regTmpA, regTmpB
		EMIT(LLaddReg, regTmpA, regTmpA, regTmpB, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//lsr regTmpA, #1		//the average lands in regVolL via the mov below
		EMIT(LLmov, regTmpA, regTmpA, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	else {
		
		//mov regVolR, regTmpB
		EMIT(LLmov, regVolR, regTmpB, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//mov regVolL, regTmpA
	EMIT(LLmov, regVolL, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//save location for the loop where a new sample needs to be loaded
	loopLoadSample = emitGetPtrToJumpHere(dest);
	
	//move current "next" sample into "current"
	EMIT(LLmov, regCurSampL, regNextSampL, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	if (nativeFmtIsStereo) {
		
		EMIT(LLmov, regCurSampR, regNextSampR, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//cmp src, regSrcEndPtr		//any input left?
	EMIT(LLcmpReg, regSrc, regSrcEndPtr, EmitShiftLsl, 0);
	
	//save space for a "beq" to exit
	EMIT(SaveSpace, &beqOutOfInput, 1);
	
	//get "next" sample
	now = audioPrvEmitLoadSample(dest, regNextSampL, regNextSampR, regTmpA, sampTyp, chans);
	if (now != EmitErrNone)
		return now;
	
	//scale by volume and maybe merge
	now = audioPrvMixInFuncScaleChansByVolume(dest, regNextSampL, regNextSampR, regVolL, regVolR, regTmpA, regTmpB, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//save location for the loop where sample is not needed to be loaded
	loopPostLoadSample = emitGetPtrToJumpHere(dest);
	
	//fetch the next table entry. table pointer lives in a hi reg, so work on it in a lo reg
	
	//mov regTmpA, regTabPtr
	EMIT(LLmov, regTmpA, regTabPtr, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//ldrh regTmpB, [regTmpA]
	EMIT(LLloadImm, regTmpB, regTmpA, 0, EmitSzHalfword, false, EmitAdrModeIndex);
	
	//add regTmpA, #sizeof(uint16_t)
	EMIT(LLaddImm, regTmpA, regTmpA, sizeof(uint16_t), EmitFlagsDoNotCare, false);
	
	#ifdef HAVE_v8M_BASE	//can use cbnz
		
		//save space for a "cbnz" to skip table reload
		EMIT(SaveSpace, &skipTableReloadSpot, 1);
		
	#else					//must use cmp
	
		//cmp regTmpB, #0	// is the table over? (did we read a zero)?
		EMIT(LLcmpImm, regTmpB, 0);
		
		//save space for a "bne" to skip table reload
		EMIT(SaveSpace, &skipTableReloadSpot, 1);
	#endif
	
	//ldr regTmpA, =resampTab + 1	//reload table. point right to elem idx 1 since we'll inline idx 0 here
	EMIT(HLloadImmToReg, regTmpA, (uintptr_t)(resampTab + 1), true, true, false);
	
	//mov regTmpB, resampTab[0]		//we know we'll load idx 0, inline it here
	EMIT(HLloadImmToReg, regTmpB, *resampTab, true, true, false);
	
	#ifdef HAVE_v8M_BASE	//can use cbnz
		
		//set up that "cbnz" we saved a space for
		EMIT_TO(LLcbnz, &skipTableReloadSpot, regTmpB, emitGetPtrToJumpHere(dest));
		
	#else					//must use cmp
	
		//set up that "bne" we saved a space for
		EMIT_TO(LLbranch, &skipTableReloadSpot, emitGetPtrToJumpHere(dest), EmitCcNe);
	
	#endif
	
	//mov regTabPtr, regTmpA
	EMIT(LLmov, regTabPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//regTmpB is now the table entry. we need to save the bottom bit for later in regNeedSampleLoad
	
	//lsl regTmpA, regTmpB, #31				//grab the bottom bit
	EMIT(LLmov, regTmpA, regTmpB, EmitShiftLsl, 31, EmitSetFlags, false);
	
	//mov regNeedSampleLoad, regTmpA
	EMIT(LLmov, regNeedSampleLoad, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//bottom bit is too low to matter so we do not shift it off. sum being 0x10000 instead of 0x8000 matters more to us
	
	//ldr regTmpA, =0x10000
	EMIT(HLloadImmToReg, regTmpA, 0x10000, true, true, false);
	
	//sub regTmpA, regTmpB		//get (0x10000 - tabEntry) into regTmpA
	EMIT(LLsubReg, regTmpA, regTmpA, regTmpB, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//push {r0, r1, regCurSampL, regCurSampR, regNextSampL, regNextSampR}		//r0 and r1 become temps, sample regs get destroyed by the math below
	EMIT(HLpush, 0x0003 | (1 << regCurSampL) | (1 << regCurSampR) | (1 << regNextSampL) | (1 << regNextSampR));
	
	//interpolate. "Now" sample's coefficient is in regTmpB, "next" sample's coefficient is in regTmpA
	//we produce results in regCurSampL, regCurSampR. the original sample values are what is preserved on stack
	
	for (i = 0; i < numOutputChannels; i++) {
		//we know that table values are nonzero, fit in, u16 and add up to 0x10000
		
		uint32_t sampRegNext = i ? regNextSampR : regNextSampL;
		uint32_t sampRegCur = i ? regCurSampR : regCurSampL;
		uint32_t tmpRegX = 0, tmpRegY = 1;
		
	//long mul by tablVal and shift down right away. sample is split into a signed top half and an unsigned bottom half
		
		//asr tmpRegX, sampRegCur, #16
		EMIT(LLmov, tmpRegX, sampRegCur, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
		
		//uxth sampRegCur, sampRegCur
		EMIT(LLextend, sampRegCur, sampRegCur, 0, false, true);

		//mul sampRegCur, regTmpB (tablVal)
		EMIT(LLmulReg, sampRegCur, sampRegCur, regTmpB, EmitFlagsDoNotCare, false);
		
		//mul tmpRegX, regTmpB (tablVal)
		EMIT(LLmulReg, tmpRegX, tmpRegX, regTmpB, EmitFlagsDoNotCare, false);
	
	//long mul by inverseTabVal and shift down right away
		
		//asr tmpRegY, sampRegNext, #16
		EMIT(LLmov, tmpRegY, sampRegNext, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);
		
		//uxth sampRegNext, sampRegNext
		EMIT(LLextend, sampRegNext, sampRegNext, 0, false, true);

		//mul sampRegNext, inverseTabVal(regTmpA)
		EMIT(LLmulReg, sampRegNext, sampRegNext, regTmpA, EmitFlagsDoNotCare, false);
		
		//mul tmpRegY, inverseTabVal(regTmpA)
		EMIT(LLmulReg, tmpRegY, tmpRegY, regTmpA, EmitFlagsDoNotCare, false);
		
	//add them up
	
		//add tmpRegX, tmpRegY					//collect high bits
		EMIT(LLaddReg, tmpRegX, tmpRegX, tmpRegY, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//mov tmpRegY, #0						//prepare a place to save carry out of adding low bits
		EMIT(LLmovImm, tmpRegY, 0, 0, EmitFlagsDoNotCare, false);
		
		//adds sampRegCur, sampRegNext			//add low bits and export C
		EMIT(LLaddReg, sampRegCur, sampRegCur, sampRegNext, EmitShiftLsl, 0, EmitSetFlags, false);
		
		//adc tmpRegY, tmpRegY	//capture C flag from the addition (0 + 0 + C)
		EMIT(LLadcReg, tmpRegY, tmpRegY, tmpRegY, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//lsr sampRegCur, #16					//shift out lowest bits we do not care for
		EMIT(LLmov, sampRegCur, sampRegCur, EmitShiftLsr, 16, EmitFlagsDoNotCare, false);
		
		//lsl tmpRegY, #16						//put carry into proper place
		EMIT(LLmov, tmpRegY, tmpRegY, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
		
		//add sampRegCur, tmpRegY				//put carry where it belongs
		EMIT(LLaddReg, sampRegCur, sampRegCur, tmpRegY, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//add sampRegCur, tmpRegX				//produce the final answer
		EMIT(LLaddReg, sampRegCur, sampRegCur, tmpRegX, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//pop {r0, r1}		//dst and src pointers are back
	EMIT(HLpop, 0x0003);
	
	//store the results (they are in regCurSampL, regCurSampR);
	now = audioPrvMixInFuncEmitSampleExport(dest, regCurSampL, regCurSampR, regTmpA, regTmpB, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;

	//pop {regCurSampL, regCurSampR, regNextSampL, regNextSampR}		//get the un-interpolated samples back
	EMIT(HLpop, (1 << regCurSampL) | (1 << regCurSampR) | (1 << regNextSampL) | (1 << regNextSampR));
	
	//see if we need a new input sample (bottom bit of the table entry, stashed in regNeedSampleLoad)
	
	//mov regTmpA, regNeedSampleLoad
	EMIT(LLmov, regTmpA, regNeedSampleLoad, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	#ifdef HAVE_v8M_BASE	//can use cbnz
	
		//cbnz regTmpA, do_load_sample
		EMIT(SaveSpace, &bneSpot, 1);
	#else
	
		//cmp regTmpA, #0
		EMIT(LLcmpImm, regTmpA, 0);
	
		//bne do_load_sample
		EMIT(SaveSpace, &bneSpot, 1);
	#endif
	
	// this is the path for when we DO NOT need a new sample
	
	//cmp regDstEndPtr, regDst	//see if we're done
	EMIT(LLcmpReg, regDstEndPtr, regDst, EmitShiftLsl, 0);
	
	//loop to start (without sample load) using bne
	EMIT(LLbranch, loopPostLoadSample, EmitCcNe);
	
	//source pointer now points to PAST what should be the "next" sample next run - adjust it
	//subs r1, in_sample_sz
	EMIT(LLsubImm, regSrc, regSrc, sampleSz, EmitSetFlags, false);
	
	//we'll need a jump here to exit code. save a slot for it
	EMIT(SaveSpace, &jumpToExitCodeSpot, 1);
	
	// this is the path for when we DO need a new sample
	#ifdef HAVE_v8M_BASE	//can use cbnz
	
		//set up that "cbnz" we saved a space for
		EMIT_TO(LLcbnz, &bneSpot, regTmpA, emitGetPtrToJumpHere(dest));
	
	#else
	
		//this is "do_load_sample", fill the above "bne"
		EMIT_TO(LLbranch, &bneSpot, emitGetPtrToJumpHere(dest), EmitCcNe);
	
	#endif
	
	//cmp regDstEndPtr, regDst	//see if we're done
	EMIT(LLcmpReg, regDstEndPtr, regDst, EmitShiftLsl, 0);
	
	//loop to start (with sample load) using bne (short one should work)
	EMIT(LLbranch, loopLoadSample, EmitCcNe);
	
	//nonetheless shift the "next" sample to "cur" one so we can stash it properly below
	EMIT(LLmov, regCurSampL, regNextSampL, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	if (nativeFmtIsStereo) {
		
		EMIT(LLmov, regCurSampR, regNextSampR, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//source pointer now points to what should be the "next" sample next run. Good
	
	// this is the common exit path. we saved a slot above to insert a jump to here. generate the jump
	EMIT_TO(LLbranch, &jumpToExitCodeSpot, emitGetPtrToJumpHere(dest), EmitCcAl);
	
	// we also saved a spot above when we ran out of input data, generate that beq
	EMIT_TO(LLbranch, &beqOutOfInput, emitGetPtrToJumpHere(dest), EmitCcEq);

	// save "src" pointer
	//ldr regTmpA, [sp, #ofst_to_pushed_r1]   //we need to store r1. pushed r1 sits right above the four saved hi regs
	EMIT(LLloadImm, regTmpA, EMIT_REG_NO_SP, sizeof(uint32_t) * 4, EmitSzWord, false, EmitAdrModeIndex);
	
	//str regSrc, [regTmpA]
	EMIT(LLstoreImm, regSrc, regTmpA, 0, EmitSzWord, EmitAdrModeIndex);
	
	// save resamp state
	//ldr regTmpA, [sp, #ofst_to_pushed_r3]
	EMIT(LLloadImm, regTmpA, EMIT_REG_NO_SP, sizeof(uint32_t) * 5, EmitSzWord, false, EmitAdrModeIndex);
	
	//mov regTmpB, regTabPtr
	EMIT(LLmov, regTmpB, regTabPtr, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//stmia regTmpA!, {regCurSampL, regCurSampR, regTmpB}		//same layout as was loaded at entry
	EMIT(LLstmia, regTmpA, (1 << regTmpB) | (1 << regCurSampL) | (1 << regCurSampR), true);

	//pop {r4-r7}	//pop the saved values of r8..r11
	EMIT(HLpop, 0x00f0);

	//mov r8..r11, r4..r7
	for (i = 0; i < 4; i++)
		EMIT(LLmov, 8 + i, 4 + i, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//pop {r1,r3-r7, pc}		//popping the pushed lr into pc is our return
	EMIT(HLpop, 0x80fa);

	return EmitErrNone;
}

//Emit the machine code for the "downsample" output-mix function into "dest".
// dest              - emit buffer that receives the generated code
// resampTab         - table of 16-bit resample entries. Entry 0 is inlined into the generated code as an
//                     immediate and the reloaded table pointer is set to &resampTab[1]. An entry that reads
//                     as zero triggers that reload (i.e. acts as the end-of-table marker)
// sampTyp, chans    - format of the source samples (passed on to audioPrvEmitLoadSample)
// nativeFmtIsStereo - whether the output (native) format is stereo
// Returns EmitErrNone on success, or the status reported by a helper emitter that failed.
// (EMIT/EMIT_TO are macros defined elsewhere - presumably they return from this function on failure; verify)
//
// As used by the generated code, each table entry is treated as:
//   bit 15     - set when a new source sample must be loaded before this step
//   bits 14..1 - multiplier applied to the current sample(s) before they are added to the running sums
//   bit 0      - set when an output sample must be produced after this step
enum EmitStatus audioPrvStreamCreateOutputMixFuncGutsDownsample(struct EmitBuf *dest, const uint16_t* resampTab, enum AudioSampleType sampTyp, enum AudioChannelConfig chans, bool nativeFmtIsStereo)
{
	const uint8_t regDst = 0, regSrc = 1, regTmpC = 2, regResampState = 3, regCurSampL = 4, regCurSampR = 5, regTmpA = 6, regTmpB = 7;
	const uint8_t regTmpHi = 8, regTabPtr = 9, regVolL = 10, regVolR = 11, regDstEndPtr = 12, regSrcEndPtr = 14;
	
	//word indices into the resample state structure (see "RESAMP STATE" below)
	const uint8_t stateOfstCurSampL = 0, stateOfstCurSampR = 1, stateOfstLeftSumLo = 2, stateOfstRightSumLo = 3;
	const uint8_t stateOfstLeftSumHi = 4, stateOfstRightSumHi = 5, stateOfstTabPtr = 6;
	
	//sampleSzShift is log2 of the size in bytes of one source frame (all channels)
	uint32_t i, nCh = (chans == AudioStereo) ? 2 : 1, sampleSzShift = (mSampleShifts[sampTyp] + (nCh - 1));
	struct EmitBuf savedSpaceForJump;
	enum EmitStatus now;
	uintptr_t mainLoop;
	
	//prototype of the generated function:
	//int32_t* downsample(int32_t* dst, const uint8_t** srcP, uint32_t maxOutSamples, void* resampleStateP, uint32_t volumeL, uint32_t volumeR, uint32_t numInSamples)

	//register allocation inside the generated function:
	//r0 is dst
	//r1 is src
	//r2 is tmpRegC
	//r3 is resampState
	//r4 is CurSampL (or mono)
	//r5 is CurSampR
	//r6 is tmpRegA
	//r7 is tmpRegB	(used to keep tab value)
	//r8 is tmpRegHi
	//r9 is tab ptr
	//r10 is volume L (or mono)
	//r11 is volume R
	//r12 is dst endPtr
	//lr  is source end ptr
	
	
	//RESAMP STATE is (array of 32-bit words):
	// [0] - regCurSampL (or mono)
	// [1] - regCurSampR
	// [2] - leftSum lo (or mono)
	// [3] - right sum lo
	// [4] - leftSum hi (or mono)
	// [5] - right sum hi
	// [6] - tabptr

	//push {r1, r4-r7, lr}		//r1 (srcP) is saved so that the updated source pointer can be written back at the end
	EMIT(HLpush, 0x40f2);

	//mov r4..r7, r8..r11		//hi regs cannot be pushed directly, so copy them to lo regs first
	for (i = 0; i < 4; i++)
		EMIT(LLmov, 4 + i, 8 + i, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//push {r4-r7}	//this saves the caller's r8..r11
	EMIT(HLpush, 0x00f0);

	//at this point 10 words have been pushed: [sp, #4 * 4] is the saved r1, stack-passed args start at [sp, #4 * 10]

	//ldr regSrc, [r1]							//get source pointer
	EMIT(LLloadImm, regSrc, 1, 0, EmitSzWord, false, EmitAdrModeIndex);
	
	//calculate source end pointer
	//ldr tmpRegA, [sp, #ofst_to_numInSamples]
	EMIT(LLloadImm, regTmpA, EMIT_REG_NO_SP, 12 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//lsl regTmpA, #sampleSzShift				//input samples -> input bytes
	EMIT(LLmov, regTmpA, regTmpA, EmitShiftLsl, sampleSzShift, EmitFlagsDoNotCare, false);
	
	//add regTmpA, regSrc
	EMIT(LLaddReg, regTmpA, regTmpA, regSrc, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//mov regSrcEndPtr, regTmpA
	EMIT(LLmov, regSrcEndPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//calculate destination end pointer
	//lsl regTmpA, r2, #2 + isStereo			//r2 is maxOutSamples, each output sample is 4 bytes per channel
	EMIT(LLmov, regTmpA, 2, EmitShiftLsl, nativeFmtIsStereo ? 3 : 2, EmitFlagsDoNotCare, false);
	
	//add regTmpA, regDst		//add in start pointer
	EMIT(LLaddReg, regTmpA, regTmpA, regDst, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//mov regDstEndPtr, regTmpA
	EMIT(LLmov, regDstEndPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//ldr regTmpA, [sp, #ofst_to_volume_L]
	EMIT(LLloadImm, regTmpA, EMIT_REG_NO_SP, 10 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//ldr regTmpB, [sp, #ofst_to_volume_R]
	EMIT(LLloadImm, regTmpB, EMIT_REG_NO_SP, 11 * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//if input & output are both mono, we need to calculate average volume (we'll store it in r10)
	if (!nativeFmtIsStereo && chans == AudioMono) {
		
		//add regTmpA, regTmpB
		EMIT(LLaddReg, regTmpA, regTmpA, regTmpB, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//lsr regTmpA, #1
		EMIT(LLmov, regTmpA, regTmpA, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	else {
		
		//mov regVolR, regTmpB
		EMIT(LLmov, regVolR, regTmpB, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//mov regVolL, regTmpA
	EMIT(LLmov, regVolL, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//ldr regCurSampL, [state, #4 * stateOfstCurSampL]		//load CurSampL
	EMIT(LLloadImm, regCurSampL, regResampState, stateOfstCurSampL * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//ldr regCurSampR, [state, #4 * stateOfstCurSampR]		//load CurSampR
	EMIT(LLloadImm, regCurSampR, regResampState, stateOfstCurSampR * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//ldr regTmpA, [state, #4 * stateOfstTabPtr]		//load tab ptr
	EMIT(LLloadImm, regTmpA, regResampState, stateOfstTabPtr * sizeof(uint32_t), EmitSzWord, false, EmitAdrModeIndex);
	
	//mov regTabPtr, regTmpA
	EMIT(LLmov, regTabPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//loop:
	mainLoop = emitGetPtrToJumpHere(dest);
	
	//load tab entry
	
	//mov regTmpA, regTabPtr
	EMIT(LLmov, regTmpA, regTabPtr, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//ldrh regTmpB, [regTmpA] //grab resamp tab value
	EMIT(LLloadImm, regTmpB, regTmpA, 0, EmitSzHalfword, false, EmitAdrModeIndex);

	//add regTmpA, sizeof(uint16_t)
	EMIT(LLaddImm, regTmpA, regTmpA, sizeof(uint16_t), EmitFlagsDoNotCare, false);
	
	#ifdef HAVE_v8M_BASE	//can use cbnz
		
		//save space for "cbnz regTmpB" to skip table reload
		EMIT(SaveSpace, &savedSpaceForJump, 1);
		
	#else
	
		//cmp regTmpB, #0
		EMIT(LLcmpImm, regTmpB, 0);
		
		//save space for a "bne" to skip table reload
		EMIT(SaveSpace, &savedSpaceForJump, 1);
	
	#endif
	
	//we get here only if the entry read was zero: restart from the beginning of the table
	
	//ldr regTmpA, =resampTab + 1	//reload table. point right to elem idx 1 since we'll inline idx 0 here
	EMIT(HLloadImmToReg, regTmpA, (uintptr_t)(resampTab + 1), true, true, false);
	
	//ldr regTmpB, =resampTab[0]	//the inlined value of elem idx 0 (read now, at code-generation time)
	EMIT(HLloadImmToReg, regTmpB, resampTab[0], true, true, false);
	
	#ifdef HAVE_v8M_BASE	//can use cbnz
	
		//set up that "cbnz" we saved a space for
		EMIT_TO(LLcbnz, &savedSpaceForJump, regTmpB, emitGetPtrToJumpHere(dest));
	
	#else
	
		//set up that "bne" we saved a space for
		EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcNe);
	
	#endif
	
	//mov regTabPtr, regTmpA		//commit the advanced (or reloaded) table pointer
	EMIT(LLmov, regTabPtr, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//lsls tabentry(aka regTmpB), #17		//shifts bit 15 of the entry out into the C flag
	EMIT(LLmov, regTmpB, regTmpB, EmitShiftLsl, 17, EmitSetFlags, false);

	//save space for "bcc skip_load_sample"
	EMIT(SaveSpace, &savedSpaceForJump, 1);
	
	//get "next" sample (helper reads from the source pointer in r1 and advances it - see its header comment)
	now = audioPrvEmitLoadSample(dest, regCurSampL, regCurSampR, regTmpA, sampTyp, chans);
	if (now != EmitErrNone)
		return now;
		
	//skip_load_sample:
	
	//fill in that jump above to skip loading the sample
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcCc);
	
	//mov regTmpA, #0
	EMIT(LLmovImm, regTmpA, 0, 0, EmitFlagsDoNotCare, false);
	
	//lsrs tabentry(aka regTmpB), #18		//now only has the multiplier (in bottom 14 bits), and C bit has whether we need to emit
	EMIT(LLmov, regTmpB, regTmpB, EmitShiftLsr, 18, EmitSetFlags, false);

	//adc regTmpA, regTmpA					//1 if we need to emit
	EMIT(LLadcReg, regTmpA, regTmpA, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//lsl tabentry(aka regTmpB), #2		//now only has the multiplier (in 16 bits for convenience)
	EMIT(LLmov, regTmpB, regTmpB, EmitShiftLsl, 2, EmitSetFlags, false);

	//mov regTmpHi, regTmpA			//save the "need to emit" flag there, regTmpA is needed as a temp below
	EMIT(LLmov, regTmpHi, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//multiply it into the sums
	//each sample is split into its low 16 bits (unsigned) and high 16 bits (signed). Each half is multiplied
	// by the multiplier separately and accumulated into the "lo" and "hi" sum words respectively
	for (i = 0; i < nCh; i++) {
		uint32_t ofstLo = i ? stateOfstRightSumLo : stateOfstLeftSumLo;
		uint32_t ofstHi = i ? stateOfstRightSumHi : stateOfstLeftSumHi;
		uint32_t regCurSamp = i ? regCurSampR : regCurSampL;
		
		//uxth regTmpA, regCurSamp
		EMIT(LLextend, regTmpA, regCurSamp, 0, false, true);

		//mul regTmpA, tabentry(aka regTmpB)
		EMIT(LLmulReg, regTmpA, regTmpA, regTmpB, EmitFlagsDoNotCare, false);

		//ldr regTmpC, [state, #4 * ofstLo]
		EMIT(LLloadImm, regTmpC, regResampState, sizeof(uint32_t) * ofstLo, EmitSzWord, false, EmitAdrModeIndex);

		//adds regTmpC, regTmpA				//C flag now says if the "lo" sum overflowed 32 bits
		EMIT(LLaddReg, regTmpC, regTmpC, regTmpA, EmitShiftLsl, 0, EmitSetFlags, false);
		
		//str regTmpC, [state, #4 * ofstLo]
		EMIT(LLstoreImm, regTmpC, regResampState, sizeof(uint32_t) * ofstLo, EmitSzWord, EmitAdrModeIndex);
	
		//mov regTmpC, #0		///does not affect C flag (!IMPORTANT!)
		EMIT(LLmovImm, regTmpC, 0, 0, EmitFlagsDoNotCare, false);
		
		//adc regTmpC, regTmpC					//1 if we carried
		EMIT(LLadcReg, regTmpC, regTmpC, regTmpC, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//lsl regTmpC, #16						//move carry into proper place ("hi" sum is in units of 2^16 of the "lo" sum)
		EMIT(LLmov, regTmpC, regTmpC, EmitShiftLsl, 16, EmitFlagsDoNotCare, false);
		
		//ldr regTmpA, [state, #4 * ofstHi]
		EMIT(LLloadImm, regTmpA, regResampState, sizeof(uint32_t) * ofstHi, EmitSzWord, false, EmitAdrModeIndex);
		
		//add regTmpC, regTmpA		//add in the carry
		EMIT(LLaddReg, regTmpC, regTmpC, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//asr regTmpA, regCurSamp, #16
		EMIT(LLmov, regTmpA, regCurSamp, EmitShiftAsr, 16, EmitFlagsDoNotCare, false);

		//mul regTmpA, tabentry(aka regTmpB)
		EMIT(LLmulReg, regTmpA, regTmpA, regTmpB, EmitFlagsDoNotCare, false);

		//add regTmpC, regTmpA		//add in the product of the high half
		EMIT(LLaddReg, regTmpC, regTmpC, regTmpA, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//str regTmpC, [state, #4 * ofstHi]
		EMIT(LLstoreImm, regTmpC, regResampState, sizeof(uint32_t) * ofstHi, EmitSzWord, EmitAdrModeIndex);
	}
	
	//mov regTmpA, regTmpHi			//grab the "need to emit" flag back
	EMIT(LLmov, regTmpA, regTmpHi, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	#ifdef HAVE_v8M_BASE	//can use cbnz
	
		//save space for "cbz regTmpA, noemit"
		EMIT(SaveSpace, &savedSpaceForJump, 1);
	
	#else
	
		//cmp regTmpA, #0
		EMIT(LLcmpImm, regTmpA, 0);
	
		//save space for "beq noemit"
		EMIT(SaveSpace, &savedSpaceForJump, 1);
	#endif
	
	//collapse the samples to a single reg: result = hiSum + (loSum >> 16). Left (or mono) goes to regTmpA, right to regTmpB
	for (i = 0; i < nCh; i++) {
		uint32_t ofstLo = i ? stateOfstRightSumLo : stateOfstLeftSumLo;
		uint32_t ofstHi = i ? stateOfstRightSumHi : stateOfstLeftSumHi;
		uint32_t regOut = i ? regTmpB : regTmpA;
		
		//ldr regTmpC, [state, #4 * ofstLo]
		EMIT(LLloadImm, regTmpC, regResampState, sizeof(uint32_t) * ofstLo, EmitSzWord, false, EmitAdrModeIndex);
		
		//ldr regOut, [state, #4 * ofstHi]
		EMIT(LLloadImm, regOut, regResampState, sizeof(uint32_t) * ofstHi, EmitSzWord, false, EmitAdrModeIndex);
		
		//lsr regTmpC, #16
		EMIT(LLmov, regTmpC, regTmpC, EmitShiftLsr, 16, EmitFlagsDoNotCare, false);
		
		//add regOut, regTmpC
		EMIT(LLaddReg, regOut, regOut, regTmpC, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//mov regTmpHi, resampState			//free up resampState reg
	EMIT(LLmov, regTmpHi, regResampState, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//scale by volume and maybe merge (helper is defined elsewhere; regTmpC and regResampState are handed to it, presumably as temps)
	now = audioPrvMixInFuncScaleChansByVolume(dest, regTmpA, regTmpB, regVolL, regVolR, regTmpC, regResampState, chans, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
		
	//emit samples (leftSum, rightSum, hi regs free as temps)
	now = audioPrvMixInFuncEmitSampleExport(dest, regTmpA, regTmpB, regTmpC, regResampState, nativeFmtIsStereo);
	if (now != EmitErrNone)
		return now;
	
	//mov resampState, regTmpHi 			//restore resampState reg
	EMIT(LLmov, regResampState, regTmpHi, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//we need to zero all the result regs now
	EMIT(HLloadImmToReg, regTmpA, 0, true, true, false);
	
	//str regTmpA, [state, #4 * ...]		//clear all four sum words (lo & hi, left & right)
	EMIT(LLstoreImm, regTmpA, regResampState, sizeof(uint32_t) * stateOfstLeftSumLo, EmitSzWord, EmitAdrModeIndex);
	EMIT(LLstoreImm, regTmpA, regResampState, sizeof(uint32_t) * stateOfstRightSumLo, EmitSzWord, EmitAdrModeIndex);
	EMIT(LLstoreImm, regTmpA, regResampState, sizeof(uint32_t) * stateOfstLeftSumHi, EmitSzWord, EmitAdrModeIndex);
	EMIT(LLstoreImm, regTmpA, regResampState, sizeof(uint32_t) * stateOfstRightSumHi, EmitSzWord, EmitAdrModeIndex);
	
	//noemit:
	
	#ifdef HAVE_v8M_BASE	//can use cbnz

		//fill in that cbz above to skip producing a sample
		EMIT_TO(LLcbz, &savedSpaceForJump, regTmpA, emitGetPtrToJumpHere(dest));
		
	#else
	
		//fill in that jump above to skip producing a sample
		EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcEq);
	#endif
	
	//cmp regDst, regDstEndPtr		//see if we're done with the output
	EMIT(LLcmpReg, regDst, regDstEndPtr, EmitShiftLsl, 0);
	
	//if yes, bail (space for a "beq" that is filled in below)
	EMIT(SaveSpace, &savedSpaceForJump, 1);
	
	//cmp regSrc, regSrcEndPtr		//or maybe we're done with the input?
	EMIT(LLcmpReg, regSrc, regSrcEndPtr, EmitShiftLsl, 0);
	
	//if both not done, go loop around
	EMIT(LLbranch, mainLoop, EmitCcNe);

	//bail jump above is to here - emit it
	EMIT_TO(LLbranch, &savedSpaceForJump, emitGetPtrToJumpHere(dest), EmitCcEq);

	//loop is over - save state

	// save "src" pointer
	//ldr r2, [sp, #4 * 4]   //we need to store regSrc. this slot holds the r1 (srcP) that was pushed at entry
	EMIT(LLloadImm, 2, EMIT_REG_NO_SP, sizeof(uint32_t) * 4, EmitSzWord, false, EmitAdrModeIndex);
	
	//str regSrc, [r2]
	EMIT(LLstoreImm, regSrc, 2, 0, EmitSzWord, EmitAdrModeIndex);
	
	// save resamp state
	
	//str regCurSampL, [state, #4 * stateOfstCurSampL]		//store CurSampL
	EMIT(LLstoreImm, regCurSampL, regResampState, stateOfstCurSampL * sizeof(uint32_t), EmitSzWord, EmitAdrModeIndex);
	
	//str regCurSampR, [state, #4 * stateOfstCurSampR]		//store CurSampR
	EMIT(LLstoreImm, regCurSampR, regResampState, stateOfstCurSampR * sizeof(uint32_t), EmitSzWord, EmitAdrModeIndex);
	
	//mov regTmpA, regTabPtr 
	EMIT(LLmov, regTmpA, regTabPtr, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//str regTmpA, [state, #4 * stateOfstTabPtr]			//store tab ptr
	EMIT(LLstoreImm, regTmpA, regResampState, stateOfstTabPtr * sizeof(uint32_t), EmitSzWord, EmitAdrModeIndex);

	//pop {r4-r7}	//pop the saved r8..r11 values
	EMIT(HLpop, 0x00f0);

	//mov r8..r11, r4..r7
	for (i = 0; i < 4; i++)
		EMIT(LLmov, 8 + i, 4 + i, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

	//pop {r1,r4-r7, pc}		//restores the caller's regs and returns. r0 (dst) is the return value
	EMIT(HLpop, 0x80f2);

	return EmitErrNone;
}

//Emit code that saturates the signed 32-bit value in "regData" to the range of a signed "toBits"-bit integer.
// dest    - emit buffer that receives the generated code
// regData - register holding the value. It is updated in place
// regTemp - scratch register, clobbered by the generated code
// toBits  - width in bits of the signed range to saturate to (used as a shift amount of "toBits - 1")
// The generated code clobbers the flags.
// Returns EmitErrNone. (EMIT/EMIT_TO are macros defined elsewhere - presumably they return early on failure; verify)
static enum EmitStatus audioPrvMicSignedSat(struct EmitBuf *dest, uint_fast8_t regData, uint_fast8_t regTemp, uint_fast8_t toBits)
{
	//largest representable value: 0b0..01..1 with (toBits - 1) ones
	const uint32_t satMax = ((uint32_t)1 << (toBits - 1)) - 1;
	//smallest representable value, sign-extended to 32 bits. It must be sign-extended (and not just 1 << (toBits - 1))
	// since consumers that look at the entire register (eg: the float conversion) would else see a large POSITIVE value
	const uint32_t satMin = ~satMax;
	struct EmitBuf beqOut1, beqOut2, branchToOut, bplSatPositive;
	
	//theory of operation:
	// 1. arithmetically shift out to the right all bits we want to remain useful but one
	// 2. if remaining bits are all the same (all zeroes or all ones), the value fits and we're done
	// 3. else we need to replace the value with the saturated value of the proper sign
	
	//asrs rTmp, rData, #(toBits - 1)
	EMIT(LLmov, regTemp, regData, EmitShiftAsr, toBits - 1, EmitSetFlags, false);
	
	//beq out				//result was zero: value is positive and fits
	EMIT(SaveSpace, &beqOut1, 1);
	
	//bpl satPositive		//result is positive and nonzero: value is too large
	EMIT(SaveSpace, &bplSatPositive, 1);
	
	//adds rTmp, #1		//check for -1
	EMIT(LLaddImm, regTemp, regTemp, 1, EmitSetFlags, false);
	
	//beq out				//result was -1: value is negative and fits
	EMIT(SaveSpace, &beqOut2, 1);
	
//sat_negative:
	//ldr rData, MIN_VAL	//if we are here, we need to saturate to negative
	EMIT(HLloadImmToReg, regData, satMin, true, true, false);
	
	//b out					//done
	EMIT(SaveSpace, &branchToOut, 1);
	
//sat_positive
	EMIT_TO(LLbranch, &bplSatPositive, emitGetPtrToJumpHere(dest), EmitCcPl);
	
	//ldr rData, MAX_VAL	//if we are here, we need to saturate to positive
	EMIT(HLloadImmToReg, regData, satMax, true, true, false);
	
//out:
	//fill in all the jumps to here that we saved space for above
	EMIT_TO(LLbranch, &beqOut1, emitGetPtrToJumpHere(dest), EmitCcEq);
	EMIT_TO(LLbranch, &beqOut2, emitGetPtrToJumpHere(dest), EmitCcEq);
	EMIT_TO(LLbranch, &branchToOut, emitGetPtrToJumpHere(dest), EmitCcAl);
	
	return EmitErrNone;
}

//Convert a signed fixed-point sample to float by dividing it by 2^25 (so that 33554432 maps to 1.0f)
static float audioMicPrvToFloat(int32_t i)
{
	const float fullScale = 33554432.f;		//2^25
	
	return (float)i / fullScale;
}

//Emit the machine code for a microphone sample conversion function into "dest".
// The generated function's prototype is:
//   void* MicCvtF(void* dst, const int16_t *src, uint32_t volumeL, uint32_t volumeR, uint32_t nSamplesOver2);
// source guaranteed four byte aligned always
// The generated code reads one signed 16-bit source sample per loop iteration, multiplies it by the volume(s)
// (the average of the two volumes for mono output), saturates the product(s) to 26 bits, and writes the result
// in the requested output format. The generated function leaves the advanced "dst" in r0.
//
// dest    - emit buffer that receives the generated code
// sampTyp - requested output sample format
// chans   - requested output channel config (anything other than AudioMono is treated as stereo)
// Returns EmitErrNone on success, EmitErrInvalidInput for an unknown sample type, or the status of a failed helper.
// (EMIT is a macro defined elsewhere - presumably it returns from this function on failure; verify)
enum EmitStatus audioPrvMicCreateConvertFunc(struct EmitBuf *dest, enum AudioSampleType sampTyp, enum AudioChannelConfig chans)
{
	bool stereo = chans != AudioMono, unsign = false, bswap = false;
	uint_fast8_t width, widthShift = 0;
	enum EmitStatus now;
	uintptr_t loopStart;
	
	//register use in the generated code:
	// r0  - dst
	// r1  - src / 2 (see below)
	// r2  - volume L (or averaged volume for mono)
	// r3  - volume R
	// r4  - value to add for unsigned output (if needed), also a temp
	// r5  - temp
	// r6  - left (or mono) sample
	// r7  - right sample
	// r12 - dst end pointer
	
	//lsr r1, #1									//yes this is weird, but it is so we can use ldrsh effectively!
	EMIT(LLmov, 1, 1, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	
	if (!stereo) {
		
		//add r2, r3
		EMIT(LLaddReg, 2, 2, 3, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

		//lsrs r2, #1								//mono uses the average of the two volumes
		EMIT(LLmov, 2, 2, EmitShiftLsr, 1, EmitFlagsDoNotCare, false);
	}
	
	//push {r4-r7, lr}
	EMIT(HLpush, 0x40f0);
	
	//decode the sample type into: width (bits), widthShift (log2 of bytes), unsign (output is unsigned), bswap (output is big-endian)
	switch (sampTyp) {
		case AudioSampleU8:
			unsign = true;
			//fallthrough
		case AudioSampleS8:
			width = 8;
			break;
		
		case AudioSampleU16BE:
			bswap = true;
			//fallthrough
		
		case AudioSampleU16LE:
			unsign = true;
			//fallthrough
			
		case AudioSampleS16BE:
			if (sampTyp != AudioSampleU16LE)		//we could have fallen through to here from the LE case above
				bswap = true;
			//fallthrough
			
		case AudioSampleS16LE:
			width = 16;
			widthShift = 1;
			break;
		
		case AudioSampleU32BE:
			bswap = true;
			//fallthrough
		
		case AudioSampleU32LE:
			unsign = true;
			//fallthrough
			
		case AudioSampleFloatBE:
		case AudioSampleS32BE:
			if (sampTyp != AudioSampleU32LE)		//we could have fallen through to here from the LE case above
				bswap = true;
			//fallthrough
			
		case AudioSampleFloatLE:
		case AudioSampleS32LE:
			width = 32;
			widthShift = 2;
			break;
		
		default:
			return EmitErrInvalidInput;
	}

	//ldr r4, [sp, #proper_ofst]					//get nSamplesOver2 into r4 (first stack arg, we pushed 5 words)
	EMIT(LLloadImm, 4, EMIT_REG_NO_SP, 0x14, EmitSzWord, false, EmitAdrModeIndex);
	
	//lsls r4, #(stereo + widthShift + 1)			//convert to number of bytes (remember we had samplesOver2)
	//each loop iteration consumes ONE source sample and produces one output frame, so the halved count needs the "+ 1"
	EMIT(LLmov, 4, 4, EmitShiftLsl, widthShift + (stereo ? 1 : 0) + 1, EmitFlagsDoNotCare, false);
	
	//add r4, r0									//calc dst's end
	EMIT(LLaddReg, 4, 4, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	//mov r12, r4
	EMIT(LLmov, 12, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
	if (unsign) {
		
		//load value to add to convert to unsigned (half of the 26-bit range)
		EMIT(HLloadImmToReg, 4, 1UL << 25, true, true, false);
	}

	//loopstart:
	loopStart = emitGetPtrToJumpHere(dest);
	
	//ldrsh r6, [r1, r1]	//load a sample (r1 is src / 2, so r1 + r1 is the actual address)
	EMIT(LLloadRegReg, 6, 1, 1, 0, EmitSzHalfword, true);

	//add r1, #1			//point to next (advances the effective address by 2)
	EMIT(LLaddImm, 1, 1, 1, EmitFlagsDoNotCare, false);

	if (stereo) {
		
		//mov r7, r6
		EMIT(LLmov, 7, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
		//mul r7, volR
		EMIT(LLmulReg, 7, 7, 3, EmitFlagsDoNotCare, false);
	}
	
	//mul r6, volL
	EMIT(LLmulReg, 6, 6, 2, EmitFlagsDoNotCare, false);
	
	//now we need to saturate to 26 bits (use r5 for temp)
	if (stereo) {
		
		now = audioPrvMicSignedSat(dest, 7, 5, 26);
		if (now != EmitErrNone)
			return now;
	}
	now = audioPrvMicSignedSat(dest, 6, 5, 26);
	if (now != EmitErrNone)
		return now;

	//if we need unsignedness, do so now
	if (unsign) {
		if (stereo) {
		
			//add r7, r4
			EMIT(LLaddReg, 7, 7, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		}
		
		//add r6, r4
		EMIT(LLaddReg, 6, 6, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	}
	
	//convert to desired size, swap as needed, write
	if (width == 8) {
		
		if (stereo) {
		
			//lsr r7, #18		//26 bits -> 8 bits
			EMIT(LLmov, 7, 7, EmitShiftLsr, 18, EmitFlagsDoNotCare, false);
			
			//strb r7, [dst, #1]
			EMIT(LLstoreImm, 7, 0, 1, EmitSzByte, EmitAdrModeIndex);
		}
		
		//lsr r6, #18
		EMIT(LLmov, 6, 6, EmitShiftLsr, 18, EmitFlagsDoNotCare, false);
		
		//strb r6, [dst, #0]
		EMIT(LLstoreImm, 6, 0, 0, EmitSzByte, EmitAdrModeIndex);
		
		//add dst, #proper_sz
		EMIT(LLaddImm, 0, 0, sizeof(uint8_t) * (stereo ? 2 : 1), EmitFlagsDoNotCare, false);
	}
	else if (width == 16) {
		
		if (stereo) {
		
			//lsr r7, #10		//26 bits -> 16 bits
			EMIT(LLmov, 7, 7, EmitShiftLsr, 10, EmitFlagsDoNotCare, false);
			
			if (bswap) {
				
				//rev16 r7, r7
				EMIT(LLrev16, 7, 7);
			}
			
			//strh r7, [dst, #2]
			EMIT(LLstoreImm, 7, 0, 2, EmitSzHalfword, EmitAdrModeIndex);
		}
		
		//lsr r6, #10
		EMIT(LLmov, 6, 6, EmitShiftLsr, 10, EmitFlagsDoNotCare, false);
		
		if (bswap) {
				
			//rev16 r6, r6
			EMIT(LLrev16, 6, 6);
		}
			
		//strh r6, [dst, #0]
		EMIT(LLstoreImm, 6, 0, 0, EmitSzHalfword, EmitAdrModeIndex);
		
		//add dst, #proper_sz
		EMIT(LLaddImm, 0, 0, sizeof(uint16_t) * (stereo ? 2 : 1), EmitFlagsDoNotCare, false);
	}
	else if (sampTyp != AudioSampleFloatLE && sampTyp != AudioSampleFloatBE) {		//32-bit integer output
		
		if (stereo) {
		
			//lsl r7, #6		//26 bits -> 32 bits
			EMIT(LLmov, 7, 7, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
			
			if (bswap) {
				
				//rev r7, r7
				EMIT(LLrev, 7, 7);
			}
		}
		
		//lsl r6, #6
		EMIT(LLmov, 6, 6, EmitShiftLsl, 6, EmitFlagsDoNotCare, false);
		
		if (bswap) {
				
			//rev r6, r6		//this is the left (or mono) sample, it lives in r6 and not r7
			EMIT(LLrev, 6, 6);
		}
		
		//stmia r0!, {regs}
		EMIT(LLstmia, 0, (1 << 6) | (stereo ? (1 << 7) : 0), true);
	}
	else {	//float output
		
		//we call a C function here, which may clobber r0-r3 and r12, so save what we need around the calls
		
		//push {r0-r5}
		EMIT(HLpush, 0x003f);
		
		//mov r4, r12		//keep dst end pointer in a reg that survives the call
		EMIT(LLmov, 4, 12, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
		//ldr r5, =toFloatF
		EMIT(HLloadImmToReg, 5, (uintptr_t)&audioMicPrvToFloat, true, true, false);
		
		if (stereo) {
			
			//mov r0, r7
			EMIT(LLmov, 0, 7, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
		
			//blx r5
			EMIT(LLblx, 5);

			//mov r7, r0
			EMIT(LLmov, 7, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
			
			if (bswap) {
				
				//rev r7, r7
				EMIT(LLrev, 7, 7);
			}
		}
		
		//mov r0, r6
		EMIT(LLmov, 0, 6, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
		//blx r5
		EMIT(LLblx, 5);

		//mov r6, r0
		EMIT(LLmov, 6, 0, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);

		if (bswap) {
				
			//rev r6, r6
			EMIT(LLrev, 6, 6);
		}

		//mov r12, r4		//restore dst end pointer
		EMIT(LLmov, 12, 4, EmitShiftLsl, 0, EmitFlagsDoNotCare, false);
	
		//pop {r0-r5}
		EMIT(HLpop, 0x003f);
		
		//stmia r0!, {regs}
		EMIT(LLstmia, 0, (1 << 6) | (stereo ? (1 << 7) : 0), true);
	}
	
	//cmp r0, r12			//have we reached the end of the destination?
	EMIT(LLcmpReg, 0, 12, EmitShiftLsl, 0);

	//bne loopstart
	EMIT(LLbranch, loopStart, EmitCcNe);
	
	//pop {r4-r7, pc}
	EMIT(HLpop, 0x80f0);

	return EmitErrNone;
}

