Some Optimizations

This commit is contained in:
ChibiDenDen 2015-04-17 02:05:15 +03:00
parent 3ee9f6c5d8
commit b25a9c60a9
4 changed files with 345 additions and 294 deletions

View File

@ -247,10 +247,11 @@ static void LnSWoUB(ImmediateOffset)(ARMul_State* cpu, unsigned int inst, unsign
unsigned int Rn = BITS(inst, 16, 19); unsigned int Rn = BITS(inst, 16, 19);
unsigned int addr; unsigned int addr;
if (U_BIT) //if (U_BIT)
addr = CHECK_READ_REG15_WA(cpu, Rn) + OFFSET_12; // addr = CHECK_READ_REG15_WA(cpu, Rn) + OFFSET_12;
else //else
addr = CHECK_READ_REG15_WA(cpu, Rn) - OFFSET_12; // addr = CHECK_READ_REG15_WA(cpu, Rn) - OFFSET_12;
addr = CHECK_READ_REG15_WA(cpu, Rn) + (OFFSET_12 ^ (U_BIT - 1)) + (U_BIT ^ 1);
virt_addr = addr; virt_addr = addr;
} }
@ -1126,6 +1127,22 @@ int CondPassed(ARMul_State* cpu, unsigned int cond) {
#define CFLAG cpu->CFlag #define CFLAG cpu->CFlag
#define VFLAG cpu->VFlag #define VFLAG cpu->VFlag
//uint8_t bits =
// (ZFLAG << 0) | // EQ, NE
// (CFLAG << 1) | // CS, CC
// (NFLAG << 2) | // MI, PL
// (VFLAG << 3) | // VS, VC
// ((CFLAG & ~ZFLAG) << 4) | // HI, LS
// ((NFLAG ^ VFLAG ^ 1) << 5) | // GE, LT
// (((NFLAG ^ VFLAG ^ 1) & ~ZFLAG) << 6) | // GT, LE
// (1 << 7);
// return ((bits >> (cond >> 1)) & 1) ^ (cond & 1) ^ ((cond + 1) >> 4);
uint8_t bits[8] = { ZFLAG, CFLAG, NFLAG, VFLAG, CFLAG & ~ZFLAG, NFLAG ^ VFLAG ^ 1, ~ZFLAG, 1 };
bits[6] &= bits[5];
return bits[cond >> 1] ^ (cond & 1) ^ ((cond + 1) >> 4);
/*
int temp = 0; int temp = 0;
switch (cond) { switch (cond) {
@ -1178,7 +1195,9 @@ int CondPassed(ARMul_State* cpu, unsigned int cond) {
temp = 1; temp = 1;
break; break;
} }
return temp; return temp;
*/
} }
enum DECODE_STATUS { enum DECODE_STATUS {
@ -3533,15 +3552,42 @@ const transop_fp_t arm_instruction_trans[] = {
INTERPRETER_TRANSLATE(blx_1_thumb) INTERPRETER_TRANSLATE(blx_1_thumb)
}; };
typedef std::unordered_map<u32, int> bb_map; //typedef std::unordered_map<u32, int> bb_map;
static bb_map CreamCache; //static bb_map CreamCache;
// Flat lookup table indexed directly by guest address: CreamCache[addr] holds the value
// recorded by insert_bb for that address, or -1 when nothing has been recorded.
// CreamCacheSize is the number of int slots currently allocated.
static int *CreamCache = nullptr, CreamCacheSize = 0;

/**
 * Records `start` as the cached entry for `addr`, growing the table when `addr` lies
 * beyond its current end.
 *
 * The table grows to the smallest power of two strictly greater than `addr`, so the slot
 * for `addr` always exists after growth (including addr == 0 and addr == 2^k).
 * Newly added slots are filled with -1 (all bits set), the "no entry" marker.
 *
 * @param addr  Index of the slot to write.
 * @param start Value to store in that slot.
 */
static void insert_bb(unsigned int addr, int start) {
    // The slot count lives in an int and is always a power of two, so 2^30 slots is the
    // largest representable table. Larger addresses are left uncached rather than wrapping
    // the size computation.
    // NOTE(review): assumes callers treat a later find_bb miss as "translate again" - confirm.
    const unsigned int kMaxSlots = 0x40000000u;
    if (addr >= kMaxSlots)
        return;

    const unsigned int old_size = static_cast<unsigned int>(CreamCacheSize);
    if (addr >= old_size) {
        // Smear the highest set bit downwards, then add one: the result is the smallest
        // power of two that is strictly greater than addr.
        unsigned int new_size = addr;
        new_size |= new_size >> 1;
        new_size |= new_size >> 2;
        new_size |= new_size >> 4;
        new_size |= new_size >> 8;
        new_size |= new_size >> 16;
        new_size++;

        int* grown = new int[new_size];
        if (CreamCache) {
            memcpy(grown, CreamCache, old_size * sizeof(int));
            delete[] CreamCache;
        }
        // 0xFF in every byte yields -1 in every int slot.
        memset(grown + old_size, 0xFF, (new_size - old_size) * sizeof(int));

        CreamCache = grown;
        CreamCacheSize = static_cast<int>(new_size);
    }

    CreamCache[addr] = start;
}
static int find_bb(unsigned int addr, int& start) { static int find_bb(unsigned int addr, int& start) {
int ret = -1; if (addr >= CreamCacheSize) return -1;
start = CreamCache[addr];
if (start == -1) return -1;
return 0;
/*int ret = -1;
bb_map::const_iterator it = CreamCache.find(addr); bb_map::const_iterator it = CreamCache.find(addr);
if (it != CreamCache.end()) { if (it != CreamCache.end()) {
start = static_cast<int>(it->second); start = static_cast<int>(it->second);
@ -3549,7 +3595,7 @@ static int find_bb(unsigned int addr, int& start) {
} else { } else {
ret = -1; ret = -1;
} }
return ret; return ret;*/
} }
enum { enum {
@ -3716,6 +3762,9 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
#define INC_PC(l) ptr += sizeof(arm_inst) + l #define INC_PC(l) ptr += sizeof(arm_inst) + l
unsigned int num_instrs = 0;
ARMul_State* cpu = state;
arm_inst* inst_base;
// GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback to a
// clunky switch statement. // clunky switch statement.
#if defined __GNUC__ || defined __clang__ #if defined __GNUC__ || defined __clang__
@ -3724,208 +3773,211 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
num_instrs++; \ num_instrs++; \
goto *InstLabel[inst_base->idx] goto *InstLabel[inst_base->idx]
#else #else
#define GOTO_NEXT_INST \ goto next_inst_end;
if (num_instrs >= cpu->NumInstrsToExecute) goto END; \ #define GOTO_NEXT_INST goto next_inst;
num_instrs++; \ next_inst:
switch(inst_base->idx) { \ if (num_instrs >= cpu->NumInstrsToExecute) goto END;
case 0: goto VMLA_INST; \ num_instrs++;
case 1: goto VMLS_INST; \ switch(inst_base->idx) {
case 2: goto VNMLA_INST; \ case 0: goto VMLA_INST;
case 3: goto VNMLA_INST; \ case 1: goto VMLS_INST;
case 4: goto VNMLS_INST; \ case 2: goto VNMLA_INST;
case 5: goto VNMUL_INST; \ case 3: goto VNMLA_INST;
case 6: goto VMUL_INST; \ case 4: goto VNMLS_INST;
case 7: goto VADD_INST; \ case 5: goto VNMUL_INST;
case 8: goto VSUB_INST; \ case 6: goto VMUL_INST;
case 9: goto VDIV_INST; \ case 7: goto VADD_INST;
case 10: goto VMOVI_INST; \ case 8: goto VSUB_INST;
case 11: goto VMOVR_INST; \ case 9: goto VDIV_INST;
case 12: goto VABS_INST; \ case 10: goto VMOVI_INST;
case 13: goto VNEG_INST; \ case 11: goto VMOVR_INST;
case 14: goto VSQRT_INST; \ case 12: goto VABS_INST;
case 15: goto VCMP_INST; \ case 13: goto VNEG_INST;
case 16: goto VCMP2_INST; \ case 14: goto VSQRT_INST;
case 17: goto VCVTBDS_INST; \ case 15: goto VCMP_INST;
case 18: goto VCVTBFF_INST; \ case 16: goto VCMP2_INST;
case 19: goto VCVTBFI_INST; \ case 17: goto VCVTBDS_INST;
case 20: goto VMOVBRS_INST; \ case 18: goto VCVTBFF_INST;
case 21: goto VMSR_INST; \ case 19: goto VCVTBFI_INST;
case 22: goto VMOVBRC_INST; \ case 20: goto VMOVBRS_INST;
case 23: goto VMRS_INST; \ case 21: goto VMSR_INST;
case 24: goto VMOVBCR_INST; \ case 22: goto VMOVBRC_INST;
case 25: goto VMOVBRRSS_INST; \ case 23: goto VMRS_INST;
case 26: goto VMOVBRRD_INST; \ case 24: goto VMOVBCR_INST;
case 27: goto VSTR_INST; \ case 25: goto VMOVBRRSS_INST;
case 28: goto VPUSH_INST; \ case 26: goto VMOVBRRD_INST;
case 29: goto VSTM_INST; \ case 27: goto VSTR_INST;
case 30: goto VPOP_INST; \ case 28: goto VPUSH_INST;
case 31: goto VLDR_INST; \ case 29: goto VSTM_INST;
case 32: goto VLDM_INST ; \ case 30: goto VPOP_INST;
case 33: goto SRS_INST; \ case 31: goto VLDR_INST;
case 34: goto RFE_INST; \ case 32: goto VLDM_INST ;
case 35: goto BKPT_INST; \ case 33: goto SRS_INST;
case 36: goto BLX_INST; \ case 34: goto RFE_INST;
case 37: goto CPS_INST; \ case 35: goto BKPT_INST;
case 38: goto PLD_INST; \ case 36: goto BLX_INST;
case 39: goto SETEND_INST; \ case 37: goto CPS_INST;
case 40: goto CLREX_INST; \ case 38: goto PLD_INST;
case 41: goto REV16_INST; \ case 39: goto SETEND_INST;
case 42: goto USAD8_INST; \ case 40: goto CLREX_INST;
case 43: goto SXTB_INST; \ case 41: goto REV16_INST;
case 44: goto UXTB_INST; \ case 42: goto USAD8_INST;
case 45: goto SXTH_INST; \ case 43: goto SXTB_INST;
case 46: goto SXTB16_INST; \ case 44: goto UXTB_INST;
case 47: goto UXTH_INST; \ case 45: goto SXTH_INST;
case 48: goto UXTB16_INST; \ case 46: goto SXTB16_INST;
case 49: goto CPY_INST; \ case 47: goto UXTH_INST;
case 50: goto UXTAB_INST; \ case 48: goto UXTB16_INST;
case 51: goto SSUB8_INST; \ case 49: goto CPY_INST;
case 52: goto SHSUB8_INST; \ case 50: goto UXTAB_INST;
case 53: goto SSUBADDX_INST; \ case 51: goto SSUB8_INST;
case 54: goto STREX_INST; \ case 52: goto SHSUB8_INST;
case 55: goto STREXB_INST; \ case 53: goto SSUBADDX_INST;
case 56: goto SWP_INST; \ case 54: goto STREX_INST;
case 57: goto SWPB_INST; \ case 55: goto STREXB_INST;
case 58: goto SSUB16_INST; \ case 56: goto SWP_INST;
case 59: goto SSAT16_INST; \ case 57: goto SWPB_INST;
case 60: goto SHSUBADDX_INST; \ case 58: goto SSUB16_INST;
case 61: goto QSUBADDX_INST; \ case 59: goto SSAT16_INST;
case 62: goto SHADDSUBX_INST; \ case 60: goto SHSUBADDX_INST;
case 63: goto SHADD8_INST; \ case 61: goto QSUBADDX_INST;
case 64: goto SHADD16_INST; \ case 62: goto SHADDSUBX_INST;
case 65: goto SEL_INST; \ case 63: goto SHADD8_INST;
case 66: goto SADDSUBX_INST; \ case 64: goto SHADD16_INST;
case 67: goto SADD8_INST; \ case 65: goto SEL_INST;
case 68: goto SADD16_INST; \ case 66: goto SADDSUBX_INST;
case 69: goto SHSUB16_INST; \ case 67: goto SADD8_INST;
case 70: goto UMAAL_INST; \ case 68: goto SADD16_INST;
case 71: goto UXTAB16_INST; \ case 69: goto SHSUB16_INST;
case 72: goto USUBADDX_INST; \ case 70: goto UMAAL_INST;
case 73: goto USUB8_INST; \ case 71: goto UXTAB16_INST;
case 74: goto USUB16_INST; \ case 72: goto USUBADDX_INST;
case 75: goto USAT16_INST; \ case 73: goto USUB8_INST;
case 76: goto USADA8_INST; \ case 74: goto USUB16_INST;
case 77: goto UQSUBADDX_INST; \ case 75: goto USAT16_INST;
case 78: goto UQSUB8_INST; \ case 76: goto USADA8_INST;
case 79: goto UQSUB16_INST; \ case 77: goto UQSUBADDX_INST;
case 80: goto UQADDSUBX_INST; \ case 78: goto UQSUB8_INST;
case 81: goto UQADD8_INST; \ case 79: goto UQSUB16_INST;
case 82: goto UQADD16_INST; \ case 80: goto UQADDSUBX_INST;
case 83: goto SXTAB_INST; \ case 81: goto UQADD8_INST;
case 84: goto UHSUBADDX_INST; \ case 82: goto UQADD16_INST;
case 85: goto UHSUB8_INST; \ case 83: goto SXTAB_INST;
case 86: goto UHSUB16_INST; \ case 84: goto UHSUBADDX_INST;
case 87: goto UHADDSUBX_INST; \ case 85: goto UHSUB8_INST;
case 88: goto UHADD8_INST; \ case 86: goto UHSUB16_INST;
case 89: goto UHADD16_INST; \ case 87: goto UHADDSUBX_INST;
case 90: goto UADDSUBX_INST; \ case 88: goto UHADD8_INST;
case 91: goto UADD8_INST; \ case 89: goto UHADD16_INST;
case 92: goto UADD16_INST; \ case 90: goto UADDSUBX_INST;
case 93: goto SXTAH_INST; \ case 91: goto UADD8_INST;
case 94: goto SXTAB16_INST; \ case 92: goto UADD16_INST;
case 95: goto QADD8_INST; \ case 93: goto SXTAH_INST;
case 96: goto BXJ_INST; \ case 94: goto SXTAB16_INST;
case 97: goto CLZ_INST; \ case 95: goto QADD8_INST;
case 98: goto UXTAH_INST; \ case 96: goto BXJ_INST;
case 99: goto BX_INST; \ case 97: goto CLZ_INST;
case 100: goto REV_INST; \ case 98: goto UXTAH_INST;
case 101: goto BLX_INST; \ case 99: goto BX_INST;
case 102: goto REVSH_INST; \ case 100: goto REV_INST;
case 103: goto QADD_INST; \ case 101: goto BLX_INST;
case 104: goto QADD16_INST; \ case 102: goto REVSH_INST;
case 105: goto QADDSUBX_INST; \ case 103: goto QADD_INST;
case 106: goto LDREX_INST; \ case 104: goto QADD16_INST;
case 107: goto QDADD_INST; \ case 105: goto QADDSUBX_INST;
case 108: goto QDSUB_INST; \ case 106: goto LDREX_INST;
case 109: goto QSUB_INST; \ case 107: goto QDADD_INST;
case 110: goto LDREXB_INST; \ case 108: goto QDSUB_INST;
case 111: goto QSUB8_INST; \ case 109: goto QSUB_INST;
case 112: goto QSUB16_INST; \ case 110: goto LDREXB_INST;
case 113: goto SMUAD_INST; \ case 111: goto QSUB8_INST;
case 114: goto SMMUL_INST; \ case 112: goto QSUB16_INST;
case 115: goto SMUSD_INST; \ case 113: goto SMUAD_INST;
case 116: goto SMLSD_INST; \ case 114: goto SMMUL_INST;
case 117: goto SMLSLD_INST; \ case 115: goto SMUSD_INST;
case 118: goto SMMLA_INST; \ case 116: goto SMLSD_INST;
case 119: goto SMMLS_INST; \ case 117: goto SMLSLD_INST;
case 120: goto SMLALD_INST; \ case 118: goto SMMLA_INST;
case 121: goto SMLAD_INST; \ case 119: goto SMMLS_INST;
case 122: goto SMLAW_INST; \ case 120: goto SMLALD_INST;
case 123: goto SMULW_INST; \ case 121: goto SMLAD_INST;
case 124: goto PKHTB_INST; \ case 122: goto SMLAW_INST;
case 125: goto PKHBT_INST; \ case 123: goto SMULW_INST;
case 126: goto SMUL_INST; \ case 124: goto PKHTB_INST;
case 127: goto SMLALXY_INST; \ case 125: goto PKHBT_INST;
case 128: goto SMLA_INST; \ case 126: goto SMUL_INST;
case 129: goto MCRR_INST; \ case 127: goto SMLALXY_INST;
case 130: goto MRRC_INST; \ case 128: goto SMLA_INST;
case 131: goto CMP_INST; \ case 129: goto MCRR_INST;
case 132: goto TST_INST; \ case 130: goto MRRC_INST;
case 133: goto TEQ_INST; \ case 131: goto CMP_INST;
case 134: goto CMN_INST; \ case 132: goto TST_INST;
case 135: goto SMULL_INST; \ case 133: goto TEQ_INST;
case 136: goto UMULL_INST; \ case 134: goto CMN_INST;
case 137: goto UMLAL_INST; \ case 135: goto SMULL_INST;
case 138: goto SMLAL_INST; \ case 136: goto UMULL_INST;
case 139: goto MUL_INST; \ case 137: goto UMLAL_INST;
case 140: goto MLA_INST; \ case 138: goto SMLAL_INST;
case 141: goto SSAT_INST; \ case 139: goto MUL_INST;
case 142: goto USAT_INST; \ case 140: goto MLA_INST;
case 143: goto MRS_INST; \ case 141: goto SSAT_INST;
case 144: goto MSR_INST; \ case 142: goto USAT_INST;
case 145: goto AND_INST; \ case 143: goto MRS_INST;
case 146: goto BIC_INST; \ case 144: goto MSR_INST;
case 147: goto LDM_INST; \ case 145: goto AND_INST;
case 148: goto EOR_INST; \ case 146: goto BIC_INST;
case 149: goto ADD_INST; \ case 147: goto LDM_INST;
case 150: goto RSB_INST; \ case 148: goto EOR_INST;
case 151: goto RSC_INST; \ case 149: goto ADD_INST;
case 152: goto SBC_INST; \ case 150: goto RSB_INST;
case 153: goto ADC_INST; \ case 151: goto RSC_INST;
case 154: goto SUB_INST; \ case 152: goto SBC_INST;
case 155: goto ORR_INST; \ case 153: goto ADC_INST;
case 156: goto MVN_INST; \ case 154: goto SUB_INST;
case 157: goto MOV_INST; \ case 155: goto ORR_INST;
case 158: goto STM_INST; \ case 156: goto MVN_INST;
case 159: goto LDM_INST; \ case 157: goto MOV_INST;
case 160: goto LDRSH_INST; \ case 158: goto STM_INST;
case 161: goto STM_INST; \ case 159: goto LDM_INST;
case 162: goto LDM_INST; \ case 160: goto LDRSH_INST;
case 163: goto LDRSB_INST; \ case 161: goto STM_INST;
case 164: goto STRD_INST; \ case 162: goto LDM_INST;
case 165: goto LDRH_INST; \ case 163: goto LDRSB_INST;
case 166: goto STRH_INST; \ case 164: goto STRD_INST;
case 167: goto LDRD_INST; \ case 165: goto LDRH_INST;
case 168: goto STRT_INST; \ case 166: goto STRH_INST;
case 169: goto STRBT_INST; \ case 167: goto LDRD_INST;
case 170: goto LDRBT_INST; \ case 168: goto STRT_INST;
case 171: goto LDRT_INST; \ case 169: goto STRBT_INST;
case 172: goto MRC_INST; \ case 170: goto LDRBT_INST;
case 173: goto MCR_INST; \ case 171: goto LDRT_INST;
case 174: goto MSR_INST; \ case 172: goto MRC_INST;
case 175: goto LDRB_INST; \ case 173: goto MCR_INST;
case 176: goto STRB_INST; \ case 174: goto MSR_INST;
case 177: goto LDR_INST; \ case 175: goto LDRB_INST;
case 178: goto LDRCOND_INST ; \ case 176: goto STRB_INST;
case 179: goto STR_INST; \ case 177: goto LDR_INST;
case 180: goto CDP_INST; \ case 178: goto LDRCOND_INST ;
case 181: goto STC_INST; \ case 179: goto STR_INST;
case 182: goto LDC_INST; \ case 180: goto CDP_INST;
case 183: goto SWI_INST; \ case 181: goto STC_INST;
case 184: goto BBL_INST; \ case 182: goto LDC_INST;
case 185: goto LDREXD_INST; \ case 183: goto SWI_INST;
case 186: goto STREXD_INST; \ case 184: goto BBL_INST;
case 187: goto LDREXH_INST; \ case 185: goto LDREXD_INST;
case 188: goto STREXH_INST; \ case 186: goto STREXD_INST;
case 189: goto B_2_THUMB ; \ case 187: goto LDREXH_INST;
case 190: goto B_COND_THUMB ; \ case 188: goto STREXH_INST;
case 191: goto BL_1_THUMB ; \ case 189: goto B_2_THUMB ;
case 192: goto BL_2_THUMB ; \ case 190: goto B_COND_THUMB ;
case 193: goto BLX_1_THUMB ; \ case 191: goto BL_1_THUMB ;
case 194: goto DISPATCH; \ case 192: goto BL_2_THUMB ;
case 195: goto INIT_INST_LENGTH; \ case 193: goto BLX_1_THUMB ;
case 196: goto END; \ case 194: goto DISPATCH;
} case 195: goto INIT_INST_LENGTH;
case 196: goto END;
}
next_inst_end:
#endif #endif
#define UPDATE_NFLAG(dst) (cpu->NFlag = BIT(dst, 31) ? 1 : 0) #define UPDATE_NFLAG(dst) (cpu->NFlag = BIT(dst, 31) ? 1 : 0)
@ -3948,7 +4000,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
#define PC (cpu->Reg[15]) #define PC (cpu->Reg[15])
#define CHECK_EXT_INT if (!cpu->NirqSig && !(cpu->Cpsr & 0x80)) goto END; #define CHECK_EXT_INT if (!cpu->NirqSig && !(cpu->Cpsr & 0x80)) goto END;
ARMul_State* cpu = state;
// GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback // GCC and Clang have a C++ extension to support a lookup table of labels. Otherwise, fallback
// to a clunky switch statement. // to a clunky switch statement.
@ -3978,10 +4029,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
&&INIT_INST_LENGTH,&&END &&INIT_INST_LENGTH,&&END
}; };
#endif #endif
arm_inst* inst_base;
unsigned int addr; unsigned int addr;
unsigned int phys_addr; unsigned int phys_addr;
unsigned int num_instrs = 0;
int ptr; int ptr;

View File

@ -26,7 +26,8 @@ void switch_mode(ARMul_State* core, uint32_t mode);
// two bytes in size. Thus we don't need to worry about ThumbEE // two bytes in size. Thus we don't need to worry about ThumbEE
// or Thumb-2 where instructions can be 4 bytes in length. // or Thumb-2 where instructions can be 4 bytes in length.
static inline u32 GET_INST_SIZE(ARMul_State* core) {
    // Test TFlag for truthiness instead of doing arithmetic on it, so the result is
    // correct for any non-zero flag value and not only for exactly 1.
    return core->TFlag ? 2 : 4;
}
/** /**
@ -40,7 +41,9 @@ static inline u32 GET_INST_SIZE(ARMul_State* core) {
* If the PC is not being read, then the value stored in the register is returned. * If the PC is not being read, then the value stored in the register is returned.
*/ */
static inline u32 CHECK_READ_REG15_WA(ARMul_State* core, int Rn) {
    // Ordinary registers are returned as stored.
    if (Rn != 15)
        return core->Reg[Rn];

    // Reading the PC: clear the low two bits, then add two instruction sizes.
    return (core->Reg[15] & ~0x3) + GET_INST_SIZE(core) * 2;
}
/** /**
@ -53,5 +56,7 @@ static inline u32 CHECK_READ_REG15_WA(ARMul_State* core, int Rn) {
* If the PC is not being read, then the values stored in the register is returned. * If the PC is not being read, then the values stored in the register is returned.
*/ */
static inline u32 CHECK_READ_REG15(ARMul_State* core, int Rn) {
    // Ordinary registers are returned as stored.
    if (Rn != 15)
        return core->Reg[Rn];

    // Reading the PC: clear only the lowest bit, then add two instruction sizes.
    return (core->Reg[15] & ~0x1) + GET_INST_SIZE(core) * 2;
}

View File

@ -177,15 +177,31 @@ private:
* *
* @todo define orientation concretely. * @todo define orientation concretely.
*/ */
static int SignedArea (const Math::Vec2<Fix12P4>& vtx1, static int SignedArea (Math::Vec2<Fix12P4> vtx1,
const Math::Vec2<Fix12P4>& vtx2, Math::Vec2<Fix12P4> vtx2,
const Math::Vec2<Fix12P4>& vtx3) { Math::Vec2<Fix12P4> vtx3) {
const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0);
const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0);
// TODO: There is a very small chance this will overflow for sizeof(int) == 4 // TODO: There is a very small chance this will overflow for sizeof(int) == 4
return Math::Cross(vec1, vec2).z; return Math::Cross(vec1, vec2).z;
}; };
/**
 * Applies an alpha modifier to a combiner input.
 *
 * Each modifier selects one channel of `values`; the "OneMinus" variants return
 * 255 minus that channel.
 *
 * @param factor Which channel to select and whether to invert it.
 * @param values The four-channel input the modifier is applied to.
 * @return The selected (and possibly inverted) channel, or 0 for an unrecognised factor.
 */
inline u8 GetAlphaModifier(Regs::TevStageConfig::AlphaModifier factor, Math::Vec4<u8> values)
{
    using AlphaModifier = Regs::TevStageConfig::AlphaModifier;

    switch (factor) {
    case AlphaModifier::SourceAlpha:
        return values.a();

    case AlphaModifier::OneMinusSourceAlpha:
        return 255 - values.a();

    case AlphaModifier::SourceRed:
        return values.r();

    case AlphaModifier::OneMinusSourceRed:
        return 255 - values.r();

    case AlphaModifier::SourceGreen:
        return values.g();

    case AlphaModifier::OneMinusSourceGreen:
        return 255 - values.g();

    case AlphaModifier::SourceBlue:
        return values.b();

    case AlphaModifier::OneMinusSourceBlue:
        return 255 - values.b();
    }

    // Unrecognised factor: return a defined value instead of reading past a lookup table.
    return 0;
}
/** /**
* Helper function for ProcessTriangle with the "reversed" flag to allow for implementing * Helper function for ProcessTriangle with the "reversed" flag to allow for implementing
* culling via recursion. * culling via recursion.
@ -375,41 +391,48 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
// operations on each of them (e.g. inversion) and then calculate the output color // operations on each of them (e.g. inversion) and then calculate the output color
// with some basic arithmetic. Alpha combiners can be configured separately but work // with some basic arithmetic. Alpha combiners can be configured separately but work
// analogously. // analogously.
using Source = Regs::TevStageConfig::Source;
Math::Vec4<u8> combiner_output; Math::Vec4<u8> combiner_output;
Math::Vec4<u8> constant, empty = {};
Math::Vec4<u8> *results[0x11];
results[(int) Source::PrimaryColor] = results[(int) Source::PrimaryFragmentColor] = &primary_color;
results[(int) Source::Texture0] = texture_color + 0;
results[(int) Source::Texture1] = texture_color + 1;
results[(int) Source::Texture2] = texture_color + 2;
results[(int) 0x7] = &empty;
results[(int) 0x8] = &empty;
results[(int) 0x9] = &empty;
results[(int) 0xa] = &empty;
results[(int) 0xb] = &empty;
results[(int) 0xc] = &empty;
results[(int) 0xd] = &empty;
results[(int) Source::Constant] = &constant;
results[(int) Source::Previous] = &combiner_output;
results[(int) 0x11] = &empty;
int tempsI = 0;
for (const auto& tev_stage : tev_stages) { for (const auto& tev_stage : tev_stages) {
using Source = Regs::TevStageConfig::Source;
using ColorModifier = Regs::TevStageConfig::ColorModifier; using ColorModifier = Regs::TevStageConfig::ColorModifier;
using AlphaModifier = Regs::TevStageConfig::AlphaModifier; using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
using Operation = Regs::TevStageConfig::Operation; using Operation = Regs::TevStageConfig::Operation;
auto GetSource = [&](Source source) -> Math::Vec4<u8> { struct
switch (source) { {
// TODO: What's the difference between these two? Math::Vec4<u8> **results, &constant;
case Source::PrimaryColor: const Pica::Regs::TevStageConfig &tev_stage;
case Source::PrimaryFragmentColor:
return primary_color;
case Source::Texture0: Math::Vec4<u8> &operator()(Source source)
return texture_color[0]; {
constant = { tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a };
case Source::Texture1: return *(results[std::min((int) source, 0x11)]);
return texture_color[1]; }
}
case Source::Texture2: GetSource
return texture_color[2]; {
results, constant, tev_stage
case Source::Constant: };
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
case Source::Previous:
return combiner_output;
default:
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
UNIMPLEMENTED();
return {};
}
};
static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> { static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4<u8>& values) -> Math::Vec3<u8> {
switch (factor) { switch (factor) {
@ -445,34 +468,6 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
} }
}; };
static auto GetAlphaModifier = [](AlphaModifier factor, const Math::Vec4<u8>& values) -> u8 {
switch (factor) {
case AlphaModifier::SourceAlpha:
return values.a();
case AlphaModifier::OneMinusSourceAlpha:
return 255 - values.a();
case AlphaModifier::SourceRed:
return values.r();
case AlphaModifier::OneMinusSourceRed:
return 255 - values.r();
case AlphaModifier::SourceGreen:
return values.g();
case AlphaModifier::OneMinusSourceGreen:
return 255 - values.g();
case AlphaModifier::SourceBlue:
return values.b();
case AlphaModifier::OneMinusSourceBlue:
return 255 - values.b();
}
};
static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> { static auto ColorCombine = [](Operation op, const Math::Vec3<u8> input[3]) -> Math::Vec3<u8> {
switch (op) { switch (op) {
case Operation::Replace: case Operation::Replace:

View File

@ -155,10 +155,11 @@ static void ProcessShaderCode(VertexShaderState& state) {
} }
}; };
switch (instr.opcode.Value().GetInfo().type) { auto info = instr.opcode.Value().GetInfo();
switch (info.type) {
case OpCode::Type::Arithmetic: case OpCode::Type::Arithmetic:
{ {
bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed); bool is_inverted = 0 != (info.subtype & OpCode::Info::SrcInversed);
// TODO: We don't really support this properly: For instance, the address register // TODO: We don't really support this properly: For instance, the address register
// offset needs to be applied to SRC2 instead, etc. // offset needs to be applied to SRC2 instead, etc.
// For now, we just abort in this situation. // For now, we just abort in this situation.
@ -568,22 +569,23 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
const auto& attribute_register_map = registers.vs_input_register_map; const auto& attribute_register_map = registers.vs_input_register_map;
float24 dummy_register; float24 dummy_register;
boost::fill(state.input_register_table, &dummy_register); boost::fill(state.input_register_table, &dummy_register);
if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; if(num_attributes > 0) {state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x;
if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; if(num_attributes > 1) {state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x;
if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; if(num_attributes > 2) {state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x;
if(num_attributes > 3) state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x; if(num_attributes > 3) {state.input_register_table[attribute_register_map.attribute3_register] = &input.attr[3].x;
if(num_attributes > 4) state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x; if(num_attributes > 4) {state.input_register_table[attribute_register_map.attribute4_register] = &input.attr[4].x;
if(num_attributes > 5) state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x; if(num_attributes > 5) {state.input_register_table[attribute_register_map.attribute5_register] = &input.attr[5].x;
if(num_attributes > 6) state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x; if(num_attributes > 6) {state.input_register_table[attribute_register_map.attribute6_register] = &input.attr[6].x;
if(num_attributes > 7) state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x; if(num_attributes > 7) {state.input_register_table[attribute_register_map.attribute7_register] = &input.attr[7].x;
if(num_attributes > 8) state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x; if(num_attributes > 8) {state.input_register_table[attribute_register_map.attribute8_register] = &input.attr[8].x;
if(num_attributes > 9) state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x; if(num_attributes > 9) {state.input_register_table[attribute_register_map.attribute9_register] = &input.attr[9].x;
if(num_attributes > 10) state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x; if(num_attributes > 10){ state.input_register_table[attribute_register_map.attribute10_register] = &input.attr[10].x;
if(num_attributes > 11) state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x; if(num_attributes > 11){ state.input_register_table[attribute_register_map.attribute11_register] = &input.attr[11].x;
if(num_attributes > 12) state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x; if(num_attributes > 12){ state.input_register_table[attribute_register_map.attribute12_register] = &input.attr[12].x;
if(num_attributes > 13) state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x; if(num_attributes > 13){ state.input_register_table[attribute_register_map.attribute13_register] = &input.attr[13].x;
if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; if(num_attributes > 14){ state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; if(num_attributes > 15){ state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
}}}}}}}}}}}}}}}}
state.conditional_code[0] = false; state.conditional_code[0] = false;
state.conditional_code[1] = false; state.conditional_code[1] = false;