protot/3rdparty/bgfx/src/shader_dxbc.cpp

1935 lines
56 KiB
C++

/*
* Copyright 2011-2016 Branimir Karadzic. All rights reserved.
* License: https://github.com/bkaradzic/bgfx#license-bsd-2-clause
*/
#include "bgfx_p.h"
#include "shader_dxbc.h"
namespace bgfx
{
struct DxbcOpcodeInfo
{
uint8_t numOperands;
uint8_t numValues;
};
static const DxbcOpcodeInfo s_dxbcOpcodeInfo[] =
{
{ 3, 0 }, // ADD
{ 3, 0 }, // AND
{ 0, 0 }, // BREAK
{ 1, 0 }, // BREAKC
{ 0, 0 }, // CALL
{ 0, 0 }, // CALLC
{ 1, 0 }, // CASE
{ 0, 0 }, // CONTINUE
{ 1, 0 }, // CONTINUEC
{ 0, 0 }, // CUT
{ 0, 0 }, // DEFAULT
{ 2, 0 }, // DERIV_RTX
{ 2, 0 }, // DERIV_RTY
{ 1, 0 }, // DISCARD
{ 3, 0 }, // DIV
{ 3, 0 }, // DP2
{ 3, 0 }, // DP3
{ 3, 0 }, // DP4
{ 0, 0 }, // ELSE
{ 0, 0 }, // EMIT
{ 0, 0 }, // EMITTHENCUT
{ 0, 0 }, // ENDIF
{ 0, 0 }, // ENDLOOP
{ 0, 0 }, // ENDSWITCH
{ 3, 0 }, // EQ
{ 2, 0 }, // EXP
{ 2, 0 }, // FRC
{ 2, 0 }, // FTOI
{ 2, 0 }, // FTOU
{ 3, 0 }, // GE
{ 3, 0 }, // IADD
{ 1, 0 }, // IF
{ 3, 0 }, // IEQ
{ 3, 0 }, // IGE
{ 3, 0 }, // ILT
{ 4, 0 }, // IMAD
{ 3, 0 }, // IMAX
{ 3, 0 }, // IMIN
{ 4, 0 }, // IMUL
{ 3, 0 }, // INE
{ 2, 0 }, // INEG
{ 3, 0 }, // ISHL
{ 3, 0 }, // ISHR
{ 2, 0 }, // ITOF
{ 0, 0 }, // LABEL
{ 3, 0 }, // LD
{ 4, 0 }, // LD_MS
{ 2, 0 }, // LOG
{ 0, 0 }, // LOOP
{ 3, 0 }, // LT
{ 4, 0 }, // MAD
{ 3, 0 }, // MIN
{ 3, 0 }, // MAX
{ 0, 1 }, // CUSTOMDATA
{ 2, 0 }, // MOV
{ 4, 0 }, // MOVC
{ 3, 0 }, // MUL
{ 3, 0 }, // NE
{ 0, 0 }, // NOP
{ 2, 0 }, // NOT
{ 3, 0 }, // OR
{ 3, 0 }, // RESINFO
{ 0, 0 }, // RET
{ 1, 0 }, // RETC
{ 2, 0 }, // ROUND_NE
{ 2, 0 }, // ROUND_NI
{ 2, 0 }, // ROUND_PI
{ 2, 0 }, // ROUND_Z
{ 2, 0 }, // RSQ
{ 4, 0 }, // SAMPLE
{ 5, 0 }, // SAMPLE_C
{ 5, 0 }, // SAMPLE_C_LZ
{ 5, 0 }, // SAMPLE_L
{ 6, 0 }, // SAMPLE_D
{ 5, 0 }, // SAMPLE_B
{ 2, 0 }, // SQRT
{ 1, 0 }, // SWITCH
{ 3, 0 }, // SINCOS
{ 3, 0 }, // UDIV
{ 3, 0 }, // ULT
{ 3, 0 }, // UGE
{ 4, 0 }, // UMUL
{ 4, 0 }, // UMAD
{ 3, 0 }, // UMAX
{ 3, 0 }, // UMIN
{ 3, 0 }, // USHR
{ 2, 0 }, // UTOF
{ 3, 0 }, // XOR
{ 1, 1 }, // DCL_RESOURCE
{ 1, 0 }, // DCL_CONSTANT_BUFFER
{ 1, 0 }, // DCL_SAMPLER
{ 1, 1 }, // DCL_INDEX_RANGE
{ 1, 0 }, // DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY
{ 1, 0 }, // DCL_GS_INPUT_PRIMITIVE
{ 0, 1 }, // DCL_MAX_OUTPUT_VERTEX_COUNT
{ 1, 0 }, // DCL_INPUT
{ 1, 1 }, // DCL_INPUT_SGV
{ 1, 0 }, // DCL_INPUT_SIV
{ 1, 0 }, // DCL_INPUT_PS
{ 1, 1 }, // DCL_INPUT_PS_SGV
{ 1, 1 }, // DCL_INPUT_PS_SIV
{ 1, 0 }, // DCL_OUTPUT
{ 1, 0 }, // DCL_OUTPUT_SGV
{ 1, 1 }, // DCL_OUTPUT_SIV
{ 0, 1 }, // DCL_TEMPS
{ 0, 3 }, // DCL_INDEXABLE_TEMP
{ 0, 0 }, // DCL_GLOBAL_FLAGS
{ 0, 0 }, // InstrD3D10
{ 4, 0 }, // LOD
{ 4, 0 }, // GATHER4
{ 0, 0 }, // SAMPLE_POS
{ 0, 0 }, // SAMPLE_INFO
{ 0, 0 }, // InstrD3D10_1
{ 0, 0 }, // HS_DECLS
{ 0, 0 }, // HS_CONTROL_POINT_PHASE
{ 0, 0 }, // HS_FORK_PHASE
{ 0, 0 }, // HS_JOIN_PHASE
{ 0, 0 }, // EMIT_STREAM
{ 0, 0 }, // CUT_STREAM
{ 1, 0 }, // EMITTHENCUT_STREAM
{ 1, 0 }, // INTERFACE_CALL
{ 0, 0 }, // BUFINFO
{ 2, 0 }, // DERIV_RTX_COARSE
{ 2, 0 }, // DERIV_RTX_FINE
{ 2, 0 }, // DERIV_RTY_COARSE
{ 2, 0 }, // DERIV_RTY_FINE
{ 5, 0 }, // GATHER4_C
{ 5, 0 }, // GATHER4_PO
{ 0, 0 }, // GATHER4_PO_C
{ 0, 0 }, // RCP
{ 0, 0 }, // F32TOF16
{ 0, 0 }, // F16TOF32
{ 0, 0 }, // UADDC
{ 0, 0 }, // USUBB
{ 0, 0 }, // COUNTBITS
{ 0, 0 }, // FIRSTBIT_HI
{ 0, 0 }, // FIRSTBIT_LO
{ 0, 0 }, // FIRSTBIT_SHI
{ 0, 0 }, // UBFE
{ 0, 0 }, // IBFE
{ 5, 0 }, // BFI
{ 0, 0 }, // BFREV
{ 5, 0 }, // SWAPC
{ 0, 0 }, // DCL_STREAM
{ 1, 0 }, // DCL_FUNCTION_BODY
{ 0, 0 }, // DCL_FUNCTION_TABLE
{ 0, 0 }, // DCL_INTERFACE
{ 0, 0 }, // DCL_INPUT_CONTROL_POINT_COUNT
{ 0, 0 }, // DCL_OUTPUT_CONTROL_POINT_COUNT
{ 0, 0 }, // DCL_TESS_DOMAIN
{ 0, 0 }, // DCL_TESS_PARTITIONING
{ 0, 0 }, // DCL_TESS_OUTPUT_PRIMITIVE
{ 0, 0 }, // DCL_HS_MAX_TESSFACTOR
{ 0, 0 }, // DCL_HS_FORK_PHASE_INSTANCE_COUNT
{ 0, 0 }, // DCL_HS_JOIN_PHASE_INSTANCE_COUNT
{ 0, 3 }, // DCL_THREAD_GROUP
{ 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_TYPED
{ 1, 0 }, // DCL_UNORDERED_ACCESS_VIEW_RAW
{ 1, 1 }, // DCL_UNORDERED_ACCESS_VIEW_STRUCTURED
{ 1, 1 }, // DCL_THREAD_GROUP_SHARED_MEMORY_RAW
{ 1, 2 }, // DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED
{ 1, 0 }, // DCL_RESOURCE_RAW
{ 1, 1 }, // DCL_RESOURCE_STRUCTURED
{ 3, 0 }, // LD_UAV_TYPED
{ 3, 0 }, // STORE_UAV_TYPED
{ 3, 0 }, // LD_RAW
{ 3, 0 }, // STORE_RAW
{ 4, 0 }, // LD_STRUCTURED
{ 4, 0 }, // STORE_STRUCTURED
{ 3, 0 }, // ATOMIC_AND
{ 3, 0 }, // ATOMIC_OR
{ 3, 0 }, // ATOMIC_XOR
{ 3, 0 }, // ATOMIC_CMP_STORE
{ 3, 0 }, // ATOMIC_IADD
{ 3, 0 }, // ATOMIC_IMAX
{ 3, 0 }, // ATOMIC_IMIN
{ 3, 0 }, // ATOMIC_UMAX
{ 3, 0 }, // ATOMIC_UMIN
{ 2, 0 }, // IMM_ATOMIC_ALLOC
{ 2, 0 }, // IMM_ATOMIC_CONSUME
{ 0, 0 }, // IMM_ATOMIC_IADD
{ 0, 0 }, // IMM_ATOMIC_AND
{ 0, 0 }, // IMM_ATOMIC_OR
{ 0, 0 }, // IMM_ATOMIC_XOR
{ 0, 0 }, // IMM_ATOMIC_EXCH
{ 0, 0 }, // IMM_ATOMIC_CMP_EXCH
{ 0, 0 }, // IMM_ATOMIC_IMAX
{ 0, 0 }, // IMM_ATOMIC_IMIN
{ 0, 0 }, // IMM_ATOMIC_UMAX
{ 0, 0 }, // IMM_ATOMIC_UMIN
{ 0, 0 }, // SYNC
{ 3, 0 }, // DADD
{ 3, 0 }, // DMAX
{ 3, 0 }, // DMIN
{ 3, 0 }, // DMUL
{ 3, 0 }, // DEQ
{ 3, 0 }, // DGE
{ 3, 0 }, // DLT
{ 3, 0 }, // DNE
{ 2, 0 }, // DMOV
{ 4, 0 }, // DMOVC
{ 0, 0 }, // DTOF
{ 0, 0 }, // FTOD
{ 3, 0 }, // EVAL_SNAPPED
{ 3, 0 }, // EVAL_SAMPLE_INDEX
{ 2, 0 }, // EVAL_CENTROID
{ 0, 1 }, // DCL_GS_INSTANCE_COUNT
{ 0, 0 }, // ABORT
{ 0, 0 }, // DEBUG_BREAK
{ 0, 0 }, // InstrD3D11
{ 0, 0 }, // DDIV
{ 0, 0 }, // DFMA
{ 0, 0 }, // DRCP
{ 0, 0 }, // MSAD
{ 0, 0 }, // DTOI
{ 0, 0 }, // DTOU
{ 0, 0 }, // ITOD
{ 0, 0 }, // UTOD
};
BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcodeInfo) == DxbcOpcode::Count);
static const char* s_dxbcOpcode[] =
{
"add",
"and",
"break",
"breakc",
"call",
"callc",
"case",
"continue",
"continuec",
"cut",
"default",
"deriv_rtx",
"deriv_rty",
"discard",
"div",
"dp2",
"dp3",
"dp4",
"else",
"emit",
"emitthencut",
"endif",
"endloop",
"endswitch",
"eq",
"exp",
"frc",
"ftoi",
"ftou",
"ge",
"iadd",
"if",
"ieq",
"ige",
"ilt",
"imad",
"imax",
"imin",
"imul",
"ine",
"ineg",
"ishl",
"ishr",
"itof",
"label",
"ld",
"ld_ms",
"log",
"loop",
"lt",
"mad",
"min",
"max",
"customdata",
"mov",
"movc",
"mul",
"ne",
"nop",
"not",
"or",
"resinfo",
"ret",
"retc",
"round_ne",
"round_ni",
"round_pi",
"round_z",
"rsq",
"sample",
"sample_c",
"sample_c_lz",
"sample_l",
"sample_d",
"sample_b",
"sqrt",
"switch",
"sincos",
"udiv",
"ult",
"uge",
"umul",
"umad",
"umax",
"umin",
"ushr",
"utof",
"xor",
"dcl_resource",
"dcl_constantbuffer",
"dcl_sampler",
"dcl_index_range",
"dcl_gs_output_primitive_topology",
"dcl_gs_input_primitive",
"dcl_max_output_vertex_count",
"dcl_input",
"dcl_input_sgv",
"dcl_input_siv",
"dcl_input_ps",
"dcl_input_ps_sgv",
"dcl_input_ps_siv",
"dcl_output",
"dcl_output_sgv",
"dcl_output_siv",
"dcl_temps",
"dcl_indexable_temp",
"dcl_global_flags",
NULL,
"lod",
"gather4",
"sample_pos",
"sample_info",
NULL,
"hs_decls",
"hs_control_point_phase",
"hs_fork_phase",
"hs_join_phase",
"emit_stream",
"cut_stream",
"emitthencut_stream",
"interface_call",
"bufinfo",
"deriv_rtx_coarse",
"deriv_rtx_fine",
"deriv_rty_coarse",
"deriv_rty_fine",
"gather4_c",
"gather4_po",
"gather4_po_c",
"rcp",
"f32tof16",
"f16tof32",
"uaddc",
"usubb",
"countbits",
"firstbit_hi",
"firstbit_lo",
"firstbit_shi",
"ubfe",
"ibfe",
"bfi",
"bfrev",
"swapc",
"dcl_stream",
"dcl_function_body",
"dcl_function_table",
"dcl_interface",
"dcl_input_control_point_count",
"dcl_output_control_point_count",
"dcl_tess_domain",
"dcl_tess_partitioning",
"dcl_tess_output_primitive",
"dcl_hs_max_tessfactor",
"dcl_hs_fork_phase_instance_count",
"dcl_hs_join_phase_instance_count",
"dcl_thread_group",
"dcl_unordered_access_view_typed",
"dcl_unordered_access_view_raw",
"dcl_unordered_access_view_structured",
"dcl_thread_group_shared_memory_raw",
"dcl_thread_group_shared_memory_structured",
"dcl_resource_raw",
"dcl_resource_structured",
"ld_uav_typed",
"store_uav_typed",
"ld_raw",
"store_raw",
"ld_structured",
"store_structured",
"atomic_and",
"atomic_or",
"atomic_xor",
"atomic_cmp_store",
"atomic_iadd",
"atomic_imax",
"atomic_imin",
"atomic_umax",
"atomic_umin",
"imm_atomic_alloc",
"imm_atomic_consume",
"imm_atomic_iadd",
"imm_atomic_and",
"imm_atomic_or",
"imm_atomic_xor",
"imm_atomic_exch",
"imm_atomic_cmp_exch",
"imm_atomic_imax",
"imm_atomic_imin",
"imm_atomic_umax",
"imm_atomic_umin",
"sync",
"dadd",
"dmax",
"dmin",
"dmul",
"deq",
"dge",
"dlt",
"dne",
"dmov",
"dmovc",
"dtof",
"ftod",
"eval_snapped",
"eval_sample_index",
"eval_centroid",
"dcl_gs_instance_count",
"abort",
"debug_break",
NULL,
"ddiv",
"dfma",
"drcp",
"msad",
"dtoi",
"dtou",
"itod",
"utod",
};
BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOpcode) == DxbcOpcode::Count);
const char* getName(DxbcOpcode::Enum _opcode)
{
BX_CHECK(_opcode < DxbcOpcode::Count, "Unknown opcode id %d.", _opcode);
return s_dxbcOpcode[_opcode];
}
static const char* s_dxbcSrvType[] =
{
"", // Unknown
"Buffer", // Buffer
"Texture1D", // Texture1D
"Texture2D", // Texture2D
"Texture2DMS", // Texture2DMS
"Texture3D", // Texture3D
"TextureCube", // TextureCube
"Texture1DArray", // Texture1DArray
"Texture2DArray", // Texture2DArray
"Texture2DMSArray", // Texture2DMSArray
"TextureCubearray", // TextureCubearray
"RawBuffer", // RawBuffer
"StructuredBuffer", // StructuredBuffer
};
BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcSrvType) == DxbcResourceDim::Count);
const char* s_dxbcInterpolationName[] =
{
"",
"constant",
"linear",
"linear centroid",
"linear noperspective",
"linear noperspective centroid",
"linear sample",
"linear noperspective sample",
};
BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcInterpolationName) == DxbcInterpolation::Count);
// mesa/src/gallium/state_trackers/d3d1x/d3d1xshader/defs/shortfiles.txt
static const char* s_dxbcOperandType[] =
{
"r", // Temp
"v", // Input
"o", // Output
"x", // TempArray
"l", // Imm32
"d", // Imm64
"s", // Sampler
"t", // Resource
"cb", // ConstantBuffer
"icb", // ImmConstantBuffer
"label", // Label
"vPrim", // PrimitiveID
"oDepth", // OutputDepth
"null", // Null
"rasterizer", // Rasterizer
"oMask", // CoverageMask
"stream", // Stream
"function_body", // FunctionBody
"function_table", // FunctionTable
"interface", // Interface
"function_input", // FunctionInput
"function_output", // FunctionOutput
"vOutputControlPointID", // OutputControlPointId
"vForkInstanceID", // InputForkInstanceId
"vJoinInstanceID", // InputJoinInstanceId
"vicp", // InputControlPoint
"vocp", // OutputControlPoint
"vpc", // InputPatchConstant
"vDomain", // InputDomainPoint
"this", // ThisPointer
"u", // UnorderedAccessView
"g", // ThreadGroupSharedMemory
"vThreadID", // InputThreadId
"vThreadGrouID", // InputThreadGroupId
"vThreadIDInGroup", // InputThreadIdInGroup
"vCoverage", // InputCoverageMask
"vThreadIDInGroupFlattened", // InputThreadIdInGroupFlattened
"vGSInstanceID", // InputGsInstanceId
"oDepthGE", // OutputDepthGreaterEqual
"oDepthLE", // OutputDepthLessEqual
"vCycleCounter", // CycleCounter
};
BX_STATIC_ASSERT(BX_COUNTOF(s_dxbcOperandType) == DxbcOperandType::Count);
#define DXBC_MAX_NAME_STRING 512
int32_t readString(bx::ReaderSeekerI* _reader, int64_t _offset, char* _out, uint32_t _max, bx::Error* _err)
{
int64_t oldOffset = bx::seek(_reader);
bx::seek(_reader, _offset, bx::Whence::Begin);
int32_t size = 0;
for (uint32_t ii = 0; ii < _max-1; ++ii)
{
char ch;
size += bx::read(_reader, ch, _err);
*_out++ = ch;
if ('\0' == ch)
{
break;
}
}
*_out = '\0';
bx::seek(_reader, oldOffset, bx::Whence::Begin);
return size;
}
inline uint32_t dxbcMixF(uint32_t _b, uint32_t _c, uint32_t _d)
{
const uint32_t tmp0 = bx::uint32_xor(_c, _d);
const uint32_t tmp1 = bx::uint32_and(_b, tmp0);
const uint32_t result = bx::uint32_xor(_d, tmp1);
return result;
}
inline uint32_t dxbcMixG(uint32_t _b, uint32_t _c, uint32_t _d)
{
return dxbcMixF(_d, _b, _c);
}
inline uint32_t dxbcMixH(uint32_t _b, uint32_t _c, uint32_t _d)
{
const uint32_t tmp0 = bx::uint32_xor(_b, _c);
const uint32_t result = bx::uint32_xor(_d, tmp0);
return result;
}
inline uint32_t dxbcMixI(uint32_t _b, uint32_t _c, uint32_t _d)
{
const uint32_t tmp0 = bx::uint32_orc(_b, _d);
const uint32_t result = bx::uint32_xor(_c, tmp0);
return result;
}
void dxbcHashBlock(const uint32_t* data, uint32_t* hash)
{
const uint32_t d0 = data[ 0];
const uint32_t d1 = data[ 1];
const uint32_t d2 = data[ 2];
const uint32_t d3 = data[ 3];
const uint32_t d4 = data[ 4];
const uint32_t d5 = data[ 5];
const uint32_t d6 = data[ 6];
const uint32_t d7 = data[ 7];
const uint32_t d8 = data[ 8];
const uint32_t d9 = data[ 9];
const uint32_t d10 = data[10];
const uint32_t d11 = data[11];
const uint32_t d12 = data[12];
const uint32_t d13 = data[13];
const uint32_t d14 = data[14];
const uint32_t d15 = data[15];
uint32_t aa = hash[0];
uint32_t bb = hash[1];
uint32_t cc = hash[2];
uint32_t dd = hash[3];
aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d0 + 0xd76aa478, 7);
dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d1 + 0xe8c7b756, 12);
cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d2 + 0x242070db, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d3 + 0xc1bdceee, 10);
aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d4 + 0xf57c0faf, 7);
dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d5 + 0x4787c62a, 12);
cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d6 + 0xa8304613, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d7 + 0xfd469501, 10);
aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d8 + 0x698098d8, 7);
dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d9 + 0x8b44f7af, 12);
cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d10 + 0xffff5bb1, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d11 + 0x895cd7be, 10);
aa = bb + bx::uint32_rol(aa + dxbcMixF(bb, cc, dd) + d12 + 0x6b901122, 7);
dd = aa + bx::uint32_rol(dd + dxbcMixF(aa, bb, cc) + d13 + 0xfd987193, 12);
cc = dd + bx::uint32_ror(cc + dxbcMixF(dd, aa, bb) + d14 + 0xa679438e, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixF(cc, dd, aa) + d15 + 0x49b40821, 10);
aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d1 + 0xf61e2562, 5);
dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d6 + 0xc040b340, 9);
cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d11 + 0x265e5a51, 14);
bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d0 + 0xe9b6c7aa, 12);
aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d5 + 0xd62f105d, 5);
dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d10 + 0x02441453, 9);
cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d15 + 0xd8a1e681, 14);
bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d4 + 0xe7d3fbc8, 12);
aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d9 + 0x21e1cde6, 5);
dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d14 + 0xc33707d6, 9);
cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d3 + 0xf4d50d87, 14);
bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d8 + 0x455a14ed, 12);
aa = bb + bx::uint32_rol(aa + dxbcMixG(bb, cc, dd) + d13 + 0xa9e3e905, 5);
dd = aa + bx::uint32_rol(dd + dxbcMixG(aa, bb, cc) + d2 + 0xfcefa3f8, 9);
cc = dd + bx::uint32_rol(cc + dxbcMixG(dd, aa, bb) + d7 + 0x676f02d9, 14);
bb = cc + bx::uint32_ror(bb + dxbcMixG(cc, dd, aa) + d12 + 0x8d2a4c8a, 12);
aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d5 + 0xfffa3942, 4);
dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d8 + 0x8771f681, 11);
cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d11 + 0x6d9d6122, 16);
bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d14 + 0xfde5380c, 9);
aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d1 + 0xa4beea44, 4);
dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d4 + 0x4bdecfa9, 11);
cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d7 + 0xf6bb4b60, 16);
bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d10 + 0xbebfbc70, 9);
aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d13 + 0x289b7ec6, 4);
dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d0 + 0xeaa127fa, 11);
cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d3 + 0xd4ef3085, 16);
bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d6 + 0x04881d05, 9);
aa = bb + bx::uint32_rol(aa + dxbcMixH(bb, cc, dd) + d9 + 0xd9d4d039, 4);
dd = aa + bx::uint32_rol(dd + dxbcMixH(aa, bb, cc) + d12 + 0xe6db99e5, 11);
cc = dd + bx::uint32_rol(cc + dxbcMixH(dd, aa, bb) + d15 + 0x1fa27cf8, 16);
bb = cc + bx::uint32_ror(bb + dxbcMixH(cc, dd, aa) + d2 + 0xc4ac5665, 9);
aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d0 + 0xf4292244, 6);
dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d7 + 0x432aff97, 10);
cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d14 + 0xab9423a7, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d5 + 0xfc93a039, 11);
aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d12 + 0x655b59c3, 6);
dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d3 + 0x8f0ccc92, 10);
cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d10 + 0xffeff47d, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d1 + 0x85845dd1, 11);
aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d8 + 0x6fa87e4f, 6);
dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d15 + 0xfe2ce6e0, 10);
cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d6 + 0xa3014314, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d13 + 0x4e0811a1, 11);
aa = bb + bx::uint32_rol(aa + dxbcMixI(bb, cc, dd) + d4 + 0xf7537e82, 6);
dd = aa + bx::uint32_rol(dd + dxbcMixI(aa, bb, cc) + d11 + 0xbd3af235, 10);
cc = dd + bx::uint32_rol(cc + dxbcMixI(dd, aa, bb) + d2 + 0x2ad7d2bb, 15);
bb = cc + bx::uint32_ror(bb + dxbcMixI(cc, dd, aa) + d9 + 0xeb86d391, 11);
hash[0] += aa;
hash[1] += bb;
hash[2] += cc;
hash[3] += dd;
}
// dxbc hash function is slightly modified version of MD5 hash.
// https://tools.ietf.org/html/rfc1321
// http://www.efgh.com/software/md5.txt
//
// Assumption is that data pointer, size are both 4-byte aligned,
// and little endian.
//
void dxbcHash(const void* _data, uint32_t _size, void* _digest)
{
uint32_t hash[4] =
{
0x67452301,
0xefcdab89,
0x98badcfe,
0x10325476,
};
const uint32_t* data = (const uint32_t*)_data;
for (uint32_t ii = 0, num = _size/64; ii < num; ++ii)
{
dxbcHashBlock(data, hash);
data += 16;
}
uint32_t last[16];
memset(last, 0, sizeof(last) );
const uint32_t remaining = _size & 0x3f;
if (remaining >= 56)
{
memcpy(&last[0], data, remaining);
last[remaining/4] = 0x80;
dxbcHashBlock(last, hash);
memset(&last[1], 0, 56);
}
else
{
memcpy(&last[1], data, remaining);
last[1 + remaining/4] = 0x80;
}
last[ 0] = _size * 8;
last[15] = _size * 2 + 1;
dxbcHashBlock(last, hash);
memcpy(_digest, hash, 16);
}
int32_t read(bx::ReaderI* _reader, DxbcSubOperand& _subOperand, bx::Error* _err)
{
uint32_t token;
int32_t size = 0;
// 0 1 2 3
// 76543210765432107654321076543210
// e222111000nnttttttttssssssssmmoo
// ^^ ^ ^ ^ ^ ^ ^ ^-- number of operands
// || | | | | | +---- operand mode
// || | | | | +------------ operand mode bits
// || | | | +-------------------- type
// || | | +---------------------- number of addressing modes
// || | +------------------------- addressing mode 0
// || +---------------------------- addressing mode 1
// |+------------------------------- addressing mode 2
// +-------------------------------- extended
size += bx::read(_reader, token, _err);
_subOperand.type = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12);
_subOperand.numAddrModes = uint8_t( (token & UINT32_C(0x00300000) ) >> 20);
_subOperand.addrMode = uint8_t( (token & UINT32_C(0x01c00000) ) >> 22);
_subOperand.mode = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >> 2);
_subOperand.modeBits = uint8_t( (token & UINT32_C(0x00000ff0) ) >> 4) & "\x0f\xff\x03\x00"[_subOperand.mode];
_subOperand.num = uint8_t( (token & UINT32_C(0x00000003) ) );
switch (_subOperand.addrMode)
{
case DxbcOperandAddrMode::Imm32:
size += bx::read(_reader, _subOperand.regIndex, _err);
break;
case DxbcOperandAddrMode::Reg:
{
DxbcSubOperand subOperand;
size += read(_reader, subOperand, _err);
}
break;
case DxbcOperandAddrMode::RegImm32:
{
size += bx::read(_reader, _subOperand.regIndex, _err);
DxbcSubOperand subOperand;
size += read(_reader, subOperand, _err);
}
break;
case DxbcOperandAddrMode::RegImm64:
{
size += bx::read(_reader, _subOperand.regIndex, _err);
size += bx::read(_reader, _subOperand.regIndex, _err);
DxbcSubOperand subOperand;
size += read(_reader, subOperand, _err);
}
break;
default:
BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode);
break;
}
return size;
}
int32_t write(bx::WriterI* _writer, const DxbcSubOperand& _subOperand, bx::Error* _err)
{
int32_t size = 0;
uint32_t token = 0;
token |= (_subOperand.type << 12) & UINT32_C(0x000ff000);
token |= (_subOperand.numAddrModes << 20) & UINT32_C(0x00300000);
token |= (_subOperand.addrMode << 22) & UINT32_C(0x01c00000);
token |= (_subOperand.mode << 2) & UINT32_C(0x0000000c);
token |= (_subOperand.modeBits << 4) & UINT32_C(0x00000ff0);
token |= _subOperand.num & UINT32_C(0x00000003);
size += bx::write(_writer, token, _err);
switch (_subOperand.addrMode)
{
case DxbcOperandAddrMode::Imm32:
size += bx::write(_writer, _subOperand.regIndex, _err);
break;
case DxbcOperandAddrMode::Reg:
{
DxbcSubOperand subOperand;
size += write(_writer, subOperand, _err);
}
break;
case DxbcOperandAddrMode::RegImm32:
{
size += bx::write(_writer, _subOperand.regIndex, _err);
DxbcSubOperand subOperand;
size += write(_writer, subOperand, _err);
}
break;
case DxbcOperandAddrMode::RegImm64:
{
size += bx::write(_writer, _subOperand.regIndex, _err);
size += bx::write(_writer, _subOperand.regIndex, _err);
DxbcSubOperand subOperand;
size += write(_writer, subOperand, _err);
}
break;
default:
BX_CHECK(false, "sub operand addressing mode %d", _subOperand.addrMode);
break;
}
return size;
}
int32_t read(bx::ReaderI* _reader, DxbcOperand& _operand, bx::Error* _err)
{
int32_t size = 0;
uint32_t token;
size += bx::read(_reader, token, _err);
// 0 1 2 3
// 76543210765432107654321076543210
// e222111000nnttttttttssssssssmmoo
// ^^ ^ ^ ^ ^ ^ ^ ^-- number of operands
// || | | | | | +---- operand mode
// || | | | | +------------ operand mode bits
// || | | | +-------------------- type
// || | | +---------------------- number of addressing modes
// || | +------------------------- addressing mode 0
// || +---------------------------- addressing mode 1
// |+------------------------------- addressing mode 2
// +-------------------------------- extended
_operand.extended = 0 != (token & UINT32_C(0x80000000) );
_operand.numAddrModes = uint8_t( (token & UINT32_C(0x00300000) ) >> 20);
_operand.addrMode[0] = uint8_t( (token & UINT32_C(0x01c00000) ) >> 22);
_operand.addrMode[1] = uint8_t( (token & UINT32_C(0x0e000000) ) >> 25);
_operand.addrMode[2] = uint8_t( (token & UINT32_C(0x70000000) ) >> 28);
_operand.type = DxbcOperandType::Enum( (token & UINT32_C(0x000ff000) ) >> 12);
_operand.mode = DxbcOperandMode::Enum( (token & UINT32_C(0x0000000c) ) >> 2);
_operand.modeBits = uint8_t( (token & UINT32_C(0x00000ff0) ) >> 4) & "\x0f\xff\x03\x00"[_operand.mode];
_operand.num = uint8_t( (token & UINT32_C(0x00000003) ) );
if (_operand.extended)
{
size += bx::read(_reader, _operand.extBits, _err);
}
switch (_operand.type)
{
case DxbcOperandType::Imm32:
_operand.num = 2 == _operand.num ? 4 : _operand.num;
for (uint32_t ii = 0; ii < _operand.num; ++ii)
{
size += bx::read(_reader, _operand.un.imm32[ii], _err);
}
break;
case DxbcOperandType::Imm64:
_operand.num = 2 == _operand.num ? 4 : _operand.num;
for (uint32_t ii = 0; ii < _operand.num; ++ii)
{
size += bx::read(_reader, _operand.un.imm64[ii], _err);
}
break;
default:
break;
}
for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii)
{
switch (_operand.addrMode[ii])
{
case DxbcOperandAddrMode::Imm32:
size += bx::read(_reader, _operand.regIndex[ii], _err);
break;
case DxbcOperandAddrMode::Reg:
size += read(_reader, _operand.subOperand[ii], _err);
break;
case DxbcOperandAddrMode::RegImm32:
size += bx::read(_reader, _operand.regIndex[ii], _err);
size += read(_reader, _operand.subOperand[ii], _err);
break;
default:
BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]);
break;
}
}
return size;
}
int32_t write(bx::WriterI* _writer, const DxbcOperand& _operand, bx::Error* _err)
{
int32_t size = 0;
uint32_t token = 0;
token |= _operand.extended ? UINT32_C(0x80000000) : 0;
token |= (_operand.numAddrModes << 20) & UINT32_C(0x00300000);
token |= (_operand.addrMode[0] << 22) & UINT32_C(0x01c00000);
token |= (_operand.addrMode[1] << 25) & UINT32_C(0x0e000000);
token |= (_operand.addrMode[2] << 28) & UINT32_C(0x70000000);
token |= (_operand.type << 12) & UINT32_C(0x000ff000);
token |= (_operand.mode << 2) & UINT32_C(0x0000000c);
token |= (4 == _operand.num ? 2 : _operand.num) & UINT32_C(0x00000003);
token |= ( (_operand.modeBits & "\x0f\xff\x03\x00"[_operand.mode]) << 4) & UINT32_C(0x00000ff0);
size += bx::write(_writer, token, _err);
if (_operand.extended)
{
size += bx::write(_writer, _operand.extBits);
}
switch (_operand.type)
{
case DxbcOperandType::Imm32:
for (uint32_t ii = 0; ii < _operand.num; ++ii)
{
size += bx::write(_writer, _operand.un.imm32[ii], _err);
}
break;
case DxbcOperandType::Imm64:
for (uint32_t ii = 0; ii < _operand.num; ++ii)
{
size += bx::write(_writer, _operand.un.imm64[ii], _err);
}
break;
default:
break;
}
for (uint32_t ii = 0; ii < _operand.numAddrModes; ++ii)
{
switch (_operand.addrMode[ii])
{
case DxbcOperandAddrMode::Imm32:
size += bx::write(_writer, _operand.regIndex[ii], _err);
break;
case DxbcOperandAddrMode::Reg:
size += write(_writer, _operand.subOperand[ii], _err);
break;
case DxbcOperandAddrMode::RegImm32:
size += bx::write(_writer, _operand.regIndex[ii], _err);
size += write(_writer, _operand.subOperand[ii], _err);
break;
default:
BX_CHECK(false, "operand %d addressing mode %d", ii, _operand.addrMode[ii]);
break;
}
}
return size;
}
int32_t read(bx::ReaderI* _reader, DxbcInstruction& _instruction, bx::Error* _err)
{
uint32_t size = 0;
uint32_t token;
size += bx::read(_reader, token, _err);
// 0 1 2 3
// 76543210765432107654321076543210
// elllllll.............ooooooooooo
// ^^ ^----------- opcode
// |+------------------------------- length
// +-------------------------------- extended
_instruction.opcode = DxbcOpcode::Enum( (token & UINT32_C(0x000007ff) ) );
_instruction.length = uint8_t( (token & UINT32_C(0x7f000000) ) >> 24);
bool extended = 0 != (token & UINT32_C(0x80000000) );
_instruction.srv = DxbcResourceDim::Unknown;
_instruction.samples = 0;
_instruction.shadow = false;
_instruction.mono = false;
_instruction.allowRefactoring = false;
_instruction.fp64 = false;
_instruction.earlyDepth = false;
_instruction.enableBuffers = false;
_instruction.skipOptimization = false;
_instruction.enableMinPrecision = false;
_instruction.enableDoubleExtensions = false;
_instruction.enableShaderExtensions = false;
_instruction.threadsInGroup = false;
_instruction.sharedMemory = false;
_instruction.uavGroup = false;
_instruction.uavGlobal = false;
_instruction.saturate = false;
_instruction.testNZ = false;
_instruction.retType = DxbcResourceReturnType::Unused;
switch (_instruction.opcode)
{
case DxbcOpcode::CUSTOMDATA:
{
// uint32_t dataClass;
size += bx::read(_reader, _instruction.length);
for (uint32_t ii = 0, num = (_instruction.length-2)/4; ii < num; ++ii)
{
char temp[16];
size += bx::read(_reader, temp, 16, _err);
}
}
return size;
case DxbcOpcode::DCL_CONSTANT_BUFFER:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ a...........
// ^------------ Allow refactoring
_instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) );
break;
case DxbcOpcode::DCL_GLOBAL_FLAGS:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ sxmoudfa...........
// ^^^^^^^^------------ Allow refactoring
// ||||||+------------- FP64
// |||||+-------------- Force early depth/stencil
// ||||+--------------- Enable raw and structured buffers
// |||+---------------- Skip optimizations
// ||+----------------- Enable minimum precision
// |+------------------ Enable double extension
// +------------------- Enable shader extension
_instruction.allowRefactoring = 0 != (token & UINT32_C(0x00000800) );
_instruction.fp64 = 0 != (token & UINT32_C(0x00001000) );
_instruction.earlyDepth = 0 != (token & UINT32_C(0x00002000) );
_instruction.enableBuffers = 0 != (token & UINT32_C(0x00004000) );
_instruction.skipOptimization = 0 != (token & UINT32_C(0x00008000) );
_instruction.enableMinPrecision = 0 != (token & UINT32_C(0x00010000) );
_instruction.enableDoubleExtensions = 0 != (token & UINT32_C(0x00020000) );
_instruction.enableShaderExtensions = 0 != (token & UINT32_C(0x00040000) );
break;
case DxbcOpcode::DCL_INPUT_PS:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ iiiii...........
// ^---------------- Interploation
_instruction.interpolation = DxbcInterpolation::Enum( (token & UINT32_C(0x0000f800) ) >> 11);
break;
case DxbcOpcode::DCL_RESOURCE:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ sssssssrrrrr...........
// ^ ^---------------- SRV
// +----------------------- MSAA samples
_instruction.srv = DxbcResourceDim::Enum( (token & UINT32_C(0x0000f800) ) >> 11);
_instruction.samples = uint8_t( (token & UINT32_C(0x007f0000) ) >> 16);
break;
case DxbcOpcode::DCL_SAMPLER:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ ms...........
// ^^------------ Shadow sampler
// +------------- Mono
_instruction.shadow = 0 != (token & UINT32_C(0x00000800) );
_instruction.mono = 0 != (token & UINT32_C(0x00001000) );
break;
case DxbcOpcode::SYNC:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ gust...........
// ^^^^------------ Threads in group
// ||+------------- Shared memory
// |+-------------- UAV group
// +--------------- UAV global
_instruction.threadsInGroup = 0 != (token & UINT32_C(0x00000800) );
_instruction.sharedMemory = 0 != (token & UINT32_C(0x00001000) );
_instruction.uavGroup = 0 != (token & UINT32_C(0x00002000) );
_instruction.uavGlobal = 0 != (token & UINT32_C(0x00004000) );
break;
default:
// 0 1 2 3
// 76543210765432107654321076543210
// ........ ppppn stt...........
// ^ ^ ^^------------- Resource info return type
// | | +-------------- Saturate
// | +------------------- Test not zero
// +----------------------- Precise mask
_instruction.retType = DxbcResourceReturnType::Enum( (token & UINT32_C(0x00001800) ) >> 11);
_instruction.saturate = 0 != (token & UINT32_C(0x00002000) );
_instruction.testNZ = 0 != (token & UINT32_C(0x00040000) );
// _instruction.precise = uint8_t( (token & UINT32_C(0x00780000) ) >> 19);
break;
}
_instruction.extended[0] = DxbcInstruction::ExtendedType::Count;
for (uint32_t ii = 0; extended; ++ii)
{
// 0 1 2 3
// 76543210765432107654321076543210
// e..........................ttttt
// ^ ^
// | +----- type
// +-------------------------------- extended
uint32_t extBits;
size += bx::read(_reader, extBits);
extended = 0 != (extBits & UINT32_C(0x80000000) );
_instruction.extended[ii ] = DxbcInstruction::ExtendedType::Enum(extBits & UINT32_C(0x0000001f) );
_instruction.extended[ii+1] = DxbcInstruction::ExtendedType::Count;
switch (_instruction.extended[ii])
{
case DxbcInstruction::ExtendedType::SampleControls:
// 0 1 2 3
// 76543210765432107654321076543210
// . zzzzyyyyxxxx .....
// ^ ^ ^
// | | +------------- x
// | +----------------- y
// +--------------------- z
_instruction.sampleOffsets[0] = uint8_t( (extBits & UINT32_C(0x00001e00) ) >> 9);
_instruction.sampleOffsets[1] = uint8_t( (extBits & UINT32_C(0x0001e000) ) >> 13);
_instruction.sampleOffsets[2] = uint8_t( (extBits & UINT32_C(0x001e0000) ) >> 17);
break;
case DxbcInstruction::ExtendedType::ResourceDim:
// 0 1 2 3
// 76543210765432107654321076543210
// . .....
//
_instruction.resourceTarget = uint8_t( (extBits & UINT32_C(0x000003e0) ) >> 6);
_instruction.resourceStride = uint8_t( (extBits & UINT32_C(0x0000f800) ) >> 11);
break;
case DxbcInstruction::ExtendedType::ResourceReturnType:
// 0 1 2 3
// 76543210765432107654321076543210
// . 3333222211110000.....
// ^ ^ ^
// | | +------------- x
// | +----------------- y
// +--------------------- z
_instruction.resourceReturnTypes[0] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x000001e0) ) >> 6);
_instruction.resourceReturnTypes[1] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x00001e00) ) >> 9);
_instruction.resourceReturnTypes[2] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x0001e000) ) >> 13);
_instruction.resourceReturnTypes[3] = DxbcResourceReturnType::Enum( (extBits & UINT32_C(0x001e0000) ) >> 17);
break;
default:
break;
}
}
switch (_instruction.opcode)
{
case DxbcOpcode::DCL_FUNCTION_TABLE:
{
uint32_t tableId;
size += read(_reader, tableId, _err);
uint32_t num;
size += read(_reader, num);
for (uint32_t ii = 0; ii < num; ++ii)
{
uint32_t bodyId;
size += read(_reader, bodyId, _err);
}
}
break;
case DxbcOpcode::DCL_INTERFACE:
{
uint32_t interfaceId;
size += read(_reader, interfaceId, _err);
uint32_t num;
size += read(_reader, num, _err);
BX_CHECK(false, "not implemented.");
}
break;
default:
break;
};
uint32_t currOp = 0;
const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode];
_instruction.numOperands = info.numOperands;
switch (info.numOperands)
{
case 6: size += read(_reader, _instruction.operand[currOp++], _err);
case 5: size += read(_reader, _instruction.operand[currOp++], _err);
case 4: size += read(_reader, _instruction.operand[currOp++], _err);
case 3: size += read(_reader, _instruction.operand[currOp++], _err);
case 2: size += read(_reader, _instruction.operand[currOp++], _err);
case 1: size += read(_reader, _instruction.operand[currOp++], _err);
case 0:
if (0 < info.numValues)
{
size += read(_reader, _instruction.value, info.numValues*sizeof(uint32_t), _err);
}
break;
default:
BX_CHECK(false, "Instruction %s with invalid number of operands %d (numValues %d)."
, getName(_instruction.opcode)
, info.numOperands
, info.numValues
);
break;
}
return size;
}
int32_t write(bx::WriterI* _writer, const DxbcInstruction& _instruction, bx::Error* _err)
{
uint32_t token = 0;
token |= (_instruction.opcode ) & UINT32_C(0x000007ff);
token |= (_instruction.length << 24) & UINT32_C(0x7f000000);
token |= DxbcInstruction::ExtendedType::Count != _instruction.extended[0]
? UINT32_C(0x80000000)
: 0
;
switch (_instruction.opcode)
{
// case DxbcOpcode::CUSTOMDATA:
// return size;
case DxbcOpcode::DCL_CONSTANT_BUFFER:
token |= _instruction.allowRefactoring ? UINT32_C(0x00000800) : 0;
break;
case DxbcOpcode::DCL_GLOBAL_FLAGS:
token |= _instruction.allowRefactoring ? UINT32_C(0x00000800) : 0;
token |= _instruction.fp64 ? UINT32_C(0x00001000) : 0;
token |= _instruction.earlyDepth ? UINT32_C(0x00002000) : 0;
token |= _instruction.enableBuffers ? UINT32_C(0x00004000) : 0;
token |= _instruction.skipOptimization ? UINT32_C(0x00008000) : 0;
token |= _instruction.enableMinPrecision ? UINT32_C(0x00010000) : 0;
token |= _instruction.enableDoubleExtensions ? UINT32_C(0x00020000) : 0;
token |= _instruction.enableShaderExtensions ? UINT32_C(0x00040000) : 0;
break;
case DxbcOpcode::DCL_INPUT_PS:
token |= (_instruction.interpolation << 11) & UINT32_C(0x0000f800);
break;
case DxbcOpcode::DCL_RESOURCE:
token |= (_instruction.srv << 11) & UINT32_C(0x0000f800);
token |= (_instruction.samples << 16) & UINT32_C(0x007f0000);
break;
case DxbcOpcode::DCL_SAMPLER:
token |= _instruction.shadow ? (0x00000800) : 0;
token |= _instruction.mono ? (0x00001000) : 0;
break;
case DxbcOpcode::SYNC:
token |= _instruction.threadsInGroup ? UINT32_C(0x00000800) : 0;
token |= _instruction.sharedMemory ? UINT32_C(0x00001000) : 0;
token |= _instruction.uavGroup ? UINT32_C(0x00002000) : 0;
token |= _instruction.uavGlobal ? UINT32_C(0x00004000) : 0;
break;
default:
token |= (_instruction.retType << 11) & UINT32_C(0x00001800);
token |= _instruction.saturate ? UINT32_C(0x00002000) : 0;
token |= _instruction.testNZ ? UINT32_C(0x00040000) : 0;
// _instruction.precise = uint8_t( (token & UINT32_C(0x00780000) ) >> 19);
break;
}
uint32_t size =0;
size += bx::write(_writer, token);
for (uint32_t ii = 0; _instruction.extended[ii] != DxbcInstruction::ExtendedType::Count; ++ii)
{
// 0 1 2 3
// 76543210765432107654321076543210
// e..........................ttttt
// ^ ^
// | +----- type
// +-------------------------------- extended
token = _instruction.extended[ii+1] == DxbcInstruction::ExtendedType::Count
? 0
: UINT32_C(0x80000000)
;
token |= uint8_t(_instruction.extended[ii]);
switch (_instruction.extended[ii])
{
case DxbcInstruction::ExtendedType::SampleControls:
// 0 1 2 3
// 76543210765432107654321076543210
// . zzzzyyyyxxxx .....
// ^ ^ ^
// | | +------------- x
// | +----------------- y
// +--------------------- z
token |= (uint32_t(_instruction.sampleOffsets[0]) << 9) & UINT32_C(0x00001e00);
token |= (uint32_t(_instruction.sampleOffsets[1]) << 13) & UINT32_C(0x0001e000);
token |= (uint32_t(_instruction.sampleOffsets[2]) << 17) & UINT32_C(0x001e0000);
break;
case DxbcInstruction::ExtendedType::ResourceDim:
// 0 1 2 3
// 76543210765432107654321076543210
// . .....
//
token |= (uint32_t(_instruction.resourceTarget << 6) & UINT32_C(0x000003e0) );
token |= (uint32_t(_instruction.resourceStride << 11) & UINT32_C(0x0000f800) );
break;
case DxbcInstruction::ExtendedType::ResourceReturnType:
// 0 1 2 3
// 76543210765432107654321076543210
// . 3333222211110000.....
// ^ ^ ^
// | | +------------- x
// | +----------------- y
// +--------------------- z
token |= (uint32_t(_instruction.resourceReturnTypes[0]) << 6) & UINT32_C(0x000001e0);
token |= (uint32_t(_instruction.resourceReturnTypes[1]) << 9) & UINT32_C(0x00001e00);
token |= (uint32_t(_instruction.resourceReturnTypes[2]) << 13) & UINT32_C(0x0001e000);
token |= (uint32_t(_instruction.resourceReturnTypes[3]) << 17) & UINT32_C(0x001e0000);
break;
default:
break;
}
size += bx::write(_writer, token, _err);
}
for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
{
size += write(_writer, _instruction.operand[ii], _err);
}
const DxbcOpcodeInfo& info = s_dxbcOpcodeInfo[_instruction.opcode];
if (0 < info.numValues)
{
size += bx::write(_writer, _instruction.value, info.numValues*sizeof(uint32_t), _err);
}
return size;
}
int32_t toString(char* _out, int32_t _size, const DxbcInstruction& _instruction)
{
int32_t size = 0;
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%s%s%s"
, getName(_instruction.opcode)
, _instruction.saturate ? "_sat" : ""
, _instruction.testNZ ? "_nz" : ""
);
if (DxbcResourceDim::Unknown != _instruction.srv)
{
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, " %s<%x>"
, s_dxbcSrvType[_instruction.srv]
, _instruction.value[0]
);
}
else if (0 < s_dxbcOpcodeInfo[_instruction.opcode].numValues)
{
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, " %d"
, _instruction.value[0]
);
}
for (uint32_t ii = 0; ii < _instruction.numOperands; ++ii)
{
const DxbcOperand& operand = _instruction.operand[ii];
const bool array = false
|| 1 < operand.numAddrModes
|| DxbcOperandAddrMode::Imm32 != operand.addrMode[0]
;
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%s%s%s"
, 0 == ii ? " " : ", "
, operand.extended ? "*" : ""
, s_dxbcOperandType[operand.type]
);
switch (operand.type)
{
case DxbcOperandType::Imm32:
case DxbcOperandType::Imm64:
for (uint32_t jj = 0; jj < operand.num; ++jj)
{
union { uint32_t i; float f; } cast = { operand.un.imm32[jj] };
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%s%f"
, 0 == jj ? "(" : ", "
, cast.f
);
}
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, ")"
);
break;
default:
break;
}
const uint32_t first = DxbcOperandAddrMode::RegImm32 == operand.addrMode[0] ? 0 : 1;
if (0 == first)
{
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "["
);
}
else
{
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%d%s"
, operand.regIndex[0]
, array ? "[" : ""
);
}
for (uint32_t jj = first; jj < operand.numAddrModes; ++jj)
{
switch (operand.addrMode[jj])
{
case DxbcOperandAddrMode::Imm32:
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%d"
, operand.regIndex[jj]
);
break;
case DxbcOperandAddrMode::Reg:
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%s%d"
, s_dxbcOperandType[operand.subOperand[jj].type]
, operand.regIndex[jj]
);
break;
case DxbcOperandAddrMode::RegImm32:
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%d + %s%d"
, operand.regIndex[jj]
, s_dxbcOperandType[operand.subOperand[jj].type]
, operand.regIndex[jj]
);
break;
default:
break;
}
}
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, "%s"
, array ? "]" : ""
);
switch (operand.mode)
{
case DxbcOperandMode::Mask:
if (0xf > operand.modeBits
&& 0 < operand.modeBits)
{
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, ".%s%s%s%s"
, 0 == (operand.modeBits & 1) ? "" : "x"
, 0 == (operand.modeBits & 2) ? "" : "y"
, 0 == (operand.modeBits & 4) ? "" : "z"
, 0 == (operand.modeBits & 8) ? "" : "w"
);
}
break;
case DxbcOperandMode::Swizzle:
if (0xe4 != operand.modeBits)
{
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, ".%c%c%c%c"
, "xyzw"[(operand.modeBits )&0x3]
, "xyzw"[(operand.modeBits>>2)&0x3]
, "xyzw"[(operand.modeBits>>4)&0x3]
, "xyzw"[(operand.modeBits>>6)&0x3]
);
}
break;
case DxbcOperandMode::Scalar:
size += bx::snprintf(&_out[size], bx::uint32_imax(0, _size-size)
, ".%c"
, "xyzw"[operand.modeBits]
);
break;
default:
break;
}
}
return size;
}
int32_t read(bx::ReaderSeekerI* _reader, DxbcSignature& _signature, bx::Error* _err)
{
int32_t size = 0;
int64_t offset = bx::seek(_reader);
uint32_t num;
size += bx::read(_reader, num, _err);
size += bx::read(_reader, _signature.key, _err);
for (uint32_t ii = 0; ii < num; ++ii)
{
DxbcSignature::Element element;
uint32_t nameOffset;
size += bx::read(_reader, nameOffset);
char name[DXBC_MAX_NAME_STRING];
readString(_reader, offset + nameOffset, name, DXBC_MAX_NAME_STRING, _err);
element.name = name;
size += bx::read(_reader, element.semanticIndex, _err);
size += bx::read(_reader, element.valueType, _err);
size += bx::read(_reader, element.componentType, _err);
size += bx::read(_reader, element.registerIndex, _err);
size += bx::read(_reader, element.mask, _err);
size += bx::read(_reader, element.readWriteMask, _err);
size += bx::read(_reader, element.stream, _err);
// padding
uint8_t padding;
size += bx::read(_reader, padding, _err);
_signature.elements.push_back(element);
}
return size;
}
int32_t write(bx::WriterI* _writer, const DxbcSignature& _signature, bx::Error* _err)
{
int32_t size = 0;
const uint32_t num = uint32_t(_signature.elements.size() );
size += bx::write(_writer, num, _err);
size += bx::write(_writer, _signature.key, _err);
typedef stl::unordered_map<stl::string, uint32_t> NameOffsetMap;
NameOffsetMap nom;
const uint8_t pad = 0;
uint32_t nameOffset = num * 24 + 8;
for (uint32_t ii = 0; ii < num; ++ii)
{
const DxbcSignature::Element& element = _signature.elements[ii];
NameOffsetMap::iterator it = nom.find(element.name);
if (it == nom.end() )
{
nom.insert(stl::make_pair(element.name, nameOffset) );
size += bx::write(_writer, nameOffset, _err);
nameOffset += uint32_t(element.name.size() + 1);
}
else
{
size += bx::write(_writer, it->second);
}
size += bx::write(_writer, element.semanticIndex, _err);
size += bx::write(_writer, element.valueType, _err);
size += bx::write(_writer, element.componentType, _err);
size += bx::write(_writer, element.registerIndex, _err);
size += bx::write(_writer, element.mask, _err);
size += bx::write(_writer, element.readWriteMask, _err);
size += bx::write(_writer, element.stream, _err);
size += bx::write(_writer, pad, _err);
}
uint32_t len = 0;
for (uint32_t ii = 0; ii < num; ++ii)
{
const DxbcSignature::Element& element = _signature.elements[ii];
NameOffsetMap::iterator it = nom.find(element.name);
if (it != nom.end() )
{
nom.erase(it);
size += bx::write(_writer, element.name.c_str(), uint32_t(element.name.size() + 1), _err);
len += uint32_t(element.name.size() + 1);
}
}
// align 4 bytes
size += bx::writeRep(_writer, 0xab, (len+3)/4*4 - len, _err);
return size;
}
int32_t read(bx::ReaderSeekerI* _reader, DxbcShader& _shader, bx::Error* _err)
{
int32_t size = 0;
size += bx::read(_reader, _shader.version, _err);
uint32_t bcLength;
size += bx::read(_reader, bcLength, _err);
uint32_t len = (bcLength-2)*sizeof(uint32_t);
_shader.byteCode.resize(len);
size += bx::read(_reader, _shader.byteCode.data(), len, _err);
return size;
}
int32_t write(bx::WriterI* _writer, const DxbcShader& _shader, bx::Error* _err)
{
const uint32_t len = uint32_t(_shader.byteCode.size() );
const uint32_t bcLength = len / sizeof(uint32_t) + 2;
int32_t size = 0;
size += bx::write(_writer, _shader.version, _err);
size += bx::write(_writer, bcLength, _err);
size += bx::write(_writer, _shader.byteCode.data(), len, _err);
return size;
}
#define DXBC_CHUNK_SHADER BX_MAKEFOURCC('S', 'H', 'D', 'R')
#define DXBC_CHUNK_SHADER_EX BX_MAKEFOURCC('S', 'H', 'E', 'X')
#define DXBC_CHUNK_INPUT_SIGNATURE BX_MAKEFOURCC('I', 'S', 'G', 'N')
#define DXBC_CHUNK_OUTPUT_SIGNATURE BX_MAKEFOURCC('O', 'S', 'G', 'N')
int32_t read(bx::ReaderSeekerI* _reader, DxbcContext& _dxbc, bx::Error* _err)
{
int32_t size = 0;
size += bx::read(_reader, _dxbc.header, _err);
_dxbc.shader.shex = false;
_dxbc.shader.aon9 = false;
for (uint32_t ii = 0; ii < _dxbc.header.numChunks; ++ii)
{
bx::seek(_reader, sizeof(DxbcContext::Header) + ii*sizeof(uint32_t), bx::Whence::Begin);
uint32_t chunkOffset;
size += bx::read(_reader, chunkOffset, _err);
bx::seek(_reader, chunkOffset, bx::Whence::Begin);
uint32_t fourcc;
size += bx::read(_reader, fourcc, _err);
uint32_t chunkSize;
size += bx::read(_reader, chunkSize, _err);
switch (fourcc)
{
case DXBC_CHUNK_SHADER_EX:
_dxbc.shader.shex = true;
// fallthrough
case DXBC_CHUNK_SHADER:
size += read(_reader, _dxbc.shader, _err);
break;
case BX_MAKEFOURCC('I', 'S', 'G', '1'):
case DXBC_CHUNK_INPUT_SIGNATURE:
size += read(_reader, _dxbc.inputSignature, _err);
break;
case BX_MAKEFOURCC('O', 'S', 'G', '1'):
case BX_MAKEFOURCC('O', 'S', 'G', '5'):
case DXBC_CHUNK_OUTPUT_SIGNATURE:
size += read(_reader, _dxbc.outputSignature, _err);
break;
case BX_MAKEFOURCC('A', 'o', 'n', '9'): // Contains DX9BC for feature level 9.x (*s_4_0_level_9_*) shaders.
_dxbc.shader.aon9 = true;
break;
case BX_MAKEFOURCC('I', 'F', 'C', 'E'): // Interface.
case BX_MAKEFOURCC('R', 'D', 'E', 'F'): // Resource definition.
case BX_MAKEFOURCC('S', 'D', 'G', 'B'): // Shader debugging info (old).
case BX_MAKEFOURCC('S', 'P', 'D', 'B'): // Shader debugging info (new).
case BX_MAKEFOURCC('S', 'F', 'I', '0'): // ?
case BX_MAKEFOURCC('S', 'T', 'A', 'T'): // Statistics.
case BX_MAKEFOURCC('P', 'C', 'S', 'G'): // Patch constant signature.
case BX_MAKEFOURCC('P', 'S', 'O', '1'): // Pipeline State Object 1
case BX_MAKEFOURCC('P', 'S', 'O', '2'): // Pipeline State Object 2
case BX_MAKEFOURCC('X', 'N', 'A', 'P'): // ?
case BX_MAKEFOURCC('X', 'N', 'A', 'S'): // ?
size += chunkSize;
break;
default:
size += chunkSize;
BX_CHECK(false, "UNKNOWN FOURCC %c%c%c%c %d"
, ( (char*)&fourcc)[0]
, ( (char*)&fourcc)[1]
, ( (char*)&fourcc)[2]
, ( (char*)&fourcc)[3]
, size
);
break;
}
}
return size;
}
int32_t write(bx::WriterSeekerI* _writer, const DxbcContext& _dxbc, bx::Error* _err)
{
int32_t size = 0;
int64_t dxbcOffset = bx::seek(_writer);
size += bx::write(_writer, DXBC_CHUNK_HEADER);
size += bx::writeRep(_writer, 0, 16, _err);
size += bx::write(_writer, UINT32_C(1), _err);
int64_t sizeOffset = bx::seek(_writer);
size += bx::writeRep(_writer, 0, 4, _err);
uint32_t numChunks = 3;
size += bx::write(_writer, numChunks, _err);
int64_t chunksOffsets = bx::seek(_writer);
size += bx::writeRep(_writer, 0, numChunks*sizeof(uint32_t), _err);
uint32_t chunkOffset[3];
uint32_t chunkSize[3];
chunkOffset[0] = uint32_t(bx::seek(_writer) - dxbcOffset);
size += write(_writer, DXBC_CHUNK_INPUT_SIGNATURE, _err);
size += write(_writer, UINT32_C(0), _err);
chunkSize[0] = write(_writer, _dxbc.inputSignature, _err);
chunkOffset[1] = uint32_t(bx::seek(_writer) - dxbcOffset);
size += write(_writer, DXBC_CHUNK_OUTPUT_SIGNATURE, _err);
size += write(_writer, UINT32_C(0), _err);
chunkSize[1] = write(_writer, _dxbc.outputSignature, _err);
chunkOffset[2] = uint32_t(bx::seek(_writer) - dxbcOffset);
size += write(_writer, _dxbc.shader.shex ? DXBC_CHUNK_SHADER_EX : DXBC_CHUNK_SHADER, _err);
size += write(_writer, UINT32_C(0), _err);
chunkSize[2] = write(_writer, _dxbc.shader, _err);
size += 0
+ chunkSize[0]
+ chunkSize[1]
+ chunkSize[2]
;
int64_t eof = bx::seek(_writer);
bx::seek(_writer, sizeOffset, bx::Whence::Begin);
bx::write(_writer, size, _err);
bx::seek(_writer, chunksOffsets, bx::Whence::Begin);
bx::write(_writer, chunkOffset, sizeof(chunkOffset), _err);
for (uint32_t ii = 0; ii < BX_COUNTOF(chunkOffset); ++ii)
{
bx::seek(_writer, chunkOffset[ii]+4, bx::Whence::Begin);
bx::write(_writer, chunkSize[ii], _err);
}
bx::seek(_writer, eof, bx::Whence::Begin);
return size;
}
void parse(const DxbcShader& _src, DxbcParseFn _fn, void* _userData, bx::Error* _err)
{
BX_ERROR_SCOPE(_err);
bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
{
DxbcInstruction instruction;
uint32_t size = read(&reader, instruction, _err);
BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); BX_UNUSED(size);
bool cont = _fn(token * sizeof(uint32_t), instruction, _userData);
if (!cont)
{
return;
}
token += instruction.length;
}
}
void filter(DxbcShader& _dst, const DxbcShader& _src, DxbcFilterFn _fn, void* _userData, bx::Error* _err)
{
BX_ERROR_SCOPE(_err);
bx::MemoryReader reader(_src.byteCode.data(), uint32_t(_src.byteCode.size() ) );
bx::MemoryBlock mb(g_allocator);
bx::MemoryWriter writer(&mb);
for (uint32_t token = 0, numTokens = uint32_t(_src.byteCode.size() / sizeof(uint32_t) ); token < numTokens;)
{
DxbcInstruction instruction;
uint32_t size = read(&reader, instruction, _err);
BX_CHECK(size/4 == instruction.length, "read %d, expected %d", size/4, instruction.length); BX_UNUSED(size);
_fn(instruction, _userData);
write(&writer, instruction, _err);
token += instruction.length;
}
uint8_t* data = (uint8_t*)mb.more();
uint32_t size = uint32_t(bx::getSize(&writer) );
_dst.byteCode.reserve(size);
memcpy(_dst.byteCode.data(), data, size);
}
} // namespace bgfx