godot/thirdparty/misc/smolv.cpp
reduz 0d2e02945b Implement shader caching
* Shader compilation is now cached. Subsequent loads take less than a millisecond.
* Improved game, editor and project manager startup time.
* Editor uses .godot/shader_cache to store shaders.
* Game uses user://shader_cache
* Project manager uses $config_dir/shader_cache
* Options to tweak shader caching in project settings.
* Editor path configuration moved from EditorSettings to new class, EditorPaths, so it can be available early on (before shaders are compiled).
* Reworked ShaderCompilerRD to ensure deterministic shader code creation (else shader may change and cache will be invalidated).
* Added shader compression with SMOLV: https://github.com/aras-p/smol-v
2021-05-31 10:13:09 +02:00

2108 lines
60 KiB
C++

// smol-v - public domain - https://github.com/aras-p/smol-v
// authored 2016-2020 by Aras Pranckevicius
// no warranty implied; use at your own risk
// See end of file for license information.
#include "smolv.h"
#include <stdint.h>
#include <vector>
#include <algorithm>
#include <cstdio>
#include <cstring>
#if !defined(_MSC_VER) && __cplusplus < 201103L
#define static_assert(x,y)
#endif
#define _SMOLV_ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
// --------------------------------------------------------------------------------------------
// Metadata about known SPIR-V operations
enum SpvOp
{
SpvOpNop = 0,
SpvOpUndef = 1,
SpvOpSourceContinued = 2,
SpvOpSource = 3,
SpvOpSourceExtension = 4,
SpvOpName = 5,
SpvOpMemberName = 6,
SpvOpString = 7,
SpvOpLine = 8,
SpvOpExtension = 10,
SpvOpExtInstImport = 11,
SpvOpExtInst = 12,
SpvOpVectorShuffleCompact = 13, // not in SPIR-V, added for SMOL-V!
SpvOpMemoryModel = 14,
SpvOpEntryPoint = 15,
SpvOpExecutionMode = 16,
SpvOpCapability = 17,
SpvOpTypeVoid = 19,
SpvOpTypeBool = 20,
SpvOpTypeInt = 21,
SpvOpTypeFloat = 22,
SpvOpTypeVector = 23,
SpvOpTypeMatrix = 24,
SpvOpTypeImage = 25,
SpvOpTypeSampler = 26,
SpvOpTypeSampledImage = 27,
SpvOpTypeArray = 28,
SpvOpTypeRuntimeArray = 29,
SpvOpTypeStruct = 30,
SpvOpTypeOpaque = 31,
SpvOpTypePointer = 32,
SpvOpTypeFunction = 33,
SpvOpTypeEvent = 34,
SpvOpTypeDeviceEvent = 35,
SpvOpTypeReserveId = 36,
SpvOpTypeQueue = 37,
SpvOpTypePipe = 38,
SpvOpTypeForwardPointer = 39,
SpvOpConstantTrue = 41,
SpvOpConstantFalse = 42,
SpvOpConstant = 43,
SpvOpConstantComposite = 44,
SpvOpConstantSampler = 45,
SpvOpConstantNull = 46,
SpvOpSpecConstantTrue = 48,
SpvOpSpecConstantFalse = 49,
SpvOpSpecConstant = 50,
SpvOpSpecConstantComposite = 51,
SpvOpSpecConstantOp = 52,
SpvOpFunction = 54,
SpvOpFunctionParameter = 55,
SpvOpFunctionEnd = 56,
SpvOpFunctionCall = 57,
SpvOpVariable = 59,
SpvOpImageTexelPointer = 60,
SpvOpLoad = 61,
SpvOpStore = 62,
SpvOpCopyMemory = 63,
SpvOpCopyMemorySized = 64,
SpvOpAccessChain = 65,
SpvOpInBoundsAccessChain = 66,
SpvOpPtrAccessChain = 67,
SpvOpArrayLength = 68,
SpvOpGenericPtrMemSemantics = 69,
SpvOpInBoundsPtrAccessChain = 70,
SpvOpDecorate = 71,
SpvOpMemberDecorate = 72,
SpvOpDecorationGroup = 73,
SpvOpGroupDecorate = 74,
SpvOpGroupMemberDecorate = 75,
SpvOpVectorExtractDynamic = 77,
SpvOpVectorInsertDynamic = 78,
SpvOpVectorShuffle = 79,
SpvOpCompositeConstruct = 80,
SpvOpCompositeExtract = 81,
SpvOpCompositeInsert = 82,
SpvOpCopyObject = 83,
SpvOpTranspose = 84,
SpvOpSampledImage = 86,
SpvOpImageSampleImplicitLod = 87,
SpvOpImageSampleExplicitLod = 88,
SpvOpImageSampleDrefImplicitLod = 89,
SpvOpImageSampleDrefExplicitLod = 90,
SpvOpImageSampleProjImplicitLod = 91,
SpvOpImageSampleProjExplicitLod = 92,
SpvOpImageSampleProjDrefImplicitLod = 93,
SpvOpImageSampleProjDrefExplicitLod = 94,
SpvOpImageFetch = 95,
SpvOpImageGather = 96,
SpvOpImageDrefGather = 97,
SpvOpImageRead = 98,
SpvOpImageWrite = 99,
SpvOpImage = 100,
SpvOpImageQueryFormat = 101,
SpvOpImageQueryOrder = 102,
SpvOpImageQuerySizeLod = 103,
SpvOpImageQuerySize = 104,
SpvOpImageQueryLod = 105,
SpvOpImageQueryLevels = 106,
SpvOpImageQuerySamples = 107,
SpvOpConvertFToU = 109,
SpvOpConvertFToS = 110,
SpvOpConvertSToF = 111,
SpvOpConvertUToF = 112,
SpvOpUConvert = 113,
SpvOpSConvert = 114,
SpvOpFConvert = 115,
SpvOpQuantizeToF16 = 116,
SpvOpConvertPtrToU = 117,
SpvOpSatConvertSToU = 118,
SpvOpSatConvertUToS = 119,
SpvOpConvertUToPtr = 120,
SpvOpPtrCastToGeneric = 121,
SpvOpGenericCastToPtr = 122,
SpvOpGenericCastToPtrExplicit = 123,
SpvOpBitcast = 124,
SpvOpSNegate = 126,
SpvOpFNegate = 127,
SpvOpIAdd = 128,
SpvOpFAdd = 129,
SpvOpISub = 130,
SpvOpFSub = 131,
SpvOpIMul = 132,
SpvOpFMul = 133,
SpvOpUDiv = 134,
SpvOpSDiv = 135,
SpvOpFDiv = 136,
SpvOpUMod = 137,
SpvOpSRem = 138,
SpvOpSMod = 139,
SpvOpFRem = 140,
SpvOpFMod = 141,
SpvOpVectorTimesScalar = 142,
SpvOpMatrixTimesScalar = 143,
SpvOpVectorTimesMatrix = 144,
SpvOpMatrixTimesVector = 145,
SpvOpMatrixTimesMatrix = 146,
SpvOpOuterProduct = 147,
SpvOpDot = 148,
SpvOpIAddCarry = 149,
SpvOpISubBorrow = 150,
SpvOpUMulExtended = 151,
SpvOpSMulExtended = 152,
SpvOpAny = 154,
SpvOpAll = 155,
SpvOpIsNan = 156,
SpvOpIsInf = 157,
SpvOpIsFinite = 158,
SpvOpIsNormal = 159,
SpvOpSignBitSet = 160,
SpvOpLessOrGreater = 161,
SpvOpOrdered = 162,
SpvOpUnordered = 163,
SpvOpLogicalEqual = 164,
SpvOpLogicalNotEqual = 165,
SpvOpLogicalOr = 166,
SpvOpLogicalAnd = 167,
SpvOpLogicalNot = 168,
SpvOpSelect = 169,
SpvOpIEqual = 170,
SpvOpINotEqual = 171,
SpvOpUGreaterThan = 172,
SpvOpSGreaterThan = 173,
SpvOpUGreaterThanEqual = 174,
SpvOpSGreaterThanEqual = 175,
SpvOpULessThan = 176,
SpvOpSLessThan = 177,
SpvOpULessThanEqual = 178,
SpvOpSLessThanEqual = 179,
SpvOpFOrdEqual = 180,
SpvOpFUnordEqual = 181,
SpvOpFOrdNotEqual = 182,
SpvOpFUnordNotEqual = 183,
SpvOpFOrdLessThan = 184,
SpvOpFUnordLessThan = 185,
SpvOpFOrdGreaterThan = 186,
SpvOpFUnordGreaterThan = 187,
SpvOpFOrdLessThanEqual = 188,
SpvOpFUnordLessThanEqual = 189,
SpvOpFOrdGreaterThanEqual = 190,
SpvOpFUnordGreaterThanEqual = 191,
SpvOpShiftRightLogical = 194,
SpvOpShiftRightArithmetic = 195,
SpvOpShiftLeftLogical = 196,
SpvOpBitwiseOr = 197,
SpvOpBitwiseXor = 198,
SpvOpBitwiseAnd = 199,
SpvOpNot = 200,
SpvOpBitFieldInsert = 201,
SpvOpBitFieldSExtract = 202,
SpvOpBitFieldUExtract = 203,
SpvOpBitReverse = 204,
SpvOpBitCount = 205,
SpvOpDPdx = 207,
SpvOpDPdy = 208,
SpvOpFwidth = 209,
SpvOpDPdxFine = 210,
SpvOpDPdyFine = 211,
SpvOpFwidthFine = 212,
SpvOpDPdxCoarse = 213,
SpvOpDPdyCoarse = 214,
SpvOpFwidthCoarse = 215,
SpvOpEmitVertex = 218,
SpvOpEndPrimitive = 219,
SpvOpEmitStreamVertex = 220,
SpvOpEndStreamPrimitive = 221,
SpvOpControlBarrier = 224,
SpvOpMemoryBarrier = 225,
SpvOpAtomicLoad = 227,
SpvOpAtomicStore = 228,
SpvOpAtomicExchange = 229,
SpvOpAtomicCompareExchange = 230,
SpvOpAtomicCompareExchangeWeak = 231,
SpvOpAtomicIIncrement = 232,
SpvOpAtomicIDecrement = 233,
SpvOpAtomicIAdd = 234,
SpvOpAtomicISub = 235,
SpvOpAtomicSMin = 236,
SpvOpAtomicUMin = 237,
SpvOpAtomicSMax = 238,
SpvOpAtomicUMax = 239,
SpvOpAtomicAnd = 240,
SpvOpAtomicOr = 241,
SpvOpAtomicXor = 242,
SpvOpPhi = 245,
SpvOpLoopMerge = 246,
SpvOpSelectionMerge = 247,
SpvOpLabel = 248,
SpvOpBranch = 249,
SpvOpBranchConditional = 250,
SpvOpSwitch = 251,
SpvOpKill = 252,
SpvOpReturn = 253,
SpvOpReturnValue = 254,
SpvOpUnreachable = 255,
SpvOpLifetimeStart = 256,
SpvOpLifetimeStop = 257,
SpvOpGroupAsyncCopy = 259,
SpvOpGroupWaitEvents = 260,
SpvOpGroupAll = 261,
SpvOpGroupAny = 262,
SpvOpGroupBroadcast = 263,
SpvOpGroupIAdd = 264,
SpvOpGroupFAdd = 265,
SpvOpGroupFMin = 266,
SpvOpGroupUMin = 267,
SpvOpGroupSMin = 268,
SpvOpGroupFMax = 269,
SpvOpGroupUMax = 270,
SpvOpGroupSMax = 271,
SpvOpReadPipe = 274,
SpvOpWritePipe = 275,
SpvOpReservedReadPipe = 276,
SpvOpReservedWritePipe = 277,
SpvOpReserveReadPipePackets = 278,
SpvOpReserveWritePipePackets = 279,
SpvOpCommitReadPipe = 280,
SpvOpCommitWritePipe = 281,
SpvOpIsValidReserveId = 282,
SpvOpGetNumPipePackets = 283,
SpvOpGetMaxPipePackets = 284,
SpvOpGroupReserveReadPipePackets = 285,
SpvOpGroupReserveWritePipePackets = 286,
SpvOpGroupCommitReadPipe = 287,
SpvOpGroupCommitWritePipe = 288,
SpvOpEnqueueMarker = 291,
SpvOpEnqueueKernel = 292,
SpvOpGetKernelNDrangeSubGroupCount = 293,
SpvOpGetKernelNDrangeMaxSubGroupSize = 294,
SpvOpGetKernelWorkGroupSize = 295,
SpvOpGetKernelPreferredWorkGroupSizeMultiple = 296,
SpvOpRetainEvent = 297,
SpvOpReleaseEvent = 298,
SpvOpCreateUserEvent = 299,
SpvOpIsValidEvent = 300,
SpvOpSetUserEventStatus = 301,
SpvOpCaptureEventProfilingInfo = 302,
SpvOpGetDefaultQueue = 303,
SpvOpBuildNDRange = 304,
SpvOpImageSparseSampleImplicitLod = 305,
SpvOpImageSparseSampleExplicitLod = 306,
SpvOpImageSparseSampleDrefImplicitLod = 307,
SpvOpImageSparseSampleDrefExplicitLod = 308,
SpvOpImageSparseSampleProjImplicitLod = 309,
SpvOpImageSparseSampleProjExplicitLod = 310,
SpvOpImageSparseSampleProjDrefImplicitLod = 311,
SpvOpImageSparseSampleProjDrefExplicitLod = 312,
SpvOpImageSparseFetch = 313,
SpvOpImageSparseGather = 314,
SpvOpImageSparseDrefGather = 315,
SpvOpImageSparseTexelsResident = 316,
SpvOpNoLine = 317,
SpvOpAtomicFlagTestAndSet = 318,
SpvOpAtomicFlagClear = 319,
SpvOpImageSparseRead = 320,
SpvOpSizeOf = 321,
SpvOpTypePipeStorage = 322,
SpvOpConstantPipeStorage = 323,
SpvOpCreatePipeFromPipeStorage = 324,
SpvOpGetKernelLocalSizeForSubgroupCount = 325,
SpvOpGetKernelMaxNumSubgroups = 326,
SpvOpTypeNamedBarrier = 327,
SpvOpNamedBarrierInitialize = 328,
SpvOpMemoryNamedBarrier = 329,
SpvOpModuleProcessed = 330,
SpvOpExecutionModeId = 331,
SpvOpDecorateId = 332,
SpvOpGroupNonUniformElect = 333,
SpvOpGroupNonUniformAll = 334,
SpvOpGroupNonUniformAny = 335,
SpvOpGroupNonUniformAllEqual = 336,
SpvOpGroupNonUniformBroadcast = 337,
SpvOpGroupNonUniformBroadcastFirst = 338,
SpvOpGroupNonUniformBallot = 339,
SpvOpGroupNonUniformInverseBallot = 340,
SpvOpGroupNonUniformBallotBitExtract = 341,
SpvOpGroupNonUniformBallotBitCount = 342,
SpvOpGroupNonUniformBallotFindLSB = 343,
SpvOpGroupNonUniformBallotFindMSB = 344,
SpvOpGroupNonUniformShuffle = 345,
SpvOpGroupNonUniformShuffleXor = 346,
SpvOpGroupNonUniformShuffleUp = 347,
SpvOpGroupNonUniformShuffleDown = 348,
SpvOpGroupNonUniformIAdd = 349,
SpvOpGroupNonUniformFAdd = 350,
SpvOpGroupNonUniformIMul = 351,
SpvOpGroupNonUniformFMul = 352,
SpvOpGroupNonUniformSMin = 353,
SpvOpGroupNonUniformUMin = 354,
SpvOpGroupNonUniformFMin = 355,
SpvOpGroupNonUniformSMax = 356,
SpvOpGroupNonUniformUMax = 357,
SpvOpGroupNonUniformFMax = 358,
SpvOpGroupNonUniformBitwiseAnd = 359,
SpvOpGroupNonUniformBitwiseOr = 360,
SpvOpGroupNonUniformBitwiseXor = 361,
SpvOpGroupNonUniformLogicalAnd = 362,
SpvOpGroupNonUniformLogicalOr = 363,
SpvOpGroupNonUniformLogicalXor = 364,
SpvOpGroupNonUniformQuadBroadcast = 365,
SpvOpGroupNonUniformQuadSwap = 366,
};
static const int kKnownOpsCount = SpvOpGroupNonUniformQuadSwap+1;
static const char* kSpirvOpNames[] =
{
"Nop",
"Undef",
"SourceContinued",
"Source",
"SourceExtension",
"Name",
"MemberName",
"String",
"Line",
"#9",
"Extension",
"ExtInstImport",
"ExtInst",
"VectorShuffleCompact",
"MemoryModel",
"EntryPoint",
"ExecutionMode",
"Capability",
"#18",
"TypeVoid",
"TypeBool",
"TypeInt",
"TypeFloat",
"TypeVector",
"TypeMatrix",
"TypeImage",
"TypeSampler",
"TypeSampledImage",
"TypeArray",
"TypeRuntimeArray",
"TypeStruct",
"TypeOpaque",
"TypePointer",
"TypeFunction",
"TypeEvent",
"TypeDeviceEvent",
"TypeReserveId",
"TypeQueue",
"TypePipe",
"TypeForwardPointer",
"#40",
"ConstantTrue",
"ConstantFalse",
"Constant",
"ConstantComposite",
"ConstantSampler",
"ConstantNull",
"#47",
"SpecConstantTrue",
"SpecConstantFalse",
"SpecConstant",
"SpecConstantComposite",
"SpecConstantOp",
"#53",
"Function",
"FunctionParameter",
"FunctionEnd",
"FunctionCall",
"#58",
"Variable",
"ImageTexelPointer",
"Load",
"Store",
"CopyMemory",
"CopyMemorySized",
"AccessChain",
"InBoundsAccessChain",
"PtrAccessChain",
"ArrayLength",
"GenericPtrMemSemantics",
"InBoundsPtrAccessChain",
"Decorate",
"MemberDecorate",
"DecorationGroup",
"GroupDecorate",
"GroupMemberDecorate",
"#76",
"VectorExtractDynamic",
"VectorInsertDynamic",
"VectorShuffle",
"CompositeConstruct",
"CompositeExtract",
"CompositeInsert",
"CopyObject",
"Transpose",
"#85",
"SampledImage",
"ImageSampleImplicitLod",
"ImageSampleExplicitLod",
"ImageSampleDrefImplicitLod",
"ImageSampleDrefExplicitLod",
"ImageSampleProjImplicitLod",
"ImageSampleProjExplicitLod",
"ImageSampleProjDrefImplicitLod",
"ImageSampleProjDrefExplicitLod",
"ImageFetch",
"ImageGather",
"ImageDrefGather",
"ImageRead",
"ImageWrite",
"Image",
"ImageQueryFormat",
"ImageQueryOrder",
"ImageQuerySizeLod",
"ImageQuerySize",
"ImageQueryLod",
"ImageQueryLevels",
"ImageQuerySamples",
"#108",
"ConvertFToU",
"ConvertFToS",
"ConvertSToF",
"ConvertUToF",
"UConvert",
"SConvert",
"FConvert",
"QuantizeToF16",
"ConvertPtrToU",
"SatConvertSToU",
"SatConvertUToS",
"ConvertUToPtr",
"PtrCastToGeneric",
"GenericCastToPtr",
"GenericCastToPtrExplicit",
"Bitcast",
"#125",
"SNegate",
"FNegate",
"IAdd",
"FAdd",
"ISub",
"FSub",
"IMul",
"FMul",
"UDiv",
"SDiv",
"FDiv",
"UMod",
"SRem",
"SMod",
"FRem",
"FMod",
"VectorTimesScalar",
"MatrixTimesScalar",
"VectorTimesMatrix",
"MatrixTimesVector",
"MatrixTimesMatrix",
"OuterProduct",
"Dot",
"IAddCarry",
"ISubBorrow",
"UMulExtended",
"SMulExtended",
"#153",
"Any",
"All",
"IsNan",
"IsInf",
"IsFinite",
"IsNormal",
"SignBitSet",
"LessOrGreater",
"Ordered",
"Unordered",
"LogicalEqual",
"LogicalNotEqual",
"LogicalOr",
"LogicalAnd",
"LogicalNot",
"Select",
"IEqual",
"INotEqual",
"UGreaterThan",
"SGreaterThan",
"UGreaterThanEqual",
"SGreaterThanEqual",
"ULessThan",
"SLessThan",
"ULessThanEqual",
"SLessThanEqual",
"FOrdEqual",
"FUnordEqual",
"FOrdNotEqual",
"FUnordNotEqual",
"FOrdLessThan",
"FUnordLessThan",
"FOrdGreaterThan",
"FUnordGreaterThan",
"FOrdLessThanEqual",
"FUnordLessThanEqual",
"FOrdGreaterThanEqual",
"FUnordGreaterThanEqual",
"#192",
"#193",
"ShiftRightLogical",
"ShiftRightArithmetic",
"ShiftLeftLogical",
"BitwiseOr",
"BitwiseXor",
"BitwiseAnd",
"Not",
"BitFieldInsert",
"BitFieldSExtract",
"BitFieldUExtract",
"BitReverse",
"BitCount",
"#206",
"DPdx",
"DPdy",
"Fwidth",
"DPdxFine",
"DPdyFine",
"FwidthFine",
"DPdxCoarse",
"DPdyCoarse",
"FwidthCoarse",
"#216",
"#217",
"EmitVertex",
"EndPrimitive",
"EmitStreamVertex",
"EndStreamPrimitive",
"#222",
"#223",
"ControlBarrier",
"MemoryBarrier",
"#226",
"AtomicLoad",
"AtomicStore",
"AtomicExchange",
"AtomicCompareExchange",
"AtomicCompareExchangeWeak",
"AtomicIIncrement",
"AtomicIDecrement",
"AtomicIAdd",
"AtomicISub",
"AtomicSMin",
"AtomicUMin",
"AtomicSMax",
"AtomicUMax",
"AtomicAnd",
"AtomicOr",
"AtomicXor",
"#243",
"#244",
"Phi",
"LoopMerge",
"SelectionMerge",
"Label",
"Branch",
"BranchConditional",
"Switch",
"Kill",
"Return",
"ReturnValue",
"Unreachable",
"LifetimeStart",
"LifetimeStop",
"#258",
"GroupAsyncCopy",
"GroupWaitEvents",
"GroupAll",
"GroupAny",
"GroupBroadcast",
"GroupIAdd",
"GroupFAdd",
"GroupFMin",
"GroupUMin",
"GroupSMin",
"GroupFMax",
"GroupUMax",
"GroupSMax",
"#272",
"#273",
"ReadPipe",
"WritePipe",
"ReservedReadPipe",
"ReservedWritePipe",
"ReserveReadPipePackets",
"ReserveWritePipePackets",
"CommitReadPipe",
"CommitWritePipe",
"IsValidReserveId",
"GetNumPipePackets",
"GetMaxPipePackets",
"GroupReserveReadPipePackets",
"GroupReserveWritePipePackets",
"GroupCommitReadPipe",
"GroupCommitWritePipe",
"#289",
"#290",
"EnqueueMarker",
"EnqueueKernel",
"GetKernelNDrangeSubGroupCount",
"GetKernelNDrangeMaxSubGroupSize",
"GetKernelWorkGroupSize",
"GetKernelPreferredWorkGroupSizeMultiple",
"RetainEvent",
"ReleaseEvent",
"CreateUserEvent",
"IsValidEvent",
"SetUserEventStatus",
"CaptureEventProfilingInfo",
"GetDefaultQueue",
"BuildNDRange",
"ImageSparseSampleImplicitLod",
"ImageSparseSampleExplicitLod",
"ImageSparseSampleDrefImplicitLod",
"ImageSparseSampleDrefExplicitLod",
"ImageSparseSampleProjImplicitLod",
"ImageSparseSampleProjExplicitLod",
"ImageSparseSampleProjDrefImplicitLod",
"ImageSparseSampleProjDrefExplicitLod",
"ImageSparseFetch",
"ImageSparseGather",
"ImageSparseDrefGather",
"ImageSparseTexelsResident",
"NoLine",
"AtomicFlagTestAndSet",
"AtomicFlagClear",
"ImageSparseRead",
"SizeOf",
"TypePipeStorage",
"ConstantPipeStorage",
"CreatePipeFromPipeStorage",
"GetKernelLocalSizeForSubgroupCount",
"GetKernelMaxNumSubgroups",
"TypeNamedBarrier",
"NamedBarrierInitialize",
"MemoryNamedBarrier",
"ModuleProcessed",
"ExecutionModeId",
"DecorateId",
"GroupNonUniformElect",
"GroupNonUniformAll",
"GroupNonUniformAny",
"GroupNonUniformAllEqual",
"GroupNonUniformBroadcast",
"GroupNonUniformBroadcastFirst",
"GroupNonUniformBallot",
"GroupNonUniformInverseBallot",
"GroupNonUniformBallotBitExtract",
"GroupNonUniformBallotBitCount",
"GroupNonUniformBallotFindLSB",
"GroupNonUniformBallotFindMSB",
"GroupNonUniformShuffle",
"GroupNonUniformShuffleXor",
"GroupNonUniformShuffleUp",
"GroupNonUniformShuffleDown",
"GroupNonUniformIAdd",
"GroupNonUniformFAdd",
"GroupNonUniformIMul",
"GroupNonUniformFMul",
"GroupNonUniformSMin",
"GroupNonUniformUMin",
"GroupNonUniformFMin",
"GroupNonUniformSMax",
"GroupNonUniformUMax",
"GroupNonUniformFMax",
"GroupNonUniformBitwiseAnd",
"GroupNonUniformBitwiseOr",
"GroupNonUniformBitwiseXor",
"GroupNonUniformLogicalAnd",
"GroupNonUniformLogicalOr",
"GroupNonUniformLogicalXor",
"GroupNonUniformQuadBroadcast",
"GroupNonUniformQuadSwap",
};
static_assert(_SMOLV_ARRAY_SIZE(kSpirvOpNames) == kKnownOpsCount, "kSpirvOpNames table mismatch with known SpvOps");
struct OpData
{
uint8_t hasResult; // does it have result ID?
uint8_t hasType; // does it have type ID?
uint8_t deltaFromResult; // How many words after (optional) type+result to write out as deltas from result?
uint8_t varrest; // should the rest of words be written in varint encoding?
};
static const OpData kSpirvOpData[] =
{
{0, 0, 0, 0}, // Nop
{1, 1, 0, 0}, // Undef
{0, 0, 0, 0}, // SourceContinued
{0, 0, 0, 1}, // Source
{0, 0, 0, 0}, // SourceExtension
{0, 0, 0, 0}, // Name
{0, 0, 0, 0}, // MemberName
{0, 0, 0, 0}, // String
{0, 0, 0, 1}, // Line
{1, 1, 0, 0}, // #9
{0, 0, 0, 0}, // Extension
{1, 0, 0, 0}, // ExtInstImport
{1, 1, 0, 1}, // ExtInst
{1, 1, 2, 1}, // VectorShuffleCompact - new in SMOLV
{0, 0, 0, 1}, // MemoryModel
{0, 0, 0, 1}, // EntryPoint
{0, 0, 0, 1}, // ExecutionMode
{0, 0, 0, 1}, // Capability
{1, 1, 0, 0}, // #18
{1, 0, 0, 1}, // TypeVoid
{1, 0, 0, 1}, // TypeBool
{1, 0, 0, 1}, // TypeInt
{1, 0, 0, 1}, // TypeFloat
{1, 0, 0, 1}, // TypeVector
{1, 0, 0, 1}, // TypeMatrix
{1, 0, 0, 1}, // TypeImage
{1, 0, 0, 1}, // TypeSampler
{1, 0, 0, 1}, // TypeSampledImage
{1, 0, 0, 1}, // TypeArray
{1, 0, 0, 1}, // TypeRuntimeArray
{1, 0, 0, 1}, // TypeStruct
{1, 0, 0, 1}, // TypeOpaque
{1, 0, 0, 1}, // TypePointer
{1, 0, 0, 1}, // TypeFunction
{1, 0, 0, 1}, // TypeEvent
{1, 0, 0, 1}, // TypeDeviceEvent
{1, 0, 0, 1}, // TypeReserveId
{1, 0, 0, 1}, // TypeQueue
{1, 0, 0, 1}, // TypePipe
{0, 0, 0, 1}, // TypeForwardPointer
{1, 1, 0, 0}, // #40
{1, 1, 0, 0}, // ConstantTrue
{1, 1, 0, 0}, // ConstantFalse
{1, 1, 0, 0}, // Constant
{1, 1, 9, 0}, // ConstantComposite
{1, 1, 0, 1}, // ConstantSampler
{1, 1, 0, 0}, // ConstantNull
{1, 1, 0, 0}, // #47
{1, 1, 0, 0}, // SpecConstantTrue
{1, 1, 0, 0}, // SpecConstantFalse
{1, 1, 0, 0}, // SpecConstant
{1, 1, 9, 0}, // SpecConstantComposite
{1, 1, 0, 0}, // SpecConstantOp
{1, 1, 0, 0}, // #53
{1, 1, 0, 1}, // Function
{1, 1, 0, 0}, // FunctionParameter
{0, 0, 0, 0}, // FunctionEnd
{1, 1, 9, 0}, // FunctionCall
{1, 1, 0, 0}, // #58
{1, 1, 0, 1}, // Variable
{1, 1, 0, 0}, // ImageTexelPointer
{1, 1, 1, 1}, // Load
{0, 0, 2, 1}, // Store
{0, 0, 0, 0}, // CopyMemory
{0, 0, 0, 0}, // CopyMemorySized
{1, 1, 0, 1}, // AccessChain
{1, 1, 0, 0}, // InBoundsAccessChain
{1, 1, 0, 0}, // PtrAccessChain
{1, 1, 0, 0}, // ArrayLength
{1, 1, 0, 0}, // GenericPtrMemSemantics
{1, 1, 0, 0}, // InBoundsPtrAccessChain
{0, 0, 0, 1}, // Decorate
{0, 0, 0, 1}, // MemberDecorate
{1, 0, 0, 0}, // DecorationGroup
{0, 0, 0, 0}, // GroupDecorate
{0, 0, 0, 0}, // GroupMemberDecorate
{1, 1, 0, 0}, // #76
{1, 1, 1, 1}, // VectorExtractDynamic
{1, 1, 2, 1}, // VectorInsertDynamic
{1, 1, 2, 1}, // VectorShuffle
{1, 1, 9, 0}, // CompositeConstruct
{1, 1, 1, 1}, // CompositeExtract
{1, 1, 2, 1}, // CompositeInsert
{1, 1, 1, 0}, // CopyObject
{1, 1, 0, 0}, // Transpose
{1, 1, 0, 0}, // #85
{1, 1, 0, 0}, // SampledImage
{1, 1, 2, 1}, // ImageSampleImplicitLod
{1, 1, 2, 1}, // ImageSampleExplicitLod
{1, 1, 3, 1}, // ImageSampleDrefImplicitLod
{1, 1, 3, 1}, // ImageSampleDrefExplicitLod
{1, 1, 2, 1}, // ImageSampleProjImplicitLod
{1, 1, 2, 1}, // ImageSampleProjExplicitLod
{1, 1, 3, 1}, // ImageSampleProjDrefImplicitLod
{1, 1, 3, 1}, // ImageSampleProjDrefExplicitLod
{1, 1, 2, 1}, // ImageFetch
{1, 1, 3, 1}, // ImageGather
{1, 1, 3, 1}, // ImageDrefGather
{1, 1, 2, 1}, // ImageRead
{0, 0, 3, 1}, // ImageWrite
{1, 1, 1, 0}, // Image
{1, 1, 1, 0}, // ImageQueryFormat
{1, 1, 1, 0}, // ImageQueryOrder
{1, 1, 2, 0}, // ImageQuerySizeLod
{1, 1, 1, 0}, // ImageQuerySize
{1, 1, 2, 0}, // ImageQueryLod
{1, 1, 1, 0}, // ImageQueryLevels
{1, 1, 1, 0}, // ImageQuerySamples
{1, 1, 0, 0}, // #108
{1, 1, 1, 0}, // ConvertFToU
{1, 1, 1, 0}, // ConvertFToS
{1, 1, 1, 0}, // ConvertSToF
{1, 1, 1, 0}, // ConvertUToF
{1, 1, 1, 0}, // UConvert
{1, 1, 1, 0}, // SConvert
{1, 1, 1, 0}, // FConvert
{1, 1, 1, 0}, // QuantizeToF16
{1, 1, 1, 0}, // ConvertPtrToU
{1, 1, 1, 0}, // SatConvertSToU
{1, 1, 1, 0}, // SatConvertUToS
{1, 1, 1, 0}, // ConvertUToPtr
{1, 1, 1, 0}, // PtrCastToGeneric
{1, 1, 1, 0}, // GenericCastToPtr
{1, 1, 1, 1}, // GenericCastToPtrExplicit
{1, 1, 1, 0}, // Bitcast
{1, 1, 0, 0}, // #125
{1, 1, 1, 0}, // SNegate
{1, 1, 1, 0}, // FNegate
{1, 1, 2, 0}, // IAdd
{1, 1, 2, 0}, // FAdd
{1, 1, 2, 0}, // ISub
{1, 1, 2, 0}, // FSub
{1, 1, 2, 0}, // IMul
{1, 1, 2, 0}, // FMul
{1, 1, 2, 0}, // UDiv
{1, 1, 2, 0}, // SDiv
{1, 1, 2, 0}, // FDiv
{1, 1, 2, 0}, // UMod
{1, 1, 2, 0}, // SRem
{1, 1, 2, 0}, // SMod
{1, 1, 2, 0}, // FRem
{1, 1, 2, 0}, // FMod
{1, 1, 2, 0}, // VectorTimesScalar
{1, 1, 2, 0}, // MatrixTimesScalar
{1, 1, 2, 0}, // VectorTimesMatrix
{1, 1, 2, 0}, // MatrixTimesVector
{1, 1, 2, 0}, // MatrixTimesMatrix
{1, 1, 2, 0}, // OuterProduct
{1, 1, 2, 0}, // Dot
{1, 1, 2, 0}, // IAddCarry
{1, 1, 2, 0}, // ISubBorrow
{1, 1, 2, 0}, // UMulExtended
{1, 1, 2, 0}, // SMulExtended
{1, 1, 0, 0}, // #153
{1, 1, 1, 0}, // Any
{1, 1, 1, 0}, // All
{1, 1, 1, 0}, // IsNan
{1, 1, 1, 0}, // IsInf
{1, 1, 1, 0}, // IsFinite
{1, 1, 1, 0}, // IsNormal
{1, 1, 1, 0}, // SignBitSet
{1, 1, 2, 0}, // LessOrGreater
{1, 1, 2, 0}, // Ordered
{1, 1, 2, 0}, // Unordered
{1, 1, 2, 0}, // LogicalEqual
{1, 1, 2, 0}, // LogicalNotEqual
{1, 1, 2, 0}, // LogicalOr
{1, 1, 2, 0}, // LogicalAnd
{1, 1, 1, 0}, // LogicalNot
{1, 1, 3, 0}, // Select
{1, 1, 2, 0}, // IEqual
{1, 1, 2, 0}, // INotEqual
{1, 1, 2, 0}, // UGreaterThan
{1, 1, 2, 0}, // SGreaterThan
{1, 1, 2, 0}, // UGreaterThanEqual
{1, 1, 2, 0}, // SGreaterThanEqual
{1, 1, 2, 0}, // ULessThan
{1, 1, 2, 0}, // SLessThan
{1, 1, 2, 0}, // ULessThanEqual
{1, 1, 2, 0}, // SLessThanEqual
{1, 1, 2, 0}, // FOrdEqual
{1, 1, 2, 0}, // FUnordEqual
{1, 1, 2, 0}, // FOrdNotEqual
{1, 1, 2, 0}, // FUnordNotEqual
{1, 1, 2, 0}, // FOrdLessThan
{1, 1, 2, 0}, // FUnordLessThan
{1, 1, 2, 0}, // FOrdGreaterThan
{1, 1, 2, 0}, // FUnordGreaterThan
{1, 1, 2, 0}, // FOrdLessThanEqual
{1, 1, 2, 0}, // FUnordLessThanEqual
{1, 1, 2, 0}, // FOrdGreaterThanEqual
{1, 1, 2, 0}, // FUnordGreaterThanEqual
{1, 1, 0, 0}, // #192
{1, 1, 0, 0}, // #193
{1, 1, 2, 0}, // ShiftRightLogical
{1, 1, 2, 0}, // ShiftRightArithmetic
{1, 1, 2, 0}, // ShiftLeftLogical
{1, 1, 2, 0}, // BitwiseOr
{1, 1, 2, 0}, // BitwiseXor
{1, 1, 2, 0}, // BitwiseAnd
{1, 1, 1, 0}, // Not
{1, 1, 4, 0}, // BitFieldInsert
{1, 1, 3, 0}, // BitFieldSExtract
{1, 1, 3, 0}, // BitFieldUExtract
{1, 1, 1, 0}, // BitReverse
{1, 1, 1, 0}, // BitCount
{1, 1, 0, 0}, // #206
{1, 1, 0, 0}, // DPdx
{1, 1, 0, 0}, // DPdy
{1, 1, 0, 0}, // Fwidth
{1, 1, 0, 0}, // DPdxFine
{1, 1, 0, 0}, // DPdyFine
{1, 1, 0, 0}, // FwidthFine
{1, 1, 0, 0}, // DPdxCoarse
{1, 1, 0, 0}, // DPdyCoarse
{1, 1, 0, 0}, // FwidthCoarse
{1, 1, 0, 0}, // #216
{1, 1, 0, 0}, // #217
{0, 0, 0, 0}, // EmitVertex
{0, 0, 0, 0}, // EndPrimitive
{0, 0, 0, 0}, // EmitStreamVertex
{0, 0, 0, 0}, // EndStreamPrimitive
{1, 1, 0, 0}, // #222
{1, 1, 0, 0}, // #223
{0, 0, 3, 0}, // ControlBarrier
{0, 0, 2, 0}, // MemoryBarrier
{1, 1, 0, 0}, // #226
{1, 1, 0, 0}, // AtomicLoad
{0, 0, 0, 0}, // AtomicStore
{1, 1, 0, 0}, // AtomicExchange
{1, 1, 0, 0}, // AtomicCompareExchange
{1, 1, 0, 0}, // AtomicCompareExchangeWeak
{1, 1, 0, 0}, // AtomicIIncrement
{1, 1, 0, 0}, // AtomicIDecrement
{1, 1, 0, 0}, // AtomicIAdd
{1, 1, 0, 0}, // AtomicISub
{1, 1, 0, 0}, // AtomicSMin
{1, 1, 0, 0}, // AtomicUMin
{1, 1, 0, 0}, // AtomicSMax
{1, 1, 0, 0}, // AtomicUMax
{1, 1, 0, 0}, // AtomicAnd
{1, 1, 0, 0}, // AtomicOr
{1, 1, 0, 0}, // AtomicXor
{1, 1, 0, 0}, // #243
{1, 1, 0, 0}, // #244
{1, 1, 0, 0}, // Phi
{0, 0, 2, 1}, // LoopMerge
{0, 0, 1, 1}, // SelectionMerge
{1, 0, 0, 0}, // Label
{0, 0, 1, 0}, // Branch
{0, 0, 3, 1}, // BranchConditional
{0, 0, 0, 0}, // Switch
{0, 0, 0, 0}, // Kill
{0, 0, 0, 0}, // Return
{0, 0, 0, 0}, // ReturnValue
{0, 0, 0, 0}, // Unreachable
{0, 0, 0, 0}, // LifetimeStart
{0, 0, 0, 0}, // LifetimeStop
{1, 1, 0, 0}, // #258
{1, 1, 0, 0}, // GroupAsyncCopy
{0, 0, 0, 0}, // GroupWaitEvents
{1, 1, 0, 0}, // GroupAll
{1, 1, 0, 0}, // GroupAny
{1, 1, 0, 0}, // GroupBroadcast
{1, 1, 0, 0}, // GroupIAdd
{1, 1, 0, 0}, // GroupFAdd
{1, 1, 0, 0}, // GroupFMin
{1, 1, 0, 0}, // GroupUMin
{1, 1, 0, 0}, // GroupSMin
{1, 1, 0, 0}, // GroupFMax
{1, 1, 0, 0}, // GroupUMax
{1, 1, 0, 0}, // GroupSMax
{1, 1, 0, 0}, // #272
{1, 1, 0, 0}, // #273
{1, 1, 0, 0}, // ReadPipe
{1, 1, 0, 0}, // WritePipe
{1, 1, 0, 0}, // ReservedReadPipe
{1, 1, 0, 0}, // ReservedWritePipe
{1, 1, 0, 0}, // ReserveReadPipePackets
{1, 1, 0, 0}, // ReserveWritePipePackets
{0, 0, 0, 0}, // CommitReadPipe
{0, 0, 0, 0}, // CommitWritePipe
{1, 1, 0, 0}, // IsValidReserveId
{1, 1, 0, 0}, // GetNumPipePackets
{1, 1, 0, 0}, // GetMaxPipePackets
{1, 1, 0, 0}, // GroupReserveReadPipePackets
{1, 1, 0, 0}, // GroupReserveWritePipePackets
{0, 0, 0, 0}, // GroupCommitReadPipe
{0, 0, 0, 0}, // GroupCommitWritePipe
{1, 1, 0, 0}, // #289
{1, 1, 0, 0}, // #290
{1, 1, 0, 0}, // EnqueueMarker
{1, 1, 0, 0}, // EnqueueKernel
{1, 1, 0, 0}, // GetKernelNDrangeSubGroupCount
{1, 1, 0, 0}, // GetKernelNDrangeMaxSubGroupSize
{1, 1, 0, 0}, // GetKernelWorkGroupSize
{1, 1, 0, 0}, // GetKernelPreferredWorkGroupSizeMultiple
{0, 0, 0, 0}, // RetainEvent
{0, 0, 0, 0}, // ReleaseEvent
{1, 1, 0, 0}, // CreateUserEvent
{1, 1, 0, 0}, // IsValidEvent
{0, 0, 0, 0}, // SetUserEventStatus
{0, 0, 0, 0}, // CaptureEventProfilingInfo
{1, 1, 0, 0}, // GetDefaultQueue
{1, 1, 0, 0}, // BuildNDRange
{1, 1, 2, 1}, // ImageSparseSampleImplicitLod
{1, 1, 2, 1}, // ImageSparseSampleExplicitLod
{1, 1, 3, 1}, // ImageSparseSampleDrefImplicitLod
{1, 1, 3, 1}, // ImageSparseSampleDrefExplicitLod
{1, 1, 2, 1}, // ImageSparseSampleProjImplicitLod
{1, 1, 2, 1}, // ImageSparseSampleProjExplicitLod
{1, 1, 3, 1}, // ImageSparseSampleProjDrefImplicitLod
{1, 1, 3, 1}, // ImageSparseSampleProjDrefExplicitLod
{1, 1, 2, 1}, // ImageSparseFetch
{1, 1, 3, 1}, // ImageSparseGather
{1, 1, 3, 1}, // ImageSparseDrefGather
{1, 1, 1, 0}, // ImageSparseTexelsResident
{0, 0, 0, 0}, // NoLine
{1, 1, 0, 0}, // AtomicFlagTestAndSet
{0, 0, 0, 0}, // AtomicFlagClear
{1, 1, 0, 0}, // ImageSparseRead
{1, 1, 0, 0}, // SizeOf
{1, 1, 0, 0}, // TypePipeStorage
{1, 1, 0, 0}, // ConstantPipeStorage
{1, 1, 0, 0}, // CreatePipeFromPipeStorage
{1, 1, 0, 0}, // GetKernelLocalSizeForSubgroupCount
{1, 1, 0, 0}, // GetKernelMaxNumSubgroups
{1, 1, 0, 0}, // TypeNamedBarrier
{1, 1, 0, 1}, // NamedBarrierInitialize
{0, 0, 2, 1}, // MemoryNamedBarrier
{1, 1, 0, 0}, // ModuleProcessed
{0, 0, 0, 1}, // ExecutionModeId
{0, 0, 0, 1}, // DecorateId
{1, 1, 1, 1}, // GroupNonUniformElect
{1, 1, 1, 1}, // GroupNonUniformAll
{1, 1, 1, 1}, // GroupNonUniformAny
{1, 1, 1, 1}, // GroupNonUniformAllEqual
{1, 1, 1, 1}, // GroupNonUniformBroadcast
{1, 1, 1, 1}, // GroupNonUniformBroadcastFirst
{1, 1, 1, 1}, // GroupNonUniformBallot
{1, 1, 1, 1}, // GroupNonUniformInverseBallot
{1, 1, 1, 1}, // GroupNonUniformBallotBitExtract
{1, 1, 1, 1}, // GroupNonUniformBallotBitCount
{1, 1, 1, 1}, // GroupNonUniformBallotFindLSB
{1, 1, 1, 1}, // GroupNonUniformBallotFindMSB
{1, 1, 1, 1}, // GroupNonUniformShuffle
{1, 1, 1, 1}, // GroupNonUniformShuffleXor
{1, 1, 1, 1}, // GroupNonUniformShuffleUp
{1, 1, 1, 1}, // GroupNonUniformShuffleDown
{1, 1, 1, 1}, // GroupNonUniformIAdd
{1, 1, 1, 1}, // GroupNonUniformFAdd
{1, 1, 1, 1}, // GroupNonUniformIMul
{1, 1, 1, 1}, // GroupNonUniformFMul
{1, 1, 1, 1}, // GroupNonUniformSMin
{1, 1, 1, 1}, // GroupNonUniformUMin
{1, 1, 1, 1}, // GroupNonUniformFMin
{1, 1, 1, 1}, // GroupNonUniformSMax
{1, 1, 1, 1}, // GroupNonUniformUMax
{1, 1, 1, 1}, // GroupNonUniformFMax
{1, 1, 1, 1}, // GroupNonUniformBitwiseAnd
{1, 1, 1, 1}, // GroupNonUniformBitwiseOr
{1, 1, 1, 1}, // GroupNonUniformBitwiseXor
{1, 1, 1, 1}, // GroupNonUniformLogicalAnd
{1, 1, 1, 1}, // GroupNonUniformLogicalOr
{1, 1, 1, 1}, // GroupNonUniformLogicalXor
{1, 1, 1, 1}, // GroupNonUniformQuadBroadcast
{1, 1, 1, 1}, // GroupNonUniformQuadSwap
};
static_assert(_SMOLV_ARRAY_SIZE(kSpirvOpData) == kKnownOpsCount, "kSpirvOpData table mismatch with known SpvOps");
// Instruction encoding depends on the table that describes the various SPIR-V opcodes.
// Whenever we change or expand the table, we need to bump up the SMOL-V version, and make
// sure that we can still decode files encoded by an older version.
static int smolv_GetKnownOpsCount(int version)
{
if (version == 0)
return SpvOpModuleProcessed+1;
if (version == 1) // 2020 February, version 1 added ExecutionModeId..GroupNonUniformQuadSwap
return SpvOpGroupNonUniformQuadSwap+1;
return 0;
}
static bool smolv_OpHasResult(SpvOp op, int opsCount)
{
if (op < 0 || op >= opsCount)
return false;
return kSpirvOpData[op].hasResult != 0;
}
static bool smolv_OpHasType(SpvOp op, int opsCount)
{
if (op < 0 || op >= opsCount)
return false;
return kSpirvOpData[op].hasType != 0;
}
static int smolv_OpDeltaFromResult(SpvOp op, int opsCount)
{
if (op < 0 || op >= opsCount)
return 0;
return kSpirvOpData[op].deltaFromResult;
}
static bool smolv_OpVarRest(SpvOp op, int opsCount)
{
if (op < 0 || op >= opsCount)
return false;
return kSpirvOpData[op].varrest != 0;
}
static bool smolv_OpDebugInfo(SpvOp op, int opsCount)
{
return
op == SpvOpSourceContinued ||
op == SpvOpSource ||
op == SpvOpSourceExtension ||
op == SpvOpName ||
op == SpvOpMemberName ||
op == SpvOpString ||
op == SpvOpLine ||
op == SpvOpNoLine ||
op == SpvOpModuleProcessed;
}
static int smolv_DecorationExtraOps(int dec)
{
if (dec == 0 || (dec >= 2 && dec <= 5)) // RelaxedPrecision, Block..ColMajor
return 0;
if (dec >= 29 && dec <= 37) // Stream..XfbStride
return 1;
return -1; // unknown, encode length
}
// --------------------------------------------------------------------------------------------
static bool smolv_CheckGenericHeader(const uint32_t* words, size_t wordCount, uint32_t expectedMagic, uint32_t versionMask)
{
if (!words)
return false;
if (wordCount < 5)
return false;
uint32_t headerMagic = words[0];
if (headerMagic != expectedMagic)
return false;
uint32_t headerVersion = words[1] & versionMask;
if (headerVersion < 0x00010000 || headerVersion > 0x00010500)
return false; // only support 1.0 through 1.5
return true;
}
static const int kSpirVHeaderMagic = 0x07230203;
static const int kSmolHeaderMagic = 0x534D4F4C; // "SMOL"
static const int kSmolCurrEncodingVersion = 1;
static bool smolv_CheckSpirVHeader(const uint32_t* words, size_t wordCount)
{
//@TODO: if SPIR-V header magic was reversed, that means the file got written
// in a "big endian" order. Need to byteswap all words then.
return smolv_CheckGenericHeader(words, wordCount, kSpirVHeaderMagic, 0xFFFFFFFF);
}
static bool smolv_CheckSmolHeader(const uint8_t* bytes, size_t byteCount)
{
if (!smolv_CheckGenericHeader((const uint32_t*)bytes, byteCount/4, kSmolHeaderMagic, 0x00FFFFFF))
return false;
if (byteCount < 24) // one more word past header to store decoded length
return false;
// SMOL-V version
int smolVersion = ((const uint32_t*)bytes)[1] >> 24;
if (smolVersion < 0 || smolVersion > kSmolCurrEncodingVersion)
return false;
return true;
}
static void smolv_Write4(smolv::ByteArray& arr, uint32_t v)
{
arr.push_back(v & 0xFF);
arr.push_back((v >> 8) & 0xFF);
arr.push_back((v >> 16) & 0xFF);
arr.push_back(v >> 24);
}
static void smolv_Write4(uint8_t*& buf, uint32_t v)
{
memcpy(buf, &v, 4);
buf += 4;
}
static bool smolv_Read4(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outv)
{
if (data + 4 > dataEnd)
return false;
outv = (data[0]) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
data += 4;
return true;
}
// --------------------------------------------------------------------------------------------
// Variable-length integer encoding for unsigned integers. In each byte:
// - highest bit set if more bytes follow, cleared if this is last byte.
// - other 7 bits are the actual value payload.
// Takes 1-5 bytes to encode an integer (values between 0 and 127 take one byte, etc.).
static void smolv_WriteVarint(smolv::ByteArray& arr, uint32_t v)
{
while (v > 127)
{
arr.push_back((v & 127) | 128);
v >>= 7;
}
arr.push_back(v & 127);
}
static bool smolv_ReadVarint(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outVal)
{
uint32_t v = 0;
uint32_t shift = 0;
while (data < dataEnd)
{
uint8_t b = *data;
v |= (b & 127) << shift;
shift += 7;
data++;
if (!(b & 128))
break;
}
outVal = v;
return true; //@TODO: report failures
}
static uint32_t smolv_ZigEncode(int32_t i)
{
return (uint32_t(i) << 1) ^ (i >> 31);
}
static int32_t smolv_ZigDecode(uint32_t u)
{
return (u & 1) ? ((u >> 1) ^ ~0) : (u >> 1);
}
// Remap most common Op codes (Load, Store, Decorate, VectorShuffle etc.) to be in < 16 range, for
// more compact varint encoding. This basically swaps rarely used op values that are < 16 with the
// ones that are common.
static SpvOp smolv_RemapOp(SpvOp op)
{
# define _SMOLV_SWAP_OP(op1,op2) if (op==op1) return op2; if (op==op2) return op1
_SMOLV_SWAP_OP(SpvOpDecorate,SpvOpNop); // 0: 24%
_SMOLV_SWAP_OP(SpvOpLoad,SpvOpUndef); // 1: 17%
_SMOLV_SWAP_OP(SpvOpStore,SpvOpSourceContinued); // 2: 9%
_SMOLV_SWAP_OP(SpvOpAccessChain,SpvOpSource); // 3: 7.2%
_SMOLV_SWAP_OP(SpvOpVectorShuffle,SpvOpSourceExtension); // 4: 5.0%
// Name - already small enum value - 5: 4.4%
// MemberName - already small enum value - 6: 2.9%
_SMOLV_SWAP_OP(SpvOpMemberDecorate,SpvOpString); // 7: 4.0%
_SMOLV_SWAP_OP(SpvOpLabel,SpvOpLine); // 8: 0.9%
_SMOLV_SWAP_OP(SpvOpVariable,(SpvOp)9); // 9: 3.9%
_SMOLV_SWAP_OP(SpvOpFMul,SpvOpExtension); // 10: 3.9%
_SMOLV_SWAP_OP(SpvOpFAdd,SpvOpExtInstImport); // 11: 2.5%
// ExtInst - already small enum value - 12: 1.2%
// VectorShuffleCompact - already small enum value - used for compact shuffle encoding
_SMOLV_SWAP_OP(SpvOpTypePointer,SpvOpMemoryModel); // 14: 2.2%
_SMOLV_SWAP_OP(SpvOpFNegate,SpvOpEntryPoint); // 15: 1.1%
# undef _SMOLV_SWAP_OP
return op;
}
// For most compact varint encoding of common instructions, the instruction length should come out
// into 3 bits (be <8). SPIR-V instruction lengths are always at least 1, and for some other
// instructions they are guaranteed to be some other minimum length. Adjust the length before encoding,
// and after decoding accordingly.
static uint32_t smolv_EncodeLen(SpvOp op, uint32_t len)
{
len--;
if (op == SpvOpVectorShuffle) len -= 4;
if (op == SpvOpVectorShuffleCompact) len -= 4;
if (op == SpvOpDecorate) len -= 2;
if (op == SpvOpLoad) len -= 3;
if (op == SpvOpAccessChain) len -= 3;
return len;
}
static uint32_t smolv_DecodeLen(SpvOp op, uint32_t len)
{
len++;
if (op == SpvOpVectorShuffle) len += 4;
if (op == SpvOpVectorShuffleCompact) len += 4;
if (op == SpvOpDecorate) len += 2;
if (op == SpvOpLoad) len += 3;
if (op == SpvOpAccessChain) len += 3;
return len;
}
// Shuffling bits of length + opcode to be more compact in varint encoding in typical cases:
// 0x LLLL OOOO is how SPIR-V encodes it (L=length, O=op), we shuffle into:
// 0x LLLO OOLO, so that common case (op<16, len<8) is encoded into one byte.
static bool smolv_WriteLengthOp(smolv::ByteArray& arr, uint32_t len, SpvOp op)
{
len = smolv_EncodeLen(op, len);
// SPIR-V length field is 16 bits; if we get a larger value that means something
// was wrong, e.g. a vector shuffle instruction with less than 4 words (and our
// adjustment to common lengths in smolv_EncodeLen wrapped around)
if (len > 0xFFFF)
return false;
op = smolv_RemapOp(op);
uint32_t oplen = ((len >> 4) << 20) | ((op >> 4) << 8) | ((len & 0xF) << 4) | (op & 0xF);
smolv_WriteVarint(arr, oplen);
return true;
}
static bool smolv_ReadLengthOp(const uint8_t*& data, const uint8_t* dataEnd, uint32_t& outLen, SpvOp& outOp)
{
uint32_t val;
if (!smolv_ReadVarint(data, dataEnd, val))
return false;
outLen = ((val >> 20) << 4) | ((val >> 4) & 0xF);
outOp = (SpvOp)(((val >> 4) & 0xFFF0) | (val & 0xF));
outOp = smolv_RemapOp(outOp);
outLen = smolv_DecodeLen(outOp, outLen);
return true;
}
#define _SMOLV_READ_OP(len, words, op) \
uint32_t len = words[0] >> 16; \
if (len < 1) return false; /* malformed instruction, length needs to be at least 1 */ \
if (words + len > wordsEnd) return false; /* malformed instruction, goes past end of data */ \
SpvOp op = (SpvOp)(words[0] & 0xFFFF)
bool smolv::Encode(const void* spirvData, size_t spirvSize, ByteArray& outSmolv, uint32_t flags, StripOpNameFilterFunc stripFilter)
{
const size_t wordCount = spirvSize / 4;
if (wordCount * 4 != spirvSize)
return false;
const uint32_t* words = (const uint32_t*)spirvData;
const uint32_t* wordsEnd = words + wordCount;
if (!smolv_CheckSpirVHeader(words, wordCount))
return false;
// reserve space in output (typical compression is to about 30%; reserve half of input space)
outSmolv.reserve(outSmolv.size() + spirvSize/2);
// header (matches SPIR-V one, except different magic)
smolv_Write4(outSmolv, kSmolHeaderMagic);
smolv_Write4(outSmolv, (words[1] & 0x00FFFFFF) + (kSmolCurrEncodingVersion<<24)); // SPIR-V version (_XXX) + SMOL-V version (X___)
smolv_Write4(outSmolv, words[2]); // generator
smolv_Write4(outSmolv, words[3]); // bound
smolv_Write4(outSmolv, words[4]); // schema
const size_t headerSpirvSizeOffset = outSmolv.size(); // size field may get updated later if stripping is enabled
smolv_Write4(outSmolv, (uint32_t)spirvSize); // space needed to decode (i.e. original SPIR-V size)
size_t strippedSpirvWordCount = wordCount;
uint32_t prevResult = 0;
uint32_t prevDecorate = 0;
const int knownOpsCount = smolv_GetKnownOpsCount(kSmolCurrEncodingVersion);
words += 5;
while (words < wordsEnd)
{
_SMOLV_READ_OP(instrLen, words, op);
if ((flags & kEncodeFlagStripDebugInfo) && smolv_OpDebugInfo(op, knownOpsCount))
{
if (!stripFilter || op != SpvOpName || !stripFilter(reinterpret_cast<const char*>(&words[2])))
{
strippedSpirvWordCount -= instrLen;
words += instrLen;
continue;
}
}
// A usual case of vector shuffle, with less than 4 components, each with a value
// in [0..3] range: encode it in a more compact form, with the swizzle pattern in one byte.
// Turn this into a VectorShuffleCompact instruction, that takes up unused slot in Ops.
uint32_t swizzle = 0;
if (op == SpvOpVectorShuffle && instrLen <= 9)
{
uint32_t swz0 = instrLen > 5 ? words[5] : 0;
uint32_t swz1 = instrLen > 6 ? words[6] : 0;
uint32_t swz2 = instrLen > 7 ? words[7] : 0;
uint32_t swz3 = instrLen > 8 ? words[8] : 0;
if (swz0 < 4 && swz1 < 4 && swz2 < 4 && swz3 < 4)
{
op = SpvOpVectorShuffleCompact;
swizzle = (swz0 << 6) | (swz1 << 4) | (swz2 << 2) | (swz3);
}
}
// length + opcode
if (!smolv_WriteLengthOp(outSmolv, instrLen, op))
return false;
size_t ioffs = 1;
// write type as varint, if we have it
if (smolv_OpHasType(op, knownOpsCount))
{
if (ioffs >= instrLen)
return false;
smolv_WriteVarint(outSmolv, words[ioffs]);
ioffs++;
}
// write result as delta+zig+varint, if we have it
if (smolv_OpHasResult(op, knownOpsCount))
{
if (ioffs >= instrLen)
return false;
uint32_t v = words[ioffs];
smolv_WriteVarint(outSmolv, smolv_ZigEncode(v - prevResult)); // some deltas are negative, use zig
prevResult = v;
ioffs++;
}
// Decorate & MemberDecorate: IDs relative to previous decorate
if (op == SpvOpDecorate || op == SpvOpMemberDecorate)
{
if (ioffs >= instrLen)
return false;
uint32_t v = words[ioffs];
smolv_WriteVarint(outSmolv, smolv_ZigEncode(v - prevDecorate)); // spirv-remapped deltas often negative, use zig
prevDecorate = v;
ioffs++;
}
// MemberDecorate special encoding: whole row of MemberDecorate instructions is often referring
// to the same type and linearly increasing member indices. Scan ahead to see how many we have,
// and encode whole bunch as one.
if (op == SpvOpMemberDecorate)
{
// scan ahead until we reach end, non-member-decoration or different type
const uint32_t decorationType = words[ioffs-1];
const uint32_t* memberWords = words;
uint32_t prevIndex = 0;
uint32_t prevOffset = 0;
// write a byte on how many we have encoded as a bunch
size_t countLocation = outSmolv.size();
outSmolv.push_back(0);
int count = 0;
while (memberWords < wordsEnd && count < 255)
{
_SMOLV_READ_OP(memberLen, memberWords, memberOp);
if (memberOp != SpvOpMemberDecorate)
break;
if (memberLen < 4)
return false; // invalid input
if (memberWords[1] != decorationType)
break;
// write member index as delta from previous
uint32_t memberIndex = memberWords[2];
smolv_WriteVarint(outSmolv, memberIndex - prevIndex);
prevIndex = memberIndex;
// decoration (and length if not common/known)
uint32_t memberDec = memberWords[3];
smolv_WriteVarint(outSmolv, memberDec);
const int knownExtraOps = smolv_DecorationExtraOps(memberDec);
if (knownExtraOps == -1)
smolv_WriteVarint(outSmolv, memberLen-4);
else if (unsigned(knownExtraOps) + 4 != memberLen)
return false; // invalid input
// Offset decorations are most often linearly increasing, so encode as deltas
if (memberDec == 35) // Offset
{
if (memberLen != 5)
return false;
smolv_WriteVarint(outSmolv, memberWords[4]-prevOffset);
prevOffset = memberWords[4];
}
else
{
// write rest of decorations as varint
for (uint32_t i = 4; i < memberLen; ++i)
smolv_WriteVarint(outSmolv, memberWords[i]);
}
memberWords += memberLen;
++count;
}
outSmolv[countLocation] = uint8_t(count);
words = memberWords;
continue;
}
// Write out this many IDs, encoding them relative+zigzag to result ID
int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount);
for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs)
{
if (ioffs >= instrLen)
return false;
uint32_t delta = prevResult - words[ioffs];
// some deltas are negative (often on branches, or if program was processed by spirv-remap),
// so use zig encoding
smolv_WriteVarint(outSmolv, smolv_ZigEncode(delta));
}
if (op == SpvOpVectorShuffleCompact)
{
// compact vector shuffle, just write out single swizzle byte
outSmolv.push_back(uint8_t(swizzle));
ioffs = instrLen;
}
else if (smolv_OpVarRest(op, knownOpsCount))
{
// write out rest of words with variable encoding (expected to be small integers)
for (; ioffs < instrLen; ++ioffs)
smolv_WriteVarint(outSmolv, words[ioffs]);
}
else
{
// write out rest of words without any encoding
for (; ioffs < instrLen; ++ioffs)
smolv_Write4(outSmolv, words[ioffs]);
}
words += instrLen;
}
if (strippedSpirvWordCount != wordCount)
{
uint8_t* headerSpirvSize = &outSmolv[headerSpirvSizeOffset];
smolv_Write4(headerSpirvSize, (uint32_t)strippedSpirvWordCount * 4);
}
return true;
}
size_t smolv::GetDecodedBufferSize(const void* smolvData, size_t smolvSize)
{
if (!smolv_CheckSmolHeader((const uint8_t*)smolvData, smolvSize))
return 0;
const uint32_t* words = (const uint32_t*)smolvData;
return words[5];
}
bool smolv::Decode(const void* smolvData, size_t smolvSize, void* spirvOutputBuffer, size_t spirvOutputBufferSize, uint32_t flags)
{
// check header, and whether we have enough output buffer space
const size_t neededBufferSize = GetDecodedBufferSize(smolvData, smolvSize);
if (neededBufferSize == 0)
return false; // invalid SMOL-V
if (spirvOutputBufferSize < neededBufferSize)
return false; // not enough space in output buffer
if (spirvOutputBuffer == NULL)
return false; // output buffer is null
const uint8_t* bytes = (const uint8_t*)smolvData;
const uint8_t* bytesEnd = bytes + smolvSize;
uint8_t* outSpirv = (uint8_t*)spirvOutputBuffer;
uint32_t val;
int smolVersion = 0;
// header
smolv_Write4(outSpirv, kSpirVHeaderMagic); bytes += 4;
smolv_Read4(bytes, bytesEnd, val); smolVersion = val >> 24; val &= 0x00FFFFFF; smolv_Write4(outSpirv, val); // version
smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // generator
smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // bound
smolv_Read4(bytes, bytesEnd, val); smolv_Write4(outSpirv, val); // schema
bytes += 4; // decode buffer size
// there are two SMOL-V encoding versions, both not indicating anything in their header version field:
// one that is called "before zero" here (2016-08-31 code). Support decoding that one only by presence
// of this special flag.
const bool beforeZeroVersion = smolVersion == 0 && (flags & kDecodeFlagUse20160831AsZeroVersion) != 0;
const int knownOpsCount = smolv_GetKnownOpsCount(smolVersion);
uint32_t prevResult = 0;
uint32_t prevDecorate = 0;
while (bytes < bytesEnd)
{
// read length + opcode
uint32_t instrLen;
SpvOp op;
if (!smolv_ReadLengthOp(bytes, bytesEnd, instrLen, op))
return false;
const bool wasSwizzle = (op == SpvOpVectorShuffleCompact);
if (wasSwizzle)
op = SpvOpVectorShuffle;
smolv_Write4(outSpirv, (instrLen << 16) | op);
size_t ioffs = 1;
// read type as varint, if we have it
if (smolv_OpHasType(op, knownOpsCount))
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
smolv_Write4(outSpirv, val);
ioffs++;
}
// read result as delta+varint, if we have it
if (smolv_OpHasResult(op, knownOpsCount))
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
val = prevResult + smolv_ZigDecode(val);
smolv_Write4(outSpirv, val);
prevResult = val;
ioffs++;
}
// Decorate: IDs relative to previous decorate
if (op == SpvOpDecorate || op == SpvOpMemberDecorate)
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
// "before zero" version did not use zig encoding for the value
val = prevDecorate + (beforeZeroVersion ? val : smolv_ZigDecode(val));
smolv_Write4(outSpirv, val);
prevDecorate = val;
ioffs++;
}
// MemberDecorate special decoding
if (op == SpvOpMemberDecorate && !beforeZeroVersion)
{
if (bytes >= bytesEnd)
return false; // broken input
int count = *bytes++;
int prevIndex = 0;
int prevOffset = 0;
for (int m = 0; m < count; ++m)
{
// read member index
uint32_t memberIndex;
if (!smolv_ReadVarint(bytes, bytesEnd, memberIndex)) return false;
memberIndex += prevIndex;
prevIndex = memberIndex;
// decoration (and length if not common/known)
uint32_t memberDec;
if (!smolv_ReadVarint(bytes, bytesEnd, memberDec)) return false;
const int knownExtraOps = smolv_DecorationExtraOps(memberDec);
uint32_t memberLen;
if (knownExtraOps == -1)
{
if (!smolv_ReadVarint(bytes, bytesEnd, memberLen)) return false;
memberLen += 4;
}
else
memberLen = 4 + knownExtraOps;
// write SPIR-V op+length (unless it's first member decoration, in which case it was written before)
if (m != 0)
{
smolv_Write4(outSpirv, (memberLen << 16) | op);
smolv_Write4(outSpirv, prevDecorate);
}
smolv_Write4(outSpirv, memberIndex);
smolv_Write4(outSpirv, memberDec);
// Special case for Offset decorations
if (memberDec == 35) // Offset
{
if (memberLen != 5)
return false;
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
val += prevOffset;
smolv_Write4(outSpirv, val);
prevOffset = val;
}
else
{
for (uint32_t i = 4; i < memberLen; ++i)
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
smolv_Write4(outSpirv, val);
}
}
}
continue;
}
// Read this many IDs, that are relative to result ID
int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount);
// "before zero" version only used zig encoding for IDs of several ops; after
// that ops got zig encoding for their IDs
bool zigDecodeVals = true;
if (beforeZeroVersion)
{
if (op != SpvOpControlBarrier && op != SpvOpMemoryBarrier && op != SpvOpLoopMerge && op != SpvOpSelectionMerge && op != SpvOpBranch && op != SpvOpBranchConditional && op != SpvOpMemoryNamedBarrier)
zigDecodeVals = false;
}
for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs)
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
if (zigDecodeVals)
val = smolv_ZigDecode(val);
smolv_Write4(outSpirv, prevResult - val);
}
if (wasSwizzle && instrLen <= 9)
{
uint32_t swizzle = *bytes++;
if (instrLen > 5) smolv_Write4(outSpirv, (swizzle >> 6) & 3);
if (instrLen > 6) smolv_Write4(outSpirv, (swizzle >> 4) & 3);
if (instrLen > 7) smolv_Write4(outSpirv, (swizzle >> 2) & 3);
if (instrLen > 8) smolv_Write4(outSpirv, swizzle & 3);
}
else if (smolv_OpVarRest(op, knownOpsCount))
{
// read rest of words with variable encoding
for (; ioffs < instrLen; ++ioffs)
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
smolv_Write4(outSpirv, val);
}
}
else
{
// read rest of words without any encoding
for (; ioffs < instrLen; ++ioffs)
{
if (!smolv_Read4(bytes, bytesEnd, val)) return false;
smolv_Write4(outSpirv, val);
}
}
}
if ((uint8_t*)spirvOutputBuffer + neededBufferSize != outSpirv)
return false; // something went wrong during decoding? we should have decoded to exact output size
return true;
}
// --------------------------------------------------------------------------------------------
// Calculating instruction count / space stats on SPIR-V and SMOL-V
struct smolv::Stats
{
Stats() { memset(this, 0, sizeof(*this)); }
size_t opCounts[kKnownOpsCount];
size_t opSizes[kKnownOpsCount];
size_t smolOpSizes[kKnownOpsCount];
size_t varintCountsOp[6];
size_t varintCountsType[6];
size_t varintCountsRes[6];
size_t varintCountsOther[6];
size_t totalOps;
size_t totalSize;
size_t totalSizeSmol;
size_t inputCount;
};
smolv::Stats* smolv::StatsCreate()
{
return new Stats();
}
void smolv::StatsDelete(smolv::Stats *s)
{
delete s;
}
bool smolv::StatsCalculate(smolv::Stats* stats, const void* spirvData, size_t spirvSize)
{
if (!stats)
return false;
const size_t wordCount = spirvSize / 4;
if (wordCount * 4 != spirvSize)
return false;
const uint32_t* words = (const uint32_t*)spirvData;
const uint32_t* wordsEnd = words + wordCount;
if (!smolv_CheckSpirVHeader(words, wordCount))
return false;
words += 5;
stats->inputCount++;
stats->totalSize += wordCount;
while (words < wordsEnd)
{
_SMOLV_READ_OP(instrLen, words, op);
if (op < kKnownOpsCount)
{
stats->opCounts[op]++;
stats->opSizes[op] += instrLen;
}
words += instrLen;
stats->totalOps++;
}
return true;
}
bool smolv::StatsCalculateSmol(smolv::Stats* stats, const void* smolvData, size_t smolvSize)
{
if (!stats)
return false;
// debugging helper to dump all encoded bytes to stdout, keep at "if 0"
# if 0
# define _SMOLV_DEBUG_PRINT_ENCODED_BYTES() { \
printf("Op %-22s ", op < kKnownOpsCount ? kSpirvOpNames[op] : "???"); \
for (const uint8_t* b = instrBegin; b < bytes; ++b) \
printf("%02x ", *b); \
printf("\n"); \
}
# else
# define _SMOLV_DEBUG_PRINT_ENCODED_BYTES() {}
# endif
const uint8_t* bytes = (const uint8_t*)smolvData;
const uint8_t* bytesEnd = bytes + smolvSize;
if (!smolv_CheckSmolHeader(bytes, smolvSize))
return false;
uint32_t val;
int smolVersion;
bytes += 4;
smolv_Read4(bytes, bytesEnd, val); smolVersion = val >> 24;
const int knownOpsCount = smolv_GetKnownOpsCount(smolVersion);
bytes += 16;
stats->totalSizeSmol += smolvSize;
while (bytes < bytesEnd)
{
const uint8_t* instrBegin = bytes;
const uint8_t* varBegin;
// read length + opcode
uint32_t instrLen;
SpvOp op;
varBegin = bytes;
if (!smolv_ReadLengthOp(bytes, bytesEnd, instrLen, op))
return false;
const bool wasSwizzle = (op == SpvOpVectorShuffleCompact);
if (wasSwizzle)
op = SpvOpVectorShuffle;
stats->varintCountsOp[bytes-varBegin]++;
size_t ioffs = 1;
if (smolv_OpHasType(op, knownOpsCount))
{
varBegin = bytes;
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
stats->varintCountsType[bytes-varBegin]++;
ioffs++;
}
if (smolv_OpHasResult(op, knownOpsCount))
{
varBegin = bytes;
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
stats->varintCountsRes[bytes-varBegin]++;
ioffs++;
}
if (op == SpvOpDecorate || op == SpvOpMemberDecorate)
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
ioffs++;
}
// MemberDecorate special decoding
if (op == SpvOpMemberDecorate)
{
if (bytes >= bytesEnd)
return false; // broken input
int count = *bytes++;
for (int m = 0; m < count; ++m)
{
uint32_t memberIndex;
if (!smolv_ReadVarint(bytes, bytesEnd, memberIndex)) return false;
uint32_t memberDec;
if (!smolv_ReadVarint(bytes, bytesEnd, memberDec)) return false;
const int knownExtraOps = smolv_DecorationExtraOps(memberDec);
uint32_t memberLen;
if (knownExtraOps == -1)
{
if (!smolv_ReadVarint(bytes, bytesEnd, memberLen)) return false;
memberLen += 4;
}
else
memberLen = 4 + knownExtraOps;
for (uint32_t i = 4; i < memberLen; ++i)
{
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
}
}
stats->smolOpSizes[op] += bytes - instrBegin;
_SMOLV_DEBUG_PRINT_ENCODED_BYTES();
continue;
}
int relativeCount = smolv_OpDeltaFromResult(op, knownOpsCount);
for (int i = 0; i < relativeCount && ioffs < instrLen; ++i, ++ioffs)
{
varBegin = bytes;
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
stats->varintCountsRes[bytes-varBegin]++;
}
if (wasSwizzle && instrLen <= 9)
{
bytes++;
}
else if (smolv_OpVarRest(op, knownOpsCount))
{
for (; ioffs < instrLen; ++ioffs)
{
varBegin = bytes;
if (!smolv_ReadVarint(bytes, bytesEnd, val)) return false;
stats->varintCountsOther[bytes-varBegin]++;
}
}
else
{
for (; ioffs < instrLen; ++ioffs)
{
if (!smolv_Read4(bytes, bytesEnd, val)) return false;
}
}
if (op < kKnownOpsCount)
{
stats->smolOpSizes[op] += bytes - instrBegin;
}
_SMOLV_DEBUG_PRINT_ENCODED_BYTES();
}
return true;
}
static bool CompareOpCounters (std::pair<SpvOp,size_t> a, std::pair<SpvOp,size_t> b)
{
return a.second > b.second;
}
void smolv::StatsPrint(const Stats* stats)
{
if (!stats)
return;
typedef std::pair<SpvOp,size_t> OpCounter;
OpCounter counts[kKnownOpsCount];
OpCounter sizes[kKnownOpsCount];
OpCounter sizesSmol[kKnownOpsCount];
for (int i = 0; i < kKnownOpsCount; ++i)
{
counts[i].first = (SpvOp)i;
counts[i].second = stats->opCounts[i];
sizes[i].first = (SpvOp)i;
sizes[i].second = stats->opSizes[i];
sizesSmol[i].first = (SpvOp)i;
sizesSmol[i].second = stats->smolOpSizes[i];
}
std::sort(counts, counts + kKnownOpsCount, CompareOpCounters);
std::sort(sizes, sizes + kKnownOpsCount, CompareOpCounters);
std::sort(sizesSmol, sizesSmol + kKnownOpsCount, CompareOpCounters);
printf("Stats for %i SPIR-V inputs, total size %i words (%.1fKB):\n", (int)stats->inputCount, (int)stats->totalSize, stats->totalSize * 4.0f / 1024.0f);
printf("Most occuring ops:\n");
for (int i = 0; i < 30; ++i)
{
SpvOp op = counts[i].first;
printf(" #%2i: %4i %-20s %4i (%4.1f%%)\n", i, op, kSpirvOpNames[op], (int)counts[i].second, (float)counts[i].second / (float)stats->totalOps * 100.0f);
}
printf("Largest total size of ops:\n");
for (int i = 0; i < 30; ++i)
{
SpvOp op = sizes[i].first;
printf(" #%2i: %-22s %6i (%4.1f%%) avg len %.1f\n",
i,
kSpirvOpNames[op],
(int)sizes[i].second*4,
(float)sizes[i].second / (float)stats->totalSize * 100.0f,
(float)sizes[i].second*4 / (float)stats->opCounts[op]
);
}
printf("SMOL varint encoding counts per byte length:\n");
printf(" B: %6s %6s %6s %6s\n", "Op", "Type", "Result", "Other");
for (int i = 1; i < 6; ++i)
{
printf(" %i: %6i %6i %6i %6i\n", i, (int)stats->varintCountsOp[i], (int)stats->varintCountsType[i], (int)stats->varintCountsRes[i], (int)stats->varintCountsOther[i]);
}
printf("Largest total size of ops in SMOL:\n");
for (int i = 0; i < 30; ++i)
{
SpvOp op = sizesSmol[i].first;
printf(" #%2i: %-22s %6i (%4.1f%%) avg len %.1f\n",
i,
kSpirvOpNames[op],
(int)sizesSmol[i].second,
(float)sizesSmol[i].second / (float)stats->totalSizeSmol * 100.0f,
(float)sizesSmol[i].second / (float)stats->opCounts[op]
);
}
}
// ------------------------------------------------------------------------------
// This software is available under 2 licenses -- choose whichever you prefer.
// ------------------------------------------------------------------------------
// ALTERNATIVE A - MIT License
// Copyright (c) 2016-2020 Aras Pranckevicius
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
// of the Software, and to permit persons to whom the Software is furnished to do
// so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// ------------------------------------------------------------------------------
// ALTERNATIVE B - Public Domain (www.unlicense.org)
// This is free and unencumbered software released into the public domain.
// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
// software, either in source code form or as a compiled binary, for any purpose,
// commercial or non-commercial, and by any means.
// In jurisdictions that recognize copyright laws, the author or authors of this
// software dedicate any and all copyright interest in the software to the public
// domain. We make this dedication for the benefit of the public at large and to
// the detriment of our heirs and successors. We intend this dedication to be an
// overt act of relinquishment in perpetuity of all present and future rights to
// this software under copyright law.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// ------------------------------------------------------------------------------