#include <cinttypes>
#include <cstdint>
#include <cstdio>

// One 512-bit ZMM register image, viewable as 8 quadwords or 64 bytes.
// The 64-byte alignment is required by the aligned vmovaps loads/stores below.
union alignas(64) zmm_t {
  uint64_t as_uint64[8];
  uint8_t as_uint8[64];
};

// Loads an all-zero pattern into every ZMM register available to the target
// (zmm0-zmm7, plus zmm8-zmm31 on x86_64), executes an int3 breakpoint, then
// stores the registers back to memory and prints them byte by byte.
// NOTE(review): presumably a debugger modifies the registers while the process
// is stopped at int3, and the printed dump is what gets verified -- confirm
// against the test that drives this program.
int main() {
  constexpr zmm_t zmm_fill = {
    .as_uint64 = { 0, 0, 0, 0, 0, 0, 0, 0 }
  };

  // Zero-initialized so that the slots the asm never stores to (zmm8-zmm31 on
  // non-x86_64 builds) print deterministic values instead of indeterminate
  // stack contents.
  zmm_t zmm[32] = {};

  asm volatile(
    "vmovaps %1, %%zmm0\n\t"
    "vmovaps %1, %%zmm1\n\t"
    "vmovaps %1, %%zmm2\n\t"
    "vmovaps %1, %%zmm3\n\t"
    "vmovaps %1, %%zmm4\n\t"
    "vmovaps %1, %%zmm5\n\t"
    "vmovaps %1, %%zmm6\n\t"
    "vmovaps %1, %%zmm7\n\t"
#if defined(__x86_64__) || defined(_M_X64)
    "vmovaps %1, %%zmm8\n\t"
    "vmovaps %1, %%zmm9\n\t"
    "vmovaps %1, %%zmm10\n\t"
    "vmovaps %1, %%zmm11\n\t"
    "vmovaps %1, %%zmm12\n\t"
    "vmovaps %1, %%zmm13\n\t"
    "vmovaps %1, %%zmm14\n\t"
    "vmovaps %1, %%zmm15\n\t"
    "vmovaps %1, %%zmm16\n\t"
    "vmovaps %1, %%zmm17\n\t"
    "vmovaps %1, %%zmm18\n\t"
    "vmovaps %1, %%zmm19\n\t"
    "vmovaps %1, %%zmm20\n\t"
    "vmovaps %1, %%zmm21\n\t"
    "vmovaps %1, %%zmm22\n\t"
    "vmovaps %1, %%zmm23\n\t"
    "vmovaps %1, %%zmm24\n\t"
    "vmovaps %1, %%zmm25\n\t"
    "vmovaps %1, %%zmm26\n\t"
    "vmovaps %1, %%zmm27\n\t"
    "vmovaps %1, %%zmm28\n\t"
    "vmovaps %1, %%zmm29\n\t"
    "vmovaps %1, %%zmm30\n\t"
    "vmovaps %1, %%zmm31\n\t"
#endif
    "\n\t"
    "int3\n\t"
    "\n\t"
    "vmovaps %%zmm0,  0x000(%0)\n\t"
    "vmovaps %%zmm1,  0x040(%0)\n\t"
    "vmovaps %%zmm2,  0x080(%0)\n\t"
    "vmovaps %%zmm3,  0x0C0(%0)\n\t"
    "vmovaps %%zmm4,  0x100(%0)\n\t"
    "vmovaps %%zmm5,  0x140(%0)\n\t"
    "vmovaps %%zmm6,  0x180(%0)\n\t"
    "vmovaps %%zmm7,  0x1C0(%0)\n\t"
#if defined(__x86_64__) || defined(_M_X64)
    "vmovaps %%zmm8,  0x200(%0)\n\t"
    "vmovaps %%zmm9,  0x240(%0)\n\t"
    "vmovaps %%zmm10, 0x280(%0)\n\t"
    "vmovaps %%zmm11, 0x2C0(%0)\n\t"
    "vmovaps %%zmm12, 0x300(%0)\n\t"
    "vmovaps %%zmm13, 0x340(%0)\n\t"
    "vmovaps %%zmm14, 0x380(%0)\n\t"
    "vmovaps %%zmm15, 0x3C0(%0)\n\t"
    "vmovaps %%zmm16, 0x400(%0)\n\t"
    "vmovaps %%zmm17, 0x440(%0)\n\t"
    "vmovaps %%zmm18, 0x480(%0)\n\t"
    "vmovaps %%zmm19, 0x4C0(%0)\n\t"
    "vmovaps %%zmm20, 0x500(%0)\n\t"
    "vmovaps %%zmm21, 0x540(%0)\n\t"
    "vmovaps %%zmm22, 0x580(%0)\n\t"
    "vmovaps %%zmm23, 0x5C0(%0)\n\t"
    "vmovaps %%zmm24, 0x600(%0)\n\t"
    "vmovaps %%zmm25, 0x640(%0)\n\t"
    "vmovaps %%zmm26, 0x680(%0)\n\t"
    "vmovaps %%zmm27, 0x6C0(%0)\n\t"
    "vmovaps %%zmm28, 0x700(%0)\n\t"
    "vmovaps %%zmm29, 0x740(%0)\n\t"
    "vmovaps %%zmm30, 0x780(%0)\n\t"
    "vmovaps %%zmm31, 0x7C0(%0)\n\t"
#endif
    :
    : "b"(zmm), "m"(zmm_fill)
    // "memory": the asm writes the zmm[] array through the pointer in %0,
    // which is only an input operand; without this clobber the compiler may
    // assume the array is unchanged across the asm statement.
    : "memory",
      "%zmm0", "%zmm1", "%zmm2", "%zmm3", "%zmm4", "%zmm5", "%zmm6", "%zmm7"
#if defined(__x86_64__) || defined(_M_X64)
    , "%zmm8", "%zmm9", "%zmm10", "%zmm11", "%zmm12", "%zmm13", "%zmm14",
      "%zmm15", "%zmm16", "%zmm17", "%zmm18", "%zmm19", "%zmm20", "%zmm21",
      "%zmm22", "%zmm23", "%zmm24", "%zmm25", "%zmm26", "%zmm27", "%zmm28",
      "%zmm29", "%zmm30", "%zmm31"
#endif
  );

  // Dump every slot, one register per line, bytes in memory order.
  for (int i = 0; i < 32; ++i) {
    printf("zmm%d = { ", i);
    // std::size_t index avoids a signed/unsigned comparison against sizeof.
    for (std::size_t j = 0; j < sizeof(zmm->as_uint8); ++j)
      printf("0x%02x ", zmm[i].as_uint8[j]);
    printf("}\n");
  }

  return 0;
}