; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Codegen checks for half (f16) values on amdgcn: kernel arguments, global
; loads and stores, fpext / fptrunc, fadd / fsub and bitcasts to and from i16.
; Both llc invocations above share the common GCN check prefix and each adds
; one subtarget prefix of its own.
;
; Several functions below carry only a label check and no instruction
; patterns; they are kept exactly as they were.

; ---------------------------------------------------------------------------
; half / vector-of-half kernel arguments stored back to global memory
; ---------------------------------------------------------------------------

; half args should be promoted to float

; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; Exactly three ushort loads and exactly one dword plus one short store are
; expected; the NOT checks reject any extra load or store.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; fpext of half kernel arguments to float
; ---------------------------------------------------------------------------

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; fpext of half kernel arguments to double
; ---------------------------------------------------------------------------

; The expected s_load_dword offset differs per subtarget prefix (0xb vs 0x2c);
; the loaded scalar feeds v_cvt_f64_f32 directly.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; plain global load followed by a store of the same value
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; Note: this function declares %in before %out, unlike its siblings.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; The store check reuses the register range captured by the load check, so the
; test ties the stored registers to the loaded ones (same shape as the v2f16
; and v4f16 variants) instead of accepting any register range.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; global load of half followed by fpext to float
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4

; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; global load of half followed by fpext to double
; ---------------------------------------------------------------------------

; The conversion is expected in two steps: f16 to f32, then f32 to f64.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:

; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}

; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16_e32

; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN-NOT: v_cvt_f64_f32_e32

; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; Label-only test: no instruction patterns are checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; global load of float, fptrunc to half, store
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; arithmetic on half values
; ---------------------------------------------------------------------------

; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; Operands come from two consecutive <4 x half> elements starting at %in.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; Operands come from two consecutive half elements starting at %in.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; ---------------------------------------------------------------------------
; bitcasts between half and i16: the loaded register is stored unchanged
; ---------------------------------------------------------------------------

; Note: this function declares %in before %out.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }