; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Code generation tests for the half type on amdgcn. The same input is
; compiled for two -mcpu values (see the two llc invocations above); checks
; shared by both configurations use the common prefix, and checks that only
; apply to one configuration use that configuration's own prefix.

; half args should be promoted to float

; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}

; Vector-of-half arguments stored directly to the output pointer.

; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}

; fpext of half arguments to float.

; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}

; fpext of half arguments to double.

; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}

; Plain load followed by store of half values through addrspace(1) pointers.
; Each check captures the loaded register(s) and requires the store to use
; the same register(s).

; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}

; NOTE(review): the parameters here are declared as (%in, %out), the reverse
; of the sibling tests. The body still loads from %in and stores to %out, so
; this is left unchanged; confirm whether the ordering is intentional.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}

; The store check references the register range captured by the load check
; (a use of TMP, not a second definition), so the test actually verifies that
; the loaded value is what gets stored, matching the smaller-vector tests.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}

; Load of half values from an addrspace(1) pointer followed by fpext to float.

; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}

; Load of half values from an addrspace(1) pointer followed by fpext to
; double. The scalar and v2 checks expect a half-to-float conversion feeding
; a float-to-double conversion.

; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}

; Load of float values, fptrunc to half, then store of the half result.

; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}

; Arithmetic on half values.

; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}

; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}

; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}

; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}

; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}

; Bitcasts between half and i16: the checks require the loaded register to be
; stored unchanged, with no conversion instruction matched in between.

; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }