; Verifies correctness of load/store of parameters and return values.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify -arch=sm_35 %}

; Single-field wrapper structs, one per scalar type.
%s_i1 = type { i1 }
%s_i8 = type { i8 }
%s_i16 = type { i16 }
%s_f16 = type { half }
%s_i32 = type { i32 }
%s_f32 = type { float }
%s_i64 = type { i64 }
%s_f64 = type { double }

; More complicated types. i64 is used to increase natural alignment
; requirement for the type.
%s_i32x4 = type { i32, i32, i32, i32, i64}
%s_i32f32 = type { i32, float, i32, float, i64}
%s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
%s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}

; All scalar parameters must be at least 32 bits in size.
; i1 is loaded/stored as i8.

; Each test below is a function that tail-calls itself with its own argument,
; so a single function exercises the incoming-parameter load, the outgoing
; parameter store, the call's return-value load and the function's own
; return-value store.

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i1(
; CHECK-NEXT: .param .b32 test_i1_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
; CHECK: setp.eq.b16 %p1, [[A]], 1
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1;
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[C]]
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK-NEXT: test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define i1 @test_i1(i1 %a) {
  %r = tail call i1 @test_i1(i1 %a);
  ret i1 %r;
}

; Signed i1 is a somewhat special case. We only care about one bit and
; then use neg.s32 to convert it to 32-bit -1 if it's set.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i1s(
; CHECK-NEXT: .param .b32 test_i1s_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define signext i1 @test_i1s(i1 signext %a) {
  %r = tail call signext i1 @test_i1s(i1 signext %a);
  ret i1 %r;
}

; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_v3i1(
; CHECK-NEXT: .param .align 1 .b8 test_v3i1_param_0[1]
; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
; CHECK-DAG: st.param.b8 [param0+0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]]
; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i1> @test_v3i1(<3 x i1> %a) {
  %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
  ret <3 x i1> %r;
}

; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_v4i1(
; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
; CHECK: ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0+0], [[E0]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK: test_v4i1,
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK: ld.param.b8 [[RE3:%rs[0-9]+]], [retval0+3];
; CHECK: st.param.b8 [func_retval0+0], [[RE0]];
; CHECK: st.param.b8 [func_retval0+1], [[RE1]];
; CHECK: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK: st.param.b8 [func_retval0+3], [[RE3]];
; CHECK-NEXT: ret;
define <4 x i1> @test_v4i1(<4 x i1> %a) {
  %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
  ret <4 x i1> %r;
}

; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_v5i1(
; CHECK-NEXT: .param .align 1 .b8 test_v5i1_param_0[1]
; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
; CHECK-DAG: ld.param.u8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
; CHECK-DAG: st.param.b8 [param0+0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.b8 [func_retval0+0], [[RE0]]
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i1> @test_v5i1(<5 x i1> %a) {
  %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
  ret <5 x i1> %r;
}

; Odd-width integers narrower than 8 bits (i2, i3) are read with an 8-bit load
; and passed/returned through .b32 parameters.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i2(
; CHECK-NEXT: .param .b32 test_i2_param_0
; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i2,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i2 @test_i2(i2 %a) {
  %r = tail call i2 @test_i2(i2 %a);
  ret i2 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i3(
; CHECK-NEXT: .param .b32 test_i3_param_0
; CHECK: ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i3,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i3 @test_i3(i3 %a) {
  %r = tail call i3 @test_i3(i3 %a);
  ret i3 %r;
}

; Unsigned i8 is loaded into a 16-bit register, zero-extended to 32 bits and
; masked to the low 8 bits before being stored as a 32-bit parameter.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i8(
; CHECK-NEXT: .param .b32 test_i8_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i8 @test_i8(i8 %a) {
  %r = tail call i8 @test_i8(i8 %a);
  ret i8 %r;
}

; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i8s(
; CHECK-NEXT: .param .b32 test_i8s_param_0
; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define signext i8 @test_i8s(i8 signext %a) {
  %r = tail call signext i8 @test_i8s(i8 signext %a);
  ret i8 %r;
}

; Vectors of i8: elements are accessed with v2/v4 vector loads and stores,
; plus a scalar access for an odd trailing element.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v3i8(
; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i8,
; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i8> @test_v3i8(<3 x i8> %a) {
  %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
  ret <3 x i8> %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v4i8(
; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i8,
; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
define <4 x i8> @test_v4i8(<4 x i8> %a) {
  %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
  ret <4 x i8> %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v5i8(
; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i8,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
  %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
  ret <5 x i8> %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i11(
; CHECK-NEXT: .param .b32 test_i11_param_0
; CHECK: ld.param.u16 {{%rs[0-9]+}}, [test_i11_param_0];
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i11,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i11 @test_i11(i11 %a) {
  %r = tail call i11 @test_i11(i11 %a);
  ret i11 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i16(
; CHECK-NEXT: .param .b32 test_i16_param_0
; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i16 @test_i16(i16 %a) {
  %r = tail call i16 @test_i16(i16 %a);
  ret i16 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i16s(
; CHECK-NEXT: .param .b32 test_i16s_param_0
; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define signext i16 @test_i16s(i16 signext %a) {
  %r = tail call signext i16 @test_i16s(i16 signext %a);
  ret i16 %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3i16(
; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
; CHECK-DAG: ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i16,
; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i16> @test_v3i16(<3 x i16> %a) {
  %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
  ret <3 x i16> %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v4i16(
; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
; CHECK: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0]
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i16,
; CHECK: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
define <4 x i16> @test_v4i16(<4 x i16> %a) {
  %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
  ret <4 x i16> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5i16(
; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i16,
; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i16> @test_v5i16(<5 x i16> %a) {
  %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
  ret <5 x i16> %r;
}

; half is declared as a .b32 parameter but accessed with 16-bit loads/stores.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_f16(
; CHECK-NEXT: .param .b32 test_f16_param_0
; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b16 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f16,
; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK: st.param.b16 [func_retval0+0], [[R]]
; CHECK-NEXT: ret;
define half @test_f16(half %a) {
  %r = tail call half @test_f16(half %a);
  ret half %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v2f16(
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0+0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2f16,
; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], [[R]]
; CHECK-NEXT: ret;
define <2 x half> @test_v2f16(<2 x half> %a) {
  %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
  ret <2 x half> %r;
}

; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3f16(
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v3f16,
; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
; CHECK: ret;
define <3 x half> @test_v3f16(<3 x half> %a) {
  %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
  ret <3 x half> %r;
}

; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v4f16(
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
; CHECK: ret;
define <4 x half> @test_v4f16(<4 x half> %a) {
  %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
  ret <4 x half> %r;
}

; NOTE(review): the mov.b32 pattern in this function uses [[HH01]], which none
; of this function's patterns define, so it can only resolve to a binding left
; over from an earlier function. It also re-defines E0/E1 right after the v4
; load defined them. Left unchanged; confirm against actual llc output.
; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5f16(
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0+0],
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v5f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
; CHECK: ret;
define <5 x half> @test_v5f16(<5 x half> %a) {
  %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
  ret <5 x half> %r;
}

; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v8f16(
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]];
; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
; CHECK: ret;
define <8 x half> @test_v8f16(<8 x half> %a) {
  %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
  ret <8 x half> %r;
}

; CHECK:.func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v9f16(
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b16 [param0+0],
; CHECK-DAG: st.param.v4.b16 [param0+8],
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_v9f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
; CHECK: ret;
define <9 x half> @test_v9f16(<9 x half> %a) {
  %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
  ret <9 x half> %r;
}

; Odd-width integers between 16 and 32 bits: i19/i23/i24 are read with a
; 16-bit load plus an 8-bit load at offset 2; i29 is read with one 32-bit load.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i19(
; CHECK-NEXT: .param .b32 test_i19_param_0
; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i19_param_0];
; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i19_param_0+2];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i19,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i19 @test_i19(i19 %a) {
  %r = tail call i19 @test_i19(i19 %a);
  ret i19 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i23(
; CHECK-NEXT: .param .b32 test_i23_param_0
; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i23_param_0];
; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i23_param_0+2];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i23,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i23 @test_i23(i23 %a) {
  %r = tail call i23 @test_i23(i23 %a);
  ret i23 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i24(
; CHECK-NEXT: .param .b32 test_i24_param_0
; CHECK-DAG: ld.param.u8 {{%r[0-9]+}}, [test_i24_param_0+2];
; CHECK-DAG: ld.param.u16 {{%r[0-9]+}}, [test_i24_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i24,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i24 @test_i24(i24 %a) {
  %r = tail call i24 @test_i24(i24 %a);
  ret i24 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i29(
; CHECK-NEXT: .param .b32 test_i29_param_0
; CHECK: ld.param.u32 {{%r[0-9]+}}, [test_i29_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i29,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], {{%r[0-9]+}};
; CHECK-NEXT: ret;
define i29 @test_i29(i29 %a) {
  %r = tail call i29 @test_i29(i29 %a);
  ret i29 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i32(
; CHECK-NEXT: .param .b32 test_i32_param_0
; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i32 @test_i32(i32 %a) {
  %r = tail call i32 @test_i32(i32 %a);
  ret i32 %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v3i32(
; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b32 [param0+8], [[E2]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i32> @test_v3i32(<3 x i32> %a) {
  %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
  ret <3 x i32> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v4i32(
; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
define <4 x i32> @test_v4i32(<4 x i32> %a) {
  %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
  ret <4 x i32> %r;
}

; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v5i32(
; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i32> @test_v5i32(<5 x i32> %a) {
  %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
  ret <5 x i32> %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_f32(
; CHECK-NEXT: .param .b32 test_f32_param_0
; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.f32 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f32,
; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
; CHECK: st.param.f32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define float @test_f32(float %a) {
  %r = tail call float @test_f32(float %a);
  ret float %r;
}

; Integers wider than 32 bits are passed and returned through .b64 parameters;
; the odd widths below are read with several narrower loads.
; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i40(
; CHECK-NEXT: .param .b64 test_i40_param_0
; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i40_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i40_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i40,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i40 @test_i40(i40 %a) {
  %r = tail call i40 @test_i40(i40 %a);
  ret i40 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i47(
; CHECK-NEXT: .param .b64 test_i47_param_0
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i47_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i47_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i47,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i47 @test_i47(i47 %a) {
  %r = tail call i47 @test_i47(i47 %a);
  ret i47 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i48(
; CHECK-NEXT: .param .b64 test_i48_param_0
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i48_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i48_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i48,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i48 @test_i48(i48 %a) {
  %r = tail call i48 @test_i48(i48 %a);
  ret i48 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i51(
; CHECK-NEXT: .param .b64 test_i51_param_0
; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i51_param_0+6];
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i51_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i51_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i51,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i51 @test_i51(i51 %a) {
  %r = tail call i51 @test_i51(i51 %a);
  ret i51 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i56(
; CHECK-NEXT: .param .b64 test_i56_param_0
; CHECK-DAG: ld.param.u8 {{%rd[0-9]+}}, [test_i56_param_0+6];
; CHECK-DAG: ld.param.u16 {{%rd[0-9]+}}, [test_i56_param_0+4];
; CHECK-DAG: ld.param.u32 {{%rd[0-9]+}}, [test_i56_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i56,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i56 @test_i56(i56 %a) {
  %r = tail call i56 @test_i56(i56 %a);
  ret i56 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i57(
; CHECK-NEXT: .param .b64 test_i57_param_0
; CHECK: ld.param.u64 {{%rd[0-9]+}}, [test_i57_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i57,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], {{%rd[0-9]+}};
; CHECK-NEXT: ret;
define i57 @test_i57(i57 %a) {
  %r = tail call i57 @test_i57(i57 %a);
  ret i57 %r;
}

; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i64(
; CHECK-NEXT: .param .b64 test_i64_param_0
; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], [[E]];
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i64 @test_i64(i64 %a) {
  %r = tail call i64 @test_i64(i64 %a);
  ret i64 %r;
}

; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v3i64(
; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b64 [param0+16], [[E2]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i64> @test_v3i64(<3 x i64> %a) {
  %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
  ret <3 x i64> %r;
}

; For i64 vector loads are limited by PTX to 2 elements.
; The four elements are expected to move as two v2 halves, one at offset 0
; and one at offset 16, for the argument, the outgoing param and the return
; value alike.
; CHECK:       .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v4i64(
; CHECK-NEXT:  .param .align 32 .b8 test_v4i64_param_0[32]
; Incoming argument (either order is accepted).
; CHECK-DAG:   ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
; CHECK-DAG:   ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
; Outgoing call argument.
; CHECK:       .param .align 32 .b8 param0[32];
; CHECK:       st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
; CHECK:       st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK:       .param .align 32 .b8 retval0[32];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_v4i64,
; Callee result is read back and forwarded to our own return slot.
; CHECK:       ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
; CHECK:       ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
; CHECK-DAG:   st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG:   st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
; CHECK-NEXT:  ret;
define <4 x i64> @test_v4i64(<4 x i64> %a) {
  %ret = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
  ret <4 x i64> %ret;
}

; Aggregates, on the other hand, do not get extended.
; Single-field structs: the checks expect the .param declaration to keep the
; field's own size and alignment (e.g. .align 1 .b8 [1] for { i1 }).

; CHECK:       .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_s_i1(
; CHECK-NEXT:  .param .align 1 .b8 test_s_i1_param_0[1]
; CHECK:       ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
; CHECK:       .param .align 1 .b8 param0[1];
; CHECK:       st.param.b8 [param0+0], [[A]]
; CHECK:       .param .align 1 .b8 retval0[1];
; CHECK:       call.uni
; CHECK-NEXT:  test_s_i1,
; CHECK:       ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
; CHECK:       st.param.b8 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_i1 @test_s_i1(%s_i1 %a) {
  %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
  ret %s_i1 %r;
}

; CHECK:       .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_s_i8(
; CHECK-NEXT:  .param .align 1 .b8 test_s_i8_param_0[1]
; CHECK:       ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
; CHECK:       .param .align 1 .b8 param0[1];
; CHECK:       st.param.b8 [param0+0], [[A]]
; CHECK:       .param .align 1 .b8 retval0[1];
; CHECK:       call.uni
; CHECK-NEXT:  test_s_i8,
; CHECK:       ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
; CHECK:       st.param.b8 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_i8 @test_s_i8(%s_i8 %a) {
  %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
  ret %s_i8 %r;
}

; CHECK:       .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_i16(
; CHECK-NEXT:  .param .align 2 .b8 test_s_i16_param_0[2]
; CHECK:       ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
; CHECK:       .param .align 2 .b8 param0[2];
; CHECK:       st.param.b16 [param0+0], [[A]]
; CHECK:       .param .align 2 .b8 retval0[2];
; CHECK:       call.uni
; CHECK-NEXT:  test_s_i16,
; CHECK:       ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK:       st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_i16 @test_s_i16(%s_i16 %a) {
  %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
  ret %s_i16 %r;
}

; CHECK:       .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_f16(
; CHECK-NEXT:  .param .align 2 .b8 test_s_f16_param_0[2]
; CHECK:       ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
; CHECK:       .param .align 2 .b8 param0[2];
; CHECK:       st.param.b16 [param0+0], [[A]]
; CHECK:       .param .align 2 .b8 retval0[2];
; CHECK:       call.uni
; CHECK-NEXT:  test_s_f16,
; CHECK:       ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK:       st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_f16 @test_s_f16(%s_f16 %a) {
  %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
  ret %s_f16 %r;
}

; CHECK:       .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_s_i32(
; CHECK-NEXT:  .param .align 4 .b8 test_s_i32_param_0[4]
; CHECK:       ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
; CHECK:       .param .align 4 .b8 param0[4]
; CHECK:       st.param.b32 [param0+0], [[E]];
; CHECK:       .param .align 4 .b8 retval0[4];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_s_i32,
; CHECK:       ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK:       st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_i32 @test_s_i32(%s_i32 %a) {
  %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
  ret %s_i32 %r;
}

; CHECK:       .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_s_f32(
; CHECK-NEXT:  .param .align 4 .b8 test_s_f32_param_0[4]
; CHECK:       ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
; CHECK:       .param .align 4 .b8 param0[4]
; CHECK:       st.param.f32 [param0+0], [[E]];
; CHECK:       .param .align 4 .b8 retval0[4];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_s_f32,
; CHECK:       ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
; CHECK:       st.param.f32 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_f32 @test_s_f32(%s_f32 %a) {
  %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
  ret %s_f32 %r;
}

; CHECK:       .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_s_i64(
; CHECK-NEXT:  .param .align 8 .b8 test_s_i64_param_0[8]
; CHECK:       ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
; CHECK:       .param .align 8 .b8 param0[8];
; CHECK:       st.param.b64 [param0+0], [[E]];
; CHECK:       .param .align 8 .b8 retval0[8];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_s_i64,
; CHECK:       ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
; CHECK:       st.param.b64 [func_retval0+0], [[R]];
; CHECK-NEXT:  ret;
define %s_i64 @test_s_i64(%s_i64 %a) {
  %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
  ret %s_i64 %r;
}

; Fields that have different types, but identical sizes are not vectorized.
; CHECK:       .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i32f32(
; CHECK:       .param .align 8 .b8 test_s_i32f32_param_0[24]
; CHECK-DAG:   ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
; CHECK-DAG:   ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
; CHECK-DAG:   ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
; CHECK-DAG:   ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
; CHECK-DAG:   ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
; CHECK:       .param .align 8 .b8 param0[24];
; CHECK-DAG:   st.param.b32 [param0+0], [[E0]];
; CHECK-DAG:   st.param.f32 [param0+4], [[E1]];
; CHECK-DAG:   st.param.b32 [param0+8], [[E2]];
; CHECK-DAG:   st.param.f32 [param0+12], [[E3]];
; CHECK-DAG:   st.param.b64 [param0+16], [[E4]];
; CHECK:       .param .align 8 .b8 retval0[24];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_s_i32f32,
; CHECK-DAG:   ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0];
; CHECK-DAG:   ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
; CHECK-DAG:   ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG:   ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
; CHECK-DAG:   ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
; CHECK-DAG:   st.param.b32 [func_retval0+0], [[RE0]];
; CHECK-DAG:   st.param.f32 [func_retval0+4], [[RE1]];
; CHECK-DAG:   st.param.b32 [func_retval0+8], [[RE2]];
; CHECK-DAG:   st.param.f32 [func_retval0+12], [[RE3]];
; CHECK-DAG:   st.param.b64 [func_retval0+16], [[RE4]];
; CHECK:       ret;
define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
  %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
  ret %s_i32f32 %r;
}

; We do vectorize consecutive fields with matching types.
; The trailing i64 is captured as E4 by this function's own load, so the
; store check below is tied to the value loaded here rather than to a variable
; left over from an earlier test.
; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i32x4(
; CHECK:       .param .align 8 .b8 test_s_i32x4_param_0[24]
; CHECK-DAG:   ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
; CHECK-DAG:   ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
; CHECK-DAG:   ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
; CHECK:       .param .align 8 .b8 param0[24];
; CHECK:       st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK:       st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
; CHECK:       st.param.b64 [param0+16], [[E4]];
; CHECK:       .param .align 8 .b8 retval0[24];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_s_i32x4,
; CHECK:       ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK:       ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
; CHECK:       ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
; CHECK-DAG:   st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG:   st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
; CHECK-DAG:   st.param.b64 [func_retval0+16], [[RE4]];
; CHECK:       ret;

define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
  %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
  ret %s_i32x4 %r;
}

; Struct mixing an i8 with i32 fields (%s_i8i32x4): only the leading pair of
; i32 fields is expected to be vectorized; the i8 at offset 8 and the i32
; fields at offsets 12 and 16 are accessed individually.
; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[32])
; CHECK-LABEL: test_s_i1i32x4(
; CHECK:       .param .align 8 .b8 test_s_i1i32x4_param_0[32]
; CHECK:       ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
; CHECK:       ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
; CHECK:       ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
; CHECK:       ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
; CHECK:       ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
; CHECK:       .param .align 8 .b8 param0[32];
; CHECK:       st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK:       st.param.b8 [param0+8], [[E2]];
; CHECK:       st.param.b32 [param0+12], [[E3]];
; CHECK:       st.param.b32 [param0+16], [[E4]];
; CHECK:       st.param.b64 [param0+24], [[E5]];
; CHECK:       .param .align 8 .b8 retval0[32];
; CHECK:       call.uni (retval0),
; CHECK:       test_s_i1i32x4,
; CHECK:       (
; CHECK:       param0
; CHECK:       );
; CHECK:       ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK:       ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
; CHECK:       ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
; CHECK:       ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK:       ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24];
; CHECK:       st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK:       st.param.b8 [func_retval0+8], [[RE2]];
; CHECK:       st.param.b32 [func_retval0+12], [[RE3]];
; CHECK:       st.param.b32 [func_retval0+16], [[RE4]];
; CHECK:       st.param.b64 [func_retval0+24], [[RE5]];
; CHECK:       ret;

define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
  %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
  ret %s_i8i32x4 %r;
}

; -- All loads/stores from parameters aligned by one must be done one
; -- byte at a time.
; Packed variant (%s_i8i32x4p): 25 bytes with alignment 1. The incoming
; argument is expected to be read with one ld.param.u8 per byte, offsets 0..24.
; CHECK:       .visible .func (.param .align 1 .b8 func_retval0[25])
; CHECK-LABEL: test_s_i1i32x4p(
; CHECK-DAG:   .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+24];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+23];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+22];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+21];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+20];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+19];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+18];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+17];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+16];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+15];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+14];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+13];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+12];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+11];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+10];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+9];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+8];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+7];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+6];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+5];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+4];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+3];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+2];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
; CHECK-DAG:   ld.param.u8 %r{{.*}}, [test_s_i1i32x4p_param_0];
; --- TODO
; --- Unaligned parameter store/ return value load is broken in both nvcc
; --- and llvm and needs to be fixed.
; CHECK:       .param .align 1 .b8 param0[25];
; CHECK-DAG:   st.param.b32 [param0+0],
; CHECK-DAG:   st.param.b32 [param0+4],
; CHECK-DAG:   st.param.b8 [param0+8],
; CHECK-DAG:   st.param.b32 [param0+9],
; CHECK-DAG:   st.param.b32 [param0+13],
; CHECK-DAG:   st.param.b64 [param0+17],
; CHECK:       .param .align 1 .b8 retval0[25];
; CHECK:       call.uni (retval0),
; CHECK-NEXT:  test_s_i1i32x4p,
; The return-value loads match any register of the expected class; the exact
; register number is not what is being tested here.
; CHECK-DAG:   ld.param.b32 {{%r[0-9]+}}, [retval0+0];
; CHECK-DAG:   ld.param.b32 {{%r[0-9]+}}, [retval0+4];
; CHECK-DAG:   ld.param.b8 {{%rs[0-9]+}}, [retval0+8];
; CHECK-DAG:   ld.param.b32 {{%r[0-9]+}}, [retval0+9];
; CHECK-DAG:   ld.param.b32 {{%r[0-9]+}}, [retval0+13];
; CHECK-DAG:   ld.param.b64 {{%rd[0-9]+}}, [retval0+17];
; CHECK-DAG:   st.param.b32 [func_retval0+0],
; CHECK-DAG:   st.param.b32 [func_retval0+4],
; CHECK-DAG:   st.param.b8 [func_retval0+8],
; CHECK-DAG:   st.param.b32 [func_retval0+9],
; CHECK-DAG:   st.param.b32 [func_retval0+13],
; CHECK-DAG:   st.param.b64 [func_retval0+17],

define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
  %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
  ret %s_i8i32x4p %r;
}

; Check that we can vectorize loads that span multiple aggregate fields.
; %s_crossfield occupies an 80-byte slot aligned to 16. The expected access
; pattern is: a v2 at offset 0, a scalar at offset 8, three v4 groups at
; offsets 16, 32 and 48, and a final scalar at offset 64.
; CHECK:       .visible .func (.param .align 16 .b8 func_retval0[80])
; CHECK-LABEL: test_s_crossfield(
; CHECK:       .param .align 16 .b8 test_s_crossfield_param_0[80]
;
; Incoming argument loads, highest offset first.
; CHECK:       ld.param.u32 [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
; CHECK:       ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
; CHECK:       ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
; CHECK:       ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
; CHECK:       ld.param.u32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
; CHECK:       ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
;
; Outgoing call argument stores, lowest offset first.
; CHECK:       .param .align 16 .b8 param0[80];
; CHECK:       st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK:       st.param.b32 [param0+8], [[E2]];
; CHECK:       st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
; CHECK:       st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
; CHECK:       st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
; CHECK:       st.param.b32 [param0+64], [[E15]];
; CHECK:       .param .align 16 .b8 retval0[80];
; CHECK:       call.uni (retval0),
; CHECK:       test_s_crossfield,
;
; Callee result loads.
; CHECK:       ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK:       ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK:       ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
; CHECK:       ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
; CHECK:       ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
; CHECK:       ld.param.b32 [[RE15:%r[0-9]+]], [retval0+64];
;
; Stores into this function's own return slot.
; CHECK:       st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK:       st.param.b32 [func_retval0+8], [[RE2]];
; CHECK:       st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
; CHECK:       st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
; CHECK:       st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
; CHECK:       st.param.b32 [func_retval0+64], [[RE15]];
; CHECK:       ret;

define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
  %ret = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
  ret %s_crossfield %ret;
}
