1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5
6 #include "dml2_internal_shared_types.h"
7 #include "dml2_core_dcn4_calcs.h"
8 #include "dml2_debug.h"
9 #include "lib_float_math.h"
10 #include "dml_top_types.h"
11
/* File-local limits. */
#define DML2_MAX_FMT_420_BUFFER_WIDTH	4096
#define DML_MAX_NUM_OF_SLICES_PER_DSC	4
#define DML_MAX_COMPRESSION_RATIO	4
#define DML_MAX_VSTARTUP_START		1023

/* Compile-time switches; the two commented-out ones are currently disabled. */
//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW
//#define DML_GLOBAL_PREFETCH_CHECK
#define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
19
dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)20 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
21 {
22 switch (bw_type) {
23 case (dml2_core_internal_bw_sdp):
24 return("dml2_core_internal_bw_sdp");
25 case (dml2_core_internal_bw_dram):
26 return("dml2_core_internal_bw_dram");
27 case (dml2_core_internal_bw_max):
28 return("dml2_core_internal_bw_max");
29 default:
30 return("dml2_core_internal_bw_unknown");
31 }
32 }
33
dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)34 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
35 {
36 switch (dml2_core_internal_soc_state_type) {
37 case (dml2_core_internal_soc_state_sys_idle):
38 return("dml2_core_internal_soc_state_sys_idle");
39 case (dml2_core_internal_soc_state_sys_active):
40 return("dml2_core_internal_soc_state_sys_active");
41 case (dml2_core_internal_soc_state_svp_prefetch):
42 return("dml2_core_internal_soc_state_svp_prefetch");
43 case dml2_core_internal_soc_state_max:
44 default:
45 return("dml2_core_internal_soc_state_unknown");
46 }
47 }
48
/*
 * Divide @dividend by @divisor in floating point and return the quotient.
 *
 * Despite its name, *@remainder is not an arithmetic remainder: it is set
 * to 1 when the quotient exceeds its value truncated through an int cast
 * (i.e. it has a positive fractional part), and to 0 otherwise.
 */
static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
{
	const double quotient = dividend / divisor;

	if (quotient - (int)quotient > 0)
		*remainder = 1;
	else
		*remainder = 0;

	return quotient;
}
54
dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info * support,bool fail_only)55 static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
56 {
57 dml2_printf("DML: ===================================== \n");
58 dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n");
59 if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
60 dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
61 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
62 dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
63 if (!fail_only || support->ViewportSizeSupport == 0)
64 dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
65 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
66 dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
67 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
68 dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
69 if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
70 dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
71 if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
72 dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
73 if (!fail_only || support->ExceededMultistreamSlots == 1)
74 dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
75 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
76 dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
77 if (!fail_only || support->NotEnoughLanesForMSO == 1)
78 dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
79 if (!fail_only || support->P2IWith420 == 1)
80 dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420);
81 if (!fail_only || support->DSC422NativeNotSupported == 1)
82 dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
83 if (!fail_only || support->DSCSlicesODMModeSupported == 0)
84 dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
85 if (!fail_only || support->NotEnoughDSCUnits == 1)
86 dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
87 if (!fail_only || support->NotEnoughDSCSlices == 1)
88 dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
89 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
90 dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
91 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
92 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
93 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
94 dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
95 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
96 dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
97 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
98 dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
99 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
100 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
101 if (!fail_only || support->ROBSupport == 0)
102 dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport);
103 if (!fail_only || support->OutstandingRequestsSupport == 0)
104 dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
105 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
106 dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
107 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
108 dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
109 if (!fail_only || support->TotalAvailablePipesSupport == 0)
110 dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
111 if (!fail_only || support->NumberOfOTGSupport == 0)
112 dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
113 if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
114 dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
115 if (!fail_only || support->NumberOfDP2p0Support == 0)
116 dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
117 if (!fail_only || support->EnoughWritebackUnits == 0)
118 dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
119 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
120 dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
121 if (!fail_only || support->WritebackLatencySupport == 0)
122 dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
123 if (!fail_only || support->CursorSupport == 0)
124 dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport);
125 if (!fail_only || support->PitchSupport == 0)
126 dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport);
127 if (!fail_only || support->ViewportExceedsSurface == 1)
128 dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
129 if (!fail_only || support->PrefetchSupported == 0)
130 dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
131 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
132 dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
133 if (!fail_only || support->AvgBandwidthSupport == 0)
134 dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
135 if (!fail_only || support->DynamicMetadataSupported == 0)
136 dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
137 if (!fail_only || support->VRatioInPrefetchSupported == 0)
138 dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
139 if (!fail_only || support->PTEBufferSizeNotExceeded == 0)
140 dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
141 if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0)
142 dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
143 if (!fail_only || support->ExceededMALLSize == 1)
144 dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
145 if (!fail_only || support->g6_temp_read_support == 0)
146 dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
147 if (!fail_only || support->ImmediateFlipSupport == 0)
148 dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
149 if (!fail_only || support->LinkCapacitySupport == 0)
150 dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
151
152 if (!fail_only || support->ModeSupport == 0)
153 dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport);
154 dml2_printf("DML: ===================================== \n");
155 }
156
get_stream_output_bpp(double * out_bpp,const struct dml2_display_cfg * display_cfg)157 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
158 {
159 for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
160 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
161 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
162 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
163 case dml2_444:
164 out_bpp[k] = bpc * 3;
165 break;
166 case dml2_s422:
167 out_bpp[k] = bpc * 2;
168 break;
169 case dml2_n422:
170 out_bpp[k] = bpc * 2;
171 break;
172 case dml2_420:
173 default:
174 out_bpp[k] = bpc * 1.5;
175 break;
176 }
177 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
178 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
179 } else {
180 out_bpp[k] = 0;
181 }
182 #ifdef __DML_VBA_DEBUG__
183 dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
184 dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
185 dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
186 #endif
187 }
188 }
189
/*
 * Round @num to a multiple of @multiple: upwards when @up is true, downwards
 * otherwise. @num is returned unchanged when @multiple is 0 or when @num is
 * already a multiple.
 */
static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
{
	unsigned int excess;

	if (multiple == 0)
		return num;

	excess = num % multiple;
	if (excess == 0)
		return num;

	return up ? (num + multiple - excess) : (num - excess);
}
206
dml_get_num_active_pipes(int unsigned num_planes,const struct core_display_cfg_support_info * cfg_support_info)207 static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
208 {
209 unsigned int num_active_pipes = 0;
210
211 for (unsigned int k = 0; k < num_planes; k++) {
212 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
213 }
214
215 #ifdef __DML_VBA_DEBUG__
216 dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
217 #endif
218 return num_active_pipes;
219 }
220
dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info * cfg_support_info,unsigned int * pipe_plane)221 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
222 {
223 unsigned int pipe_idx = 0;
224
225 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) {
226 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
227 }
228
229 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
230 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
231 pipe_plane[pipe_idx] = plane_idx;
232 pipe_idx++;
233 }
234 }
235 }
236
dml_is_phantom_pipe(const struct dml2_plane_parameters * plane_cfg)237 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
238 {
239 bool is_phantom = false;
240
241 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
242 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) {
243 is_phantom = true;
244 }
245
246 return is_phantom;
247 }
248
dml_get_is_phantom_pipe(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)249 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
250 {
251 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
252
253 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
254 dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
255 return is_phantom;
256 }
257
258 #define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
259 { \
260 unsigned int plane_idx; \
261 plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
262 return (type) interval_var[plane_idx]; \
263 }
264
265 dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
266 dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
267 dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
268 dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
269 dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
270 dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
271
272 dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
273 dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
274 dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
275 dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
276 dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
277 dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
278 dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
279 dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
280 dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
281
282 #define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
283 { \
284 return (type) interval_var[plane_idx]; \
285 }
286
287 dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
288 dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
289 dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
290 dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
291 dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
292 dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
293 dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
294 dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
295 dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
296 dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
297 dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
298
299 #define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
300 { \
301 return (type) interval_var[plane_idx][array_idx]; \
302 }
303
304 dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
305 dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
306
307 #define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
308 { \
309 return (type) internal_var; \
310 }
311
312 dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
313 dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
314 dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
315 dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
316 dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
317 dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
318 dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
319
320 dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
321 dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
322 dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us);
323 dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
324 dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
325 dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
326 dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
327 dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
328 dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
329 dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
330 dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
331 dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
332 dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
333 dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
334 dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
335 dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
336 dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
337 dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
338 dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
339 dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);
340
341 dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
342 dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
343
344 dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
345 dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
346
347 dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
348 dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
349
350 dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
351 dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
352
353 dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
354 dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
355 dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
356
357 dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
358 dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
359 dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
360
361 dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
362 dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
363 dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us);
364 dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
365 dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
366
367 dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
368 dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
369 dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
370 dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
371
372 dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
373 dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
374 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
375 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
376
377 dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
378 dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
379 dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
380 dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
381
382 dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
383 dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
384 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
385 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
386
387 dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
388
389 dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
390 dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
391 dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
392 dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
393 dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
394 dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
395
/*
 * Derive the DET budget from the return-buffer configuration.
 *
 * Outputs:
 *   *MaxTotalDETInKByte
 *       With an MRQ present: 4/5 of (return buffer + ROB) passed through
 *       math_ceil2(..., 64). Otherwise: return buffer size minus one
 *       return-buffer segment.
 *   *nomDETInKByte
 *       *MaxTotalDETInKByte / MaxNumDPP passed through math_floor2() with the
 *       segment size as granularity; replaced by
 *       nomDETInKByteOverrideValue when nomDETInKByteOverrideEnable is
 *       non-zero (the override is always logged).
 *   *MinCompressedBufferSizeInKByte
 *       Return buffer size minus *MaxTotalDETInKByte, in unsigned arithmetic.
 */
static void CalculateMaxDETAndMinCompressedBufferSize(
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInKByte,
	unsigned int ROBBufferSizeInKByte,
	unsigned int MaxNumDPP,
	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
	bool is_mrq_present,

	// Output
	unsigned int *MaxTotalDETInKByte,
	unsigned int *nomDETInKByte,
	unsigned int *MinCompressedBufferSizeInKByte)
{
	*MaxTotalDETInKByte = is_mrq_present ?
		(unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64) :
		ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;

	*nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
	dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
	dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
	dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The override is applied after the debug dump of the computed value. */
	if (nomDETInKByteOverrideEnable != 0) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
		dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
	}
}
433
PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg * display_cfg,bool ptoi_supported,double * PixelClockBackEnd)434 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
435 {
436 //unsigned int num_active_planes = display_cfg->num_planes;
437
438 //Progressive To Interlace Unit Effect
439 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) {
440 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
441 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) {
442 // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
443 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz;
444 }
445 }
446 }
447
dml_is_420(enum dml2_source_format_class source_format)448 static bool dml_is_420(enum dml2_source_format_class source_format)
449 {
450 bool val = false;
451
452 switch (source_format) {
453 case dml2_444_8:
454 val = 0;
455 break;
456 case dml2_444_16:
457 val = 0;
458 break;
459 case dml2_444_32:
460 val = 0;
461 break;
462 case dml2_444_64:
463 val = 0;
464 break;
465 case dml2_420_8:
466 val = 1;
467 break;
468 case dml2_420_10:
469 val = 1;
470 break;
471 case dml2_420_12:
472 val = 1;
473 break;
474 case dml2_422_planar_8:
475 val = 0;
476 break;
477 case dml2_422_planar_10:
478 val = 0;
479 break;
480 case dml2_422_planar_12:
481 val = 0;
482 break;
483 case dml2_422_packed_8:
484 val = 0;
485 break;
486 case dml2_422_packed_10:
487 val = 0;
488 break;
489 case dml2_422_packed_12:
490 val = 0;
491 break;
492 case dml2_rgbe_alpha:
493 val = 0;
494 break;
495 case dml2_rgbe:
496 val = 0;
497 break;
498 case dml2_mono_8:
499 val = 0;
500 break;
501 case dml2_mono_16:
502 val = 0;
503 break;
504 default:
505 DML2_ASSERT(0);
506 break;
507 }
508 return val;
509 }
510
dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)511 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
512 {
513 if (sw_mode == dml2_sw_linear)
514 return 256;
515 else if (sw_mode == dml2_sw_256b_2d)
516 return 256;
517 else if (sw_mode == dml2_sw_4kb_2d)
518 return 4096;
519 else if (sw_mode == dml2_sw_64kb_2d)
520 return 65536;
521 else if (sw_mode == dml2_sw_256kb_2d)
522 return 262144;
523 else if (sw_mode == dml2_gfx11_sw_linear)
524 return 256;
525 else if (sw_mode == dml2_gfx11_sw_64kb_d)
526 return 65536;
527 else if (sw_mode == dml2_gfx11_sw_64kb_d_t)
528 return 65536;
529 else if (sw_mode == dml2_gfx11_sw_64kb_d_x)
530 return 65536;
531 else if (sw_mode == dml2_gfx11_sw_64kb_r_x)
532 return 65536;
533 else if (sw_mode == dml2_gfx11_sw_256kb_d_x)
534 return 262144;
535 else if (sw_mode == dml2_gfx11_sw_256kb_r_x)
536 return 262144;
537 else {
538 DML2_ASSERT(0);
539 return 256;
540 }
541 }
542
dml_is_vertical_rotation(enum dml2_rotation_angle Scan)543 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
544 {
545 bool is_vert = false;
546 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) {
547 is_vert = true;
548 } else {
549 is_vert = false;
550 }
551 return is_vert;
552 }
553
dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)554 static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
555 {
556 int unsigned version = 0;
557
558 if (sw_mode == dml2_sw_linear ||
559 sw_mode == dml2_sw_256b_2d ||
560 sw_mode == dml2_sw_4kb_2d ||
561 sw_mode == dml2_sw_64kb_2d ||
562 sw_mode == dml2_sw_256kb_2d) {
563 version = 12;
564 } else if (sw_mode == dml2_gfx11_sw_linear ||
565 sw_mode == dml2_gfx11_sw_64kb_d ||
566 sw_mode == dml2_gfx11_sw_64kb_d_t ||
567 sw_mode == dml2_gfx11_sw_64kb_d_x ||
568 sw_mode == dml2_gfx11_sw_64kb_r_x ||
569 sw_mode == dml2_gfx11_sw_256kb_d_x ||
570 sw_mode == dml2_gfx11_sw_256kb_r_x) {
571 version = 11;
572 } else {
573 dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
574 DML2_ASSERT(0);
575 }
576
577 return version;
578 }
579
CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,unsigned int pitch_y,unsigned int pitch_c,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC,bool * surf_linear128_l,bool * surf_linear128_c)580 static void CalculateBytePerPixelAndBlockSizes(
581 enum dml2_source_format_class SourcePixelFormat,
582 enum dml2_swizzle_mode SurfaceTiling,
583 unsigned int pitch_y,
584 unsigned int pitch_c,
585
586 // Output
587 unsigned int *BytePerPixelY,
588 unsigned int *BytePerPixelC,
589 double *BytePerPixelDETY,
590 double *BytePerPixelDETC,
591 unsigned int *BlockHeight256BytesY,
592 unsigned int *BlockHeight256BytesC,
593 unsigned int *BlockWidth256BytesY,
594 unsigned int *BlockWidth256BytesC,
595 unsigned int *MacroTileHeightY,
596 unsigned int *MacroTileHeightC,
597 unsigned int *MacroTileWidthY,
598 unsigned int *MacroTileWidthC,
599 bool *surf_linear128_l,
600 bool *surf_linear128_c)
601 {
602 *BytePerPixelDETY = 0;
603 *BytePerPixelDETC = 0;
604 *BytePerPixelY = 1;
605 *BytePerPixelC = 1;
606
607 if (SourcePixelFormat == dml2_444_64) {
608 *BytePerPixelDETY = 8;
609 *BytePerPixelDETC = 0;
610 *BytePerPixelY = 8;
611 *BytePerPixelC = 0;
612 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
613 *BytePerPixelDETY = 4;
614 *BytePerPixelDETC = 0;
615 *BytePerPixelY = 4;
616 *BytePerPixelC = 0;
617 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
618 *BytePerPixelDETY = 2;
619 *BytePerPixelDETC = 0;
620 *BytePerPixelY = 2;
621 *BytePerPixelC = 0;
622 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
623 *BytePerPixelDETY = 1;
624 *BytePerPixelDETC = 0;
625 *BytePerPixelY = 1;
626 *BytePerPixelC = 0;
627 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
628 *BytePerPixelDETY = 4;
629 *BytePerPixelDETC = 1;
630 *BytePerPixelY = 4;
631 *BytePerPixelC = 1;
632 } else if (SourcePixelFormat == dml2_420_8) {
633 *BytePerPixelDETY = 1;
634 *BytePerPixelDETC = 2;
635 *BytePerPixelY = 1;
636 *BytePerPixelC = 2;
637 } else if (SourcePixelFormat == dml2_420_12) {
638 *BytePerPixelDETY = 2;
639 *BytePerPixelDETC = 4;
640 *BytePerPixelY = 2;
641 *BytePerPixelC = 4;
642 } else if (SourcePixelFormat == dml2_420_10) {
643 *BytePerPixelDETY = (double)(4.0 / 3);
644 *BytePerPixelDETC = (double)(8.0 / 3);
645 *BytePerPixelY = 2;
646 *BytePerPixelC = 4;
647 } else {
648 dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
649 DML2_ASSERT(0);
650 }
651
652 #ifdef __DML_VBA_DEBUG__
653 dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
654 dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
655 dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
656 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
657 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
658 dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y);
659 dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c);
660 dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
661 dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
662 #endif
663
664 if (dml_get_gfx_version(SurfaceTiling) == 11) {
665 *surf_linear128_l = 0;
666 *surf_linear128_c = 0;
667 } else {
668 if (SurfaceTiling == dml2_sw_linear) {
669 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
670
671 if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
672 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
673 }
674 }
675
676 if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
677 if (SurfaceTiling == dml2_sw_linear) {
678 *BlockHeight256BytesY = 1;
679 } else if (SourcePixelFormat == dml2_444_64) {
680 *BlockHeight256BytesY = 4;
681 } else if (SourcePixelFormat == dml2_444_8) {
682 *BlockHeight256BytesY = 16;
683 } else {
684 *BlockHeight256BytesY = 8;
685 }
686 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
687 *BlockHeight256BytesC = 0;
688 *BlockWidth256BytesC = 0;
689 } else { // dual plane
690 if (SurfaceTiling == dml2_sw_linear) {
691 *BlockHeight256BytesY = 1;
692 *BlockHeight256BytesC = 1;
693 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
694 *BlockHeight256BytesY = 8;
695 *BlockHeight256BytesC = 16;
696 } else if (SourcePixelFormat == dml2_420_8) {
697 *BlockHeight256BytesY = 16;
698 *BlockHeight256BytesC = 8;
699 } else {
700 *BlockHeight256BytesY = 8;
701 *BlockHeight256BytesC = 8;
702 }
703 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
704 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
705 }
706 #ifdef __DML_VBA_DEBUG__
707 dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
708 dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
709 dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
710 dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
711 #endif
712
713 if (dml_get_gfx_version(SurfaceTiling) == 11) {
714 if (SurfaceTiling == dml2_gfx11_sw_linear) {
715 *MacroTileHeightY = *BlockHeight256BytesY;
716 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
717 *MacroTileHeightC = *BlockHeight256BytesC;
718 if (*MacroTileHeightC == 0) {
719 *MacroTileWidthC = 0;
720 } else {
721 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
722 }
723 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
724 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
725 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
726 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
727 if (*MacroTileHeightC == 0) {
728 *MacroTileWidthC = 0;
729 } else {
730 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
731 }
732 } else {
733 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
734 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
735 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
736 if (*MacroTileHeightC == 0) {
737 *MacroTileWidthC = 0;
738 } else {
739 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
740 }
741 }
742 } else {
743 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
744 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
745
746 if (SurfaceTiling == dml2_sw_linear) {
747 macro_tile_scale = 1;
748 } else if (SurfaceTiling == dml2_sw_4kb_2d) {
749 macro_tile_scale = 4;
750 } else if (SurfaceTiling == dml2_sw_64kb_2d) {
751 macro_tile_scale = 16;
752 } else if (SurfaceTiling == dml2_sw_256kb_2d) {
753 macro_tile_scale = 32;
754 } else {
755 dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
756 DML2_ASSERT(0);
757 }
758
759 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
760 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
761 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
762 if (*MacroTileHeightC == 0) {
763 *MacroTileWidthC = 0;
764 } else {
765 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
766 }
767 }
768
769 #ifdef __DML_VBA_DEBUG__
770 dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
771 dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
772 dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
773 dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
774 #endif
775 }
776
CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum dml2_source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)777 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
778 double HRatio,
779 double HRatioChroma,
780 double VRatio,
781 double VRatioChroma,
782 double MaxDCHUBToPSCLThroughput,
783 double MaxPSCLToLBThroughput,
784 double PixelClock,
785 enum dml2_source_format_class SourcePixelFormat,
786 unsigned int HTaps,
787 unsigned int HTapsChroma,
788 unsigned int VTaps,
789 unsigned int VTapsChroma,
790
791 // Output
792 double *PSCL_THROUGHPUT,
793 double *PSCL_THROUGHPUT_CHROMA,
794 double *DPPCLKUsingSingleDPP)
795 {
796 double DPPCLKUsingSingleDPPLuma;
797 double DPPCLKUsingSingleDPPChroma;
798
799 if (HRatio > 1) {
800 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
801 } else {
802 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
803 }
804
805 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
806
807 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
808 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
809
810 if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
811 *PSCL_THROUGHPUT_CHROMA = 0;
812 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
813 } else {
814 if (HRatioChroma > 1) {
815 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
816 } else {
817 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
818 }
819 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma),
820 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
821 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
822 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
823 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
824 }
825 }
826
CalculateSwathWidth(const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum dml2_odm_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],bool surf_linear128_l[],bool surf_linear128_c[],unsigned int DPPPerSurface[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],unsigned int SwathWidthSingleDPPY[],unsigned int SwathWidthSingleDPPC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])827 static void CalculateSwathWidth(
828 const struct dml2_display_cfg *display_cfg,
829 bool ForceSingleDPP,
830 unsigned int NumberOfActiveSurfaces,
831 enum dml2_odm_mode ODMMode[],
832 unsigned int BytePerPixY[],
833 unsigned int BytePerPixC[],
834 unsigned int Read256BytesBlockHeightY[],
835 unsigned int Read256BytesBlockHeightC[],
836 unsigned int Read256BytesBlockWidthY[],
837 unsigned int Read256BytesBlockWidthC[],
838 bool surf_linear128_l[],
839 bool surf_linear128_c[],
840 unsigned int DPPPerSurface[],
841
842 // Output
843 unsigned int req_per_swath_ub_l[],
844 unsigned int req_per_swath_ub_c[],
845 unsigned int SwathWidthSingleDPPY[], // post-rotated plane width
846 unsigned int SwathWidthSingleDPPC[],
847 unsigned int SwathWidthY[], // per-pipe
848 unsigned int SwathWidthC[], // per-pipe
849 unsigned int MaximumSwathHeightY[],
850 unsigned int MaximumSwathHeightC[],
851 unsigned int swath_width_luma_ub[], // per-pipe
852 unsigned int swath_width_chroma_ub[]) // per-pipe
853 {
854 enum dml2_odm_mode MainSurfaceODMMode;
855 double odm_hactive_factor = 1.0;
856 unsigned int req_width_horz_y;
857 unsigned int req_width_horz_c;
858 unsigned int surface_width_ub_l;
859 unsigned int surface_height_ub_l;
860 unsigned int surface_width_ub_c;
861 unsigned int surface_height_ub_c;
862
863 #ifdef __DML_VBA_DEBUG__
864 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
865 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
866 #endif
867
868 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
869 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
870 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
871 } else {
872 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
873 }
874
875 #ifdef __DML_VBA_DEBUG__
876 dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
877 dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
878 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
879 #endif
880
881 MainSurfaceODMMode = ODMMode[k];
882
883 if (ForceSingleDPP) {
884 SwathWidthY[k] = SwathWidthSingleDPPY[k];
885 } else {
886 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
887 odm_hactive_factor = 4.0;
888 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
889 odm_hactive_factor = 3.0;
890 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
891 odm_hactive_factor = 2.0;
892
893 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
894 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
895 } else if (DPPPerSurface[k] == 2) {
896 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
897 } else {
898 SwathWidthY[k] = SwathWidthSingleDPPY[k];
899 }
900 }
901
902 #ifdef __DML_VBA_DEBUG__
903 dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
904 dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
905 dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
906 dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
907 dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
908 #endif
909
910 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
911 SwathWidthC[k] = SwathWidthY[k] / 2;
912 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
913 } else {
914 SwathWidthC[k] = SwathWidthY[k];
915 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
916 }
917
918 if (ForceSingleDPP == true) {
919 SwathWidthY[k] = SwathWidthSingleDPPY[k];
920 SwathWidthC[k] = SwathWidthSingleDPPC[k];
921 }
922
923 req_width_horz_y = Read256BytesBlockWidthY[k];
924 req_width_horz_c = Read256BytesBlockWidthC[k];
925
926 if (surf_linear128_l[k])
927 req_width_horz_y = req_width_horz_y / 2;
928
929 if (surf_linear128_c[k])
930 req_width_horz_c = req_width_horz_c / 2;
931
932 surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
933 surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
934 surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
935 surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
936
937 #ifdef __DML_VBA_DEBUG__
938 dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
939 dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
940 dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
941 dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
942 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
943 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
944 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
945 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
946 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
947 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
948 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
949 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
950 dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
951 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
952 #endif
953
954 req_per_swath_ub_l[k] = 0;
955 req_per_swath_ub_c[k] = 0;
956 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
957 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
958 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
959 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
960 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
961 } else {
962 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
963 }
964 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
965
966 if (BytePerPixC[k] > 0) {
967 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
968 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
969 } else {
970 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
971 }
972 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
973 } else {
974 swath_width_chroma_ub[k] = 0;
975 }
976 } else {
977 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
978 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
979
980 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
981 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
982 } else {
983 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
984 }
985 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
986 if (BytePerPixC[k] > 0) {
987 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
988 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
989 } else {
990 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
991 }
992 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
993 } else {
994 swath_width_chroma_ub[k] = 0;
995 }
996 }
997
998 #ifdef __DML_VBA_DEBUG__
999 dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
1000 dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
1001 dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
1002 dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
1003 dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
1004 dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
1005 #endif
1006
1007 }
1008 }
1009
/*
 * Decide whether unbounded requesting is enabled.
 *
 * It is only possible when exactly one DPP is active and NoChromaOrLinear is
 * true.  When unb_req_force_en is set, unb_req_force_val can additionally
 * switch it off, but can never switch it on for an ineligible configuration.
 *
 * Returns true when unbounded requesting is enabled.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool eligible = (TotalNumberOfActiveDPP == 1) && NoChromaOrLinear;
	const bool enabled = unb_req_force_en ? (unb_req_force_val && eligible) : eligible;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, eligible);
	dml2_printf("DML::%s: unb_req_en = %u\n", __func__, enabled);
#endif

	return enabled;
}
1029
CalculateDETBufferSize(struct dml2_core_shared_CalculateDETBufferSize_locals * l,const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int ConfigReturnBufferSegmentSizeInkByte,unsigned int CompressedBufferSegmentSizeInkByte,double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int full_swath_bytes_l[],unsigned int full_swath_bytes_c[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)1030 static void CalculateDETBufferSize(
1031 struct dml2_core_shared_CalculateDETBufferSize_locals *l,
1032 const struct dml2_display_cfg *display_cfg,
1033 bool ForceSingleDPP,
1034 unsigned int NumberOfActiveSurfaces,
1035 bool UnboundedRequestEnabled,
1036 unsigned int nomDETInKByte,
1037 unsigned int MaxTotalDETInKByte,
1038 unsigned int ConfigReturnBufferSizeInKByte,
1039 unsigned int MinCompressedBufferSizeInKByte,
1040 unsigned int ConfigReturnBufferSegmentSizeInkByte,
1041 unsigned int CompressedBufferSegmentSizeInkByte,
1042 double ReadBandwidthLuma[],
1043 double ReadBandwidthChroma[],
1044 unsigned int full_swath_bytes_l[],
1045 unsigned int full_swath_bytes_c[],
1046 unsigned int DPPPerSurface[],
1047 // Output
1048 unsigned int DETBufferSizeInKByte[],
1049 unsigned int *CompressedBufferSizeInkByte)
1050 {
1051 memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
1052
1053 bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
1054 bool NextPotentialSurfaceToAssignDETPieceFound;
1055 bool MinimizeReallocationSuccess = false;
1056
1057 #ifdef __DML_VBA_DEBUG__
1058 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
1059 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
1060 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
1061 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
1062 dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
1063 dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
1064 dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
1065 dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
1066 #endif
1067
1068 // Note: Will use default det size if that fits 2 swaths
1069 if (UnboundedRequestEnabled) {
1070 if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
1071 DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
1072 } else {
1073 DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
1074 }
1075 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
1076 } else {
1077 l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
1078 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1079 DETBufferSizeInKByte[k] = 0;
1080 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
1081 l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
1082 } else {
1083 l->max_minDET = nomDETInKByte;
1084 }
1085 l->minDET = 128;
1086 l->minDET_pipe = 0;
1087
1088 // add DET resource until can hold 2 full swaths
1089 while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
1090 if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
1091 l->minDET_pipe = l->minDET;
1092 l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
1093 }
1094
1095 #ifdef __DML_VBA_DEBUG__
1096 dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
1097 dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
1098 dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
1099 dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
1100 dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
1101 #endif
1102
1103 if (l->minDET_pipe == 0) {
1104 l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
1105 #ifdef __DML_VBA_DEBUG__
1106 dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
1107 #endif
1108 }
1109
1110 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1111 DETBufferSizeInKByte[k] = 0;
1112 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
1113 DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1114 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1115 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
1116 DETBufferSizeInKByte[k] = l->minDET_pipe;
1117 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
1118 }
1119
1120 #ifdef __DML_VBA_DEBUG__
1121 dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
1122 dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
1123 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1124 dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
1125 #endif
1126 }
1127
1128 if (display_cfg->minimize_det_reallocation) {
1129 MinimizeReallocationSuccess = true;
1130 // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
1131 // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
1132 // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
1133 // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
1134
1135 // Calculate total pixel rate
1136 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1137 l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
1138 }
1139
1140 // Calculate per stream DET budget
1141 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1142 l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
1143 l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
1144 }
1145
1146 // Calculate the per stream total bandwidth
1147 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1148 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1149 l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1150
1151 // Check the minimum can be satisfied by budget
1152 if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1153 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1154 } else {
1155 MinimizeReallocationSuccess = false;
1156 break;
1157 }
1158 }
1159 }
1160
1161 if (MinimizeReallocationSuccess) {
1162 // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
1163 // budget proportionally across its planes
1164 l->ResidualDETAfterRounding = MaxTotalDETInKByte;
1165
1166 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1167 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1168 l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
1169 * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
1170
1171 if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
1172 l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
1173 if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
1174 l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
1175
1176 /* split the additional budgeted DET among the pipes per plane */
1177 DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
1178 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
1179 }
1180
1181 // Round down to segment size
1182 DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
1183
1184 l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1185 }
1186 }
1187 }
1188 }
1189
1190 if (!MinimizeReallocationSuccess) {
1191 l->TotalBandwidth = 0;
1192 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1193 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1194 l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1195 }
1196 }
1197 #ifdef __DML_VBA_DEBUG__
1198 dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1199 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1200 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1201 }
1202 dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1203 #endif
1204 dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
1205 l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
1206 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1207
1208 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1209 DETPieceAssignedToThisSurfaceAlready[k] = true;
1210 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
1211 DETPieceAssignedToThisSurfaceAlready[k] = true;
1212 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1213 } else {
1214 DETPieceAssignedToThisSurfaceAlready[k] = false;
1215 }
1216 #ifdef __DML_VBA_DEBUG__
1217 dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
1218 dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
1219 #endif
1220 }
1221
1222 for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
1223 NextPotentialSurfaceToAssignDETPieceFound = false;
1224 l->NextSurfaceToAssignDETPiece = 0;
1225
1226 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1227 #ifdef __DML_VBA_DEBUG__
1228 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
1229 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
1230 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1231 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1232 dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
1233 #endif
1234 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
1235 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
1236 l->NextSurfaceToAssignDETPiece = k;
1237 NextPotentialSurfaceToAssignDETPieceFound = true;
1238 }
1239 #ifdef __DML_VBA_DEBUG__
1240 dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1241 dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1242 #endif
1243 }
1244
1245 if (NextPotentialSurfaceToAssignDETPieceFound) {
1246 l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
1247 math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
1248 ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
1249 * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
1250 math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
1251
1252 #ifdef __DML_VBA_DEBUG__
1253 dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
1254 dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
1255 dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1256 dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1257 dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
1258 dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
1259 dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1260 #endif
1261
1262 DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
1263 #ifdef __DML_VBA_DEBUG__
1264 dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1265 #endif
1266
1267 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
1268 DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
1269 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1270 }
1271 }
1272 }
1273 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1274 }
1275 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
1276
1277 #ifdef __DML_VBA_DEBUG__
1278 dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1279 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
1280 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1281 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1282 }
1283 #endif
1284 }
1285
CalculateRequiredDispclk(enum dml2_odm_mode ODMMode,double PixelClock)1286 static double CalculateRequiredDispclk(
1287 enum dml2_odm_mode ODMMode,
1288 double PixelClock)
1289 {
1290
1291 if (ODMMode == dml2_odm_mode_combine_4to1) {
1292 return PixelClock / 4.0;
1293 } else if (ODMMode == dml2_odm_mode_combine_3to1) {
1294 return PixelClock / 3.0;
1295 } else if (ODMMode == dml2_odm_mode_combine_2to1) {
1296 return PixelClock / 2.0;
1297 } else {
1298 return PixelClock;
1299 }
1300 }
1301
TruncToValidBPP(struct dml2_core_shared_TruncToValidBPP_locals * l,double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum dml2_output_encoder_class Output,enum dml2_output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,unsigned int * RequiredSlots)1302 static double TruncToValidBPP(
1303 struct dml2_core_shared_TruncToValidBPP_locals *l,
1304 double LinkBitRate,
1305 unsigned int Lanes,
1306 unsigned int HTotal,
1307 unsigned int HActive,
1308 double PixelClock,
1309 double DesiredBPP,
1310 bool DSCEnable,
1311 enum dml2_output_encoder_class Output,
1312 enum dml2_output_format_class Format,
1313 unsigned int DSCInputBitPerComponent,
1314 unsigned int DSCSlices,
1315 unsigned int AudioRate,
1316 unsigned int AudioLayout,
1317 enum dml2_odm_mode ODMModeNoDSC,
1318 enum dml2_odm_mode ODMModeDSC,
1319
1320 // Output
1321 unsigned int *RequiredSlots)
1322 {
1323 double MaxLinkBPP;
1324 unsigned int MinDSCBPP;
1325 double MaxDSCBPP;
1326 unsigned int NonDSCBPP0;
1327 unsigned int NonDSCBPP1;
1328 unsigned int NonDSCBPP2;
1329 enum dml2_odm_mode ODMMode;
1330
1331 if (Format == dml2_420) {
1332 NonDSCBPP0 = 12;
1333 NonDSCBPP1 = 15;
1334 NonDSCBPP2 = 18;
1335 MinDSCBPP = 6;
1336 MaxDSCBPP = 16;
1337 } else if (Format == dml2_444) {
1338 NonDSCBPP0 = 24;
1339 NonDSCBPP1 = 30;
1340 NonDSCBPP2 = 36;
1341 MinDSCBPP = 8;
1342 MaxDSCBPP = 16;
1343 } else {
1344 if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
1345 NonDSCBPP0 = 24;
1346 NonDSCBPP1 = 24;
1347 NonDSCBPP2 = 24;
1348 } else {
1349 NonDSCBPP0 = 16;
1350 NonDSCBPP1 = 20;
1351 NonDSCBPP2 = 24;
1352 }
1353 if (Format == dml2_n422 || Output == dml2_hdmifrl) {
1354 MinDSCBPP = 7;
1355 MaxDSCBPP = 16;
1356 } else {
1357 MinDSCBPP = 8;
1358 MaxDSCBPP = 16;
1359 }
1360 }
1361 if (Output == dml2_dp2p0) {
1362 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
1363 } else if (DSCEnable && Output == dml2_dp) {
1364 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
1365 } else {
1366 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
1367 }
1368
1369 ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
1370
1371 if (ODMMode == dml2_odm_mode_split_1to2) {
1372 MaxLinkBPP = 2 * MaxLinkBPP;
1373 }
1374
1375 if (DesiredBPP == 0) {
1376 if (DSCEnable) {
1377 if (MaxLinkBPP < MinDSCBPP) {
1378 return __DML2_CALCS_DPP_INVALID__;
1379 } else if (MaxLinkBPP >= MaxDSCBPP) {
1380 return MaxDSCBPP;
1381 } else {
1382 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
1383 }
1384 } else {
1385 if (MaxLinkBPP >= NonDSCBPP2) {
1386 return NonDSCBPP2;
1387 } else if (MaxLinkBPP >= NonDSCBPP1) {
1388 return NonDSCBPP1;
1389 } else if (MaxLinkBPP >= NonDSCBPP0) {
1390 return NonDSCBPP0;
1391 } else {
1392 return __DML2_CALCS_DPP_INVALID__;
1393 }
1394 }
1395 } else {
1396 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
1397 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
1398 return __DML2_CALCS_DPP_INVALID__;
1399 } else {
1400 return DesiredBPP;
1401 }
1402 }
1403 }
1404
1405 // updated for dcn4
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1406 static unsigned int dscceComputeDelay(
1407 unsigned int bpc,
1408 double BPP,
1409 unsigned int sliceWidth,
1410 unsigned int numSlices,
1411 enum dml2_output_format_class pixelFormat,
1412 enum dml2_output_encoder_class Output)
1413 {
1414 // valid bpc = source bits per component in the set of {8, 10, 12}
1415 // valid bpp = increments of 1/16 of a bit
1416 // min = 6/7/8 in N420/N422/444, respectively
1417 // max = such that compression is 1:1
1418 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
1419 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
1420 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
1421
1422 // fixed value
1423 unsigned int rcModelSize = 8192;
1424
1425 // N422/N420 operate at 2 pixels per clock
1426 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
1427
1428 if (pixelFormat == dml2_420)
1429 pixelsPerClock = 2;
1430 // #all other modes operate at 1 pixel per clock
1431 else if (pixelFormat == dml2_444)
1432 pixelsPerClock = 1;
1433 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1434 pixelsPerClock = 2;
1435 else
1436 pixelsPerClock = 1;
1437
1438 //initial transmit delay as per PPS
1439 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
1440
1441 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
1442 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
1443
1444 padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
1445
1446 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
1447 if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
1448 initial_xmit_delay++;
1449 }
1450 }
1451
1452 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
1453 if (bpc == 8)
1454 ssm_group_priming_delay = 83;
1455 else if (bpc == 10)
1456 ssm_group_priming_delay = 91;
1457 else if (bpc == 12)
1458 ssm_group_priming_delay = 115;
1459 else if (bpc == 14)
1460 ssm_group_priming_delay = 123;
1461 else
1462 ssm_group_priming_delay = 128;
1463
1464 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
1465 slice_width_groups = (slice_width_modified + 2) / 3;
1466
1467 //determine number of padded pixels in the last group of a slice line, computed as
1468 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
1469
1470 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
1471 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
1472
1473 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay
1474 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
1475 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
1476
1477 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
1478 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
1479
1480 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
1481 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
1482
1483 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
1484 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
1485 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
1486
1487 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
1488 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
1489 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
1490
1491 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
1492 ssm_pipeline_delay = 2;
1493
1494 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
1495 obsm_pipeline_delay = 1;
1496
1497 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
1498 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1499 cycles_per_group = 6;
1500 else
1501 cycles_per_group = 3;
1502 //delay of the bit stream contruction layer in pixels is the sum of:
1503 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice
1504 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
1505 //3. additional group of delay if initial transmit delay is reached exactly in a group
1506 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
1507 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
1508 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
1509
1510 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
1511 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
1512
1513 #ifdef __DML_VBA_DEBUG__
1514 dml2_printf("DML::%s: bpc: %u\n", __func__, bpc);
1515 dml2_printf("DML::%s: BPP: %f\n", __func__, BPP);
1516 dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
1517 dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices);
1518 dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
1519 dml2_printf("DML::%s: Output: %u\n", __func__, Output);
1520 dml2_printf("DML::%s: pixels: %u\n", __func__, pixels);
1521 #endif
1522 return pixels;
1523 }
1524
1525 //updated in dcn4
dscComputeDelay(enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1526 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
1527 {
1528 unsigned int Delay = 0;
1529 unsigned int dispclk_per_dscclk = 3;
1530
1531 // sfr
1532 Delay = Delay + 2;
1533
1534 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1535 dispclk_per_dscclk = 3 * 2;
1536 }
1537
1538 if (pixelFormat == dml2_420) {
1539 //dscc top delay for pixel compression layer
1540 Delay = Delay + 16 * dispclk_per_dscclk;
1541
1542 // dscc - input deserializer
1543 Delay = Delay + 5;
1544
1545 // dscc - input cdc fifo
1546 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1547
1548 // dscc - output cdc fifo
1549 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1550
1551 // dscc - cdc uncertainty
1552 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1553 } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1554 //dscc top delay for pixel compression layer
1555 Delay = Delay + 16 * dispclk_per_dscclk;
1556 // dsccif
1557 Delay = Delay + 1;
1558 // dscc - input deserializer
1559 Delay = Delay + 5;
1560 // dscc - input cdc fifo
1561 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1562
1563
1564 // dscc - output cdc fifo
1565 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1566 // dscc - cdc uncertainty
1567 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1568 } else if (pixelFormat == dml2_s422) {
1569 //dscc top delay for pixel compression layer
1570 Delay = Delay + 17 * dispclk_per_dscclk;
1571
1572 // dscc - input deserializer
1573 Delay = Delay + 3;
1574 // dscc - input cdc fifo
1575 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1576 // dscc - output cdc fifo
1577 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1578 // dscc - cdc uncertainty
1579 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1580 } else {
1581 //dscc top delay for pixel compression layer
1582 Delay = Delay + 16 * dispclk_per_dscclk;
1583 // dscc - input deserializer
1584 Delay = Delay + 3;
1585 // dscc - input cdc fifo
1586 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1587 // dscc - output cdc fifo
1588 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1589
1590 // dscc - cdc uncertainty
1591 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1592 }
1593
1594 // sft
1595 Delay = Delay + 1;
1596 #ifdef __DML_VBA_DEBUG__
1597 dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
1598 dml2_printf("DML::%s: Delay = %u\n", __func__, Delay);
1599 #endif
1600
1601 return Delay;
1602 }
1603
/*
 * Derive the number of dynamic host-VM page table levels.
 *
 * GPUVMEnable, HostVMEnable:          both must be true, otherwise the result is 0.
 * HostVMMinPageSize:                  minimum host VM page size, compared against 2048
 *                                     and 1048576 (presumably KB, i.e. 2 MiB and
 *                                     1 GiB - TODO confirm against the caller).
 * HostVMMaxNonCachedPageTableLevels:  upper bound on the number of levels.
 *
 * Larger pages remove levels from the maximum: none below 2048, one from
 * 2048 up to (but excluding) 1048576, two from 1048576 upwards. The result
 * saturates at 0 instead of wrapping.
 *
 * The clamp is done in integer arithmetic; it yields the same value as
 * clamping in double precision and casting back, without the float round trip.
 */
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int levels_removed;

	if (!GPUVMEnable || !HostVMEnable)
		return 0;

	if (HostVMMinPageSize < 2048)
		levels_removed = 0;
	else if (HostVMMinPageSize < 1048576)
		levels_removed = 1;
	else
		levels_removed = 2;

	// Saturating subtraction: never go below zero levels
	if (HostVMMaxNonCachedPageTableLevels <= levels_removed)
		return 0;

	return HostVMMaxNonCachedPageTableLevels - levels_removed;
}
1624
CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params * p)1625 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
1626 {
1627 unsigned int extra_dpde_bytes;
1628 unsigned int extra_mpde_bytes;
1629 unsigned int MacroTileSizeBytes;
1630 unsigned int vp_height_dpte_ub;
1631
1632 unsigned int meta_surface_bytes;
1633 unsigned int vm_bytes;
1634 unsigned int vp_height_meta_ub;
1635 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
1636
1637 *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
1638 *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
1639 if (p->SurfaceTiling == dml2_sw_linear) {
1640 *p->meta_row_height = 32;
1641 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1642 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
1643 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1644 *p->meta_row_height = *p->MetaRequestHeight;
1645 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1646 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1647 } else {
1648 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
1649 }
1650 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
1651 } else {
1652 *p->meta_row_height = *p->MetaRequestWidth;
1653 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1654 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
1655 } else {
1656 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
1657 }
1658 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
1659 }
1660
1661 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1662 vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
1663 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1664 vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1665 } else {
1666 vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1667 }
1668
1669 meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
1670 #ifdef __DML_VBA_DEBUG__
1671 dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
1672 dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
1673 #endif
1674 if (p->GPUVMEnable == true) {
1675 double meta_vmpg_bytes = 4.0 * 1024.0;
1676 *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
1677 extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
1678 } else {
1679 *p->meta_pte_bytes_per_frame_ub = 0;
1680 extra_mpde_bytes = 0;
1681 }
1682
1683 if (!p->DCCEnable || !p->mrq_present) {
1684 *p->meta_pte_bytes_per_frame_ub = 0;
1685 extra_mpde_bytes = 0;
1686 *p->meta_row_bytes = 0;
1687 }
1688
1689 if (!p->GPUVMEnable) {
1690 *p->PixelPTEBytesPerRow = 0;
1691 *p->PixelPTEBytesPerRowStorage = 0;
1692 *p->dpte_row_width_ub = 0;
1693 *p->dpte_row_height = 0;
1694 *p->dpte_row_height_linear = 0;
1695 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1696 *p->dpte_row_width_ub_one_row_per_frame = 0;
1697 *p->dpte_row_height_one_row_per_frame = 0;
1698 *p->vmpg_width = 0;
1699 *p->vmpg_height = 0;
1700 *p->PixelPTEReqWidth = 0;
1701 *p->PixelPTEReqHeight = 0;
1702 *p->PTERequestSize = 0;
1703 *p->dpde0_bytes_per_frame_ub = 0;
1704 return 0;
1705 }
1706
1707 MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
1708
1709 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1710 vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
1711 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1712 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
1713 } else {
1714 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
1715 }
1716
1717 if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
1718 *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
1719 extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
1720 } else {
1721 *p->dpde0_bytes_per_frame_ub = 0;
1722 extra_dpde_bytes = 0;
1723 }
1724
1725 vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
1726
1727 #ifdef __DML_VBA_DEBUG__
1728 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
1729 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1730 dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
1731 dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
1732 dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
1733 dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
1734 dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
1735 dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
1736 dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
1737 dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
1738 dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
1739 dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
1740 dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
1741 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
1742 dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
1743 dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
1744 dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
1745 #endif
1746
1747 if (p->SurfaceTiling == dml2_sw_linear) {
1748 *p->PixelPTEReqHeight = 1;
1749 *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1750 PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1751 *p->PTERequestSize = 64;
1752
1753 *p->vmpg_height = 1;
1754 *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
1755 } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
1756 *p->PixelPTEReqHeight = p->MacroTileHeight;
1757 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1758 *p->PTERequestSize = 64;
1759
1760 *p->vmpg_height = p->MacroTileHeight;
1761 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1762
1763 } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
1764 // one 64KB tile, is 16x16x256B req
1765 *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
1766 *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
1767 *p->PTERequestSize = 128;
1768
1769 *p->vmpg_height = *p->PixelPTEReqHeight;
1770 *p->vmpg_width = *p->PixelPTEReqWidth;
1771 } else {
1772 // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
1773 *p->PixelPTEReqHeight = p->MacroTileHeight;
1774 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1775 *p->PTERequestSize = 64;
1776
1777 *p->vmpg_height = p->MacroTileHeight;
1778 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1779
1780 if (p->GPUVMEnable == true) {
1781 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
1782 __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
1783 DML2_ASSERT(0);
1784 }
1785 }
1786
1787 #ifdef __DML_VBA_DEBUG__
1788 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1789 dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
1790 dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
1791 dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
1792 dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
1793 dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch);
1794 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
1795 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
1796 #endif
1797
1798 *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
1799 *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
1800 *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1801 *p->dpte_row_height_linear = 0;
1802
1803 if (p->SurfaceTiling == dml2_sw_linear) {
1804 *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
1805 *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
1806 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1807
1808 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
1809 *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
1810 if (*p->dpte_row_height_linear > 128)
1811 *p->dpte_row_height_linear = 128;
1812
1813 #ifdef __DML_VBA_DEBUG__
1814 dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
1815 #endif
1816
1817 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1818 *p->dpte_row_height = *p->PixelPTEReqHeight;
1819
1820 if (p->GPUVMMinPageSizeKBytes > 64) {
1821 *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
1822 } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1823 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
1824 } else {
1825 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
1826 }
1827 #ifdef __DML_VBA_DEBUG__
1828 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
1829 #endif
1830
1831 *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
1832 } else {
1833 *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
1834
1835 if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1836 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
1837 } else {
1838 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
1839 }
1840
1841 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
1842 #ifdef __DML_VBA_DEBUG__
1843 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
1844 #endif
1845 }
1846
1847 if (p->GPUVMEnable != true) {
1848 *p->PixelPTEBytesPerRow = 0;
1849 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1850 }
1851
1852 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
1853
1854 #ifdef __DML_VBA_DEBUG__
1855 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1856 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1857 dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
1858 dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
1859 dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
1860 dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
1861 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
1862 dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
1863 dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
1864 dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
1865 dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
1866 dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
1867 #endif
1868
1869 return vm_bytes;
1870 } // CalculateVMAndRowBytes
1871
CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dml2_rotation_angle RotationAngle,bool mirrored,bool ViewportStationary,unsigned int SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,unsigned int * VInitPreFill,unsigned int * MaxNumSwath)1872 static unsigned int CalculatePrefetchSourceLines(
1873 double VRatio,
1874 unsigned int VTaps,
1875 bool Interlace,
1876 bool ProgressiveToInterlaceUnitInOPP,
1877 unsigned int SwathHeight,
1878 enum dml2_rotation_angle RotationAngle,
1879 bool mirrored,
1880 bool ViewportStationary,
1881 unsigned int SwathWidth,
1882 unsigned int ViewportHeight,
1883 unsigned int ViewportXStart,
1884 unsigned int ViewportYStart,
1885
1886 // Output
1887 unsigned int *VInitPreFill,
1888 unsigned int *MaxNumSwath)
1889 {
1890
1891 unsigned int vp_start_rot = 0;
1892 unsigned int sw0_tmp = 0;
1893 unsigned int MaxPartialSwath = 0;
1894 double numLines = 0;
1895
1896 #ifdef __DML_VBA_DEBUG__
1897 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
1898 dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps);
1899 dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
1900 dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
1901 dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
1902 dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
1903 #endif
1904 if (ProgressiveToInterlaceUnitInOPP)
1905 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
1906 else
1907 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
1908
1909 if (ViewportStationary) {
1910 if (RotationAngle == dml2_rotation_180) {
1911 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
1912 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) {
1913 vp_start_rot = ViewportXStart;
1914 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) {
1915 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
1916 } else {
1917 vp_start_rot = ViewportYStart;
1918 }
1919 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
1920 if (sw0_tmp < *VInitPreFill) {
1921 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1);
1922 } else {
1923 *MaxNumSwath = 1;
1924 }
1925 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight));
1926 } else {
1927 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
1928 if (*VInitPreFill > 1) {
1929 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
1930 } else {
1931 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
1932 }
1933 }
1934 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
1935
1936 #ifdef __DML_VBA_DEBUG__
1937 dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
1938 dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
1939 dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
1940 dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
1941 dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
1942 #endif
1943 return (unsigned int)(numLines);
1944
1945 }
1946
CalculateRowBandwidth(bool GPUVMEnable,bool use_one_row_for_frame,enum dml2_source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,bool mrq_present,unsigned int meta_row_bytes_per_row_ub_l,unsigned int meta_row_bytes_per_row_ub_c,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,double * dpte_row_bw,double * meta_row_bw)1947 static void CalculateRowBandwidth(
1948 bool GPUVMEnable,
1949 bool use_one_row_for_frame,
1950 enum dml2_source_format_class SourcePixelFormat,
1951 double VRatio,
1952 double VRatioChroma,
1953 bool DCCEnable,
1954 double LineTime,
1955 unsigned int PixelPTEBytesPerRowLuma,
1956 unsigned int PixelPTEBytesPerRowChroma,
1957 unsigned int dpte_row_height_luma,
1958 unsigned int dpte_row_height_chroma,
1959
1960 bool mrq_present,
1961 unsigned int meta_row_bytes_per_row_ub_l,
1962 unsigned int meta_row_bytes_per_row_ub_c,
1963 unsigned int meta_row_height_luma,
1964 unsigned int meta_row_height_chroma,
1965
1966 // Output
1967 double *dpte_row_bw,
1968 double *meta_row_bw)
1969 {
1970 if (!DCCEnable || !mrq_present) {
1971 *meta_row_bw = 0;
1972 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1973 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
1974 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
1975 } else {
1976 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
1977 }
1978
1979 if (GPUVMEnable != true) {
1980 *dpte_row_bw = 0;
1981 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1982 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
1983 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
1984 } else {
1985 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
1986 }
1987 }
1988
CalculateMALLUseForStaticScreen(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool is_using_mall_for_ss[])1989 static void CalculateMALLUseForStaticScreen(
1990 const struct dml2_display_cfg *display_cfg,
1991 unsigned int NumberOfActiveSurfaces,
1992 unsigned int MALLAllocatedForDCN,
1993 unsigned int SurfaceSizeInMALL[],
1994 bool one_row_per_frame_fits_in_buffer[],
1995
1996 // Output
1997 bool is_using_mall_for_ss[])
1998 {
1999
2000 unsigned int SurfaceToAddToMALL;
2001 bool CanAddAnotherSurfaceToMALL;
2002 unsigned int TotalSurfaceSizeInMALL;
2003
2004 TotalSurfaceSizeInMALL = 0;
2005 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
2006 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
2007 if (is_using_mall_for_ss[k])
2008 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2009 #ifdef __DML_VBA_DEBUG__
2010 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
2011 dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
2012 #endif
2013 }
2014
2015 SurfaceToAddToMALL = 0;
2016 CanAddAnotherSurfaceToMALL = true;
2017 while (CanAddAnotherSurfaceToMALL) {
2018 CanAddAnotherSurfaceToMALL = false;
2019 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
2020 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 &&
2021 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] &&
2022 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2023 CanAddAnotherSurfaceToMALL = true;
2024 SurfaceToAddToMALL = k;
2025 dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
2026 }
2027 }
2028 if (CanAddAnotherSurfaceToMALL) {
2029 is_using_mall_for_ss[SurfaceToAddToMALL] = true;
2030 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2031
2032 #ifdef __DML_VBA_DEBUG__
2033 dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
2034 dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
2035 #endif
2036 }
2037 }
2038 }
2039
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum dml2_source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dml2_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dml2_rotation_angle RotationAngle,enum dml2_core_internal_request_type * RequestLuma,enum dml2_core_internal_request_type * RequestChroma,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)2040 static void CalculateDCCConfiguration(
2041 bool DCCEnabled,
2042 bool DCCProgrammingAssumesScanDirectionUnknown,
2043 enum dml2_source_format_class SourcePixelFormat,
2044 unsigned int SurfaceWidthLuma,
2045 unsigned int SurfaceWidthChroma,
2046 unsigned int SurfaceHeightLuma,
2047 unsigned int SurfaceHeightChroma,
2048 unsigned int nomDETInKByte,
2049 unsigned int RequestHeight256ByteLuma,
2050 unsigned int RequestHeight256ByteChroma,
2051 enum dml2_swizzle_mode TilingFormat,
2052 unsigned int BytePerPixelY,
2053 unsigned int BytePerPixelC,
2054 double BytePerPixelDETY,
2055 double BytePerPixelDETC,
2056 enum dml2_rotation_angle RotationAngle,
2057
2058 // Output
2059 enum dml2_core_internal_request_type *RequestLuma,
2060 enum dml2_core_internal_request_type *RequestChroma,
2061 unsigned int *MaxUncompressedBlockLuma,
2062 unsigned int *MaxUncompressedBlockChroma,
2063 unsigned int *MaxCompressedBlockLuma,
2064 unsigned int *MaxCompressedBlockChroma,
2065 unsigned int *IndependentBlockLuma,
2066 unsigned int *IndependentBlockChroma)
2067 {
2068 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
2069
2070 unsigned int segment_order_horz_contiguous_luma;
2071 unsigned int segment_order_horz_contiguous_chroma;
2072 unsigned int segment_order_vert_contiguous_luma;
2073 unsigned int segment_order_vert_contiguous_chroma;
2074
2075 unsigned int req128_horz_wc_l;
2076 unsigned int req128_horz_wc_c;
2077 unsigned int req128_vert_wc_l;
2078 unsigned int req128_vert_wc_c;
2079
2080 unsigned int yuv420;
2081 unsigned int horz_div_l;
2082 unsigned int horz_div_c;
2083 unsigned int vert_div_l;
2084 unsigned int vert_div_c;
2085
2086 unsigned int swath_buf_size;
2087 double detile_buf_vp_horz_limit;
2088 double detile_buf_vp_vert_limit;
2089
2090 unsigned int MAS_vp_horz_limit;
2091 unsigned int MAS_vp_vert_limit;
2092 unsigned int max_vp_horz_width;
2093 unsigned int max_vp_vert_height;
2094 unsigned int eff_surf_width_l;
2095 unsigned int eff_surf_width_c;
2096 unsigned int eff_surf_height_l;
2097 unsigned int eff_surf_height_c;
2098
2099 unsigned int full_swath_bytes_horz_wc_l;
2100 unsigned int full_swath_bytes_horz_wc_c;
2101 unsigned int full_swath_bytes_vert_wc_l;
2102 unsigned int full_swath_bytes_vert_wc_c;
2103
2104 if (dml_is_420(SourcePixelFormat))
2105 yuv420 = 1;
2106 else
2107 yuv420 = 0;
2108 horz_div_l = 1;
2109 horz_div_c = 1;
2110 vert_div_l = 1;
2111 vert_div_c = 1;
2112
2113 if (BytePerPixelY == 1)
2114 vert_div_l = 0;
2115 if (BytePerPixelC == 1)
2116 vert_div_c = 0;
2117
2118 if (BytePerPixelC == 0) {
2119 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2120 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2121 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2122 } else {
2123 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2124 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2125 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2126 }
2127
2128 if (SourcePixelFormat == dml2_420_10) {
2129 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2130 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2131 }
2132
2133 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
2134 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
2135
2136 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
2137 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2138 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2139 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2140 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2141 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2142 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2143 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2144
2145 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2146 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2147 if (BytePerPixelC > 0) {
2148 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2149 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2150 } else {
2151 full_swath_bytes_horz_wc_c = 0;
2152 full_swath_bytes_vert_wc_c = 0;
2153 }
2154
2155 if (SourcePixelFormat == dml2_420_10) {
2156 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2157 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2158 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2159 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2160 }
2161
2162 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2163 req128_horz_wc_l = 0;
2164 req128_horz_wc_c = 0;
2165 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2166 req128_horz_wc_l = 0;
2167 req128_horz_wc_c = 1;
2168 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2169 req128_horz_wc_l = 1;
2170 req128_horz_wc_c = 0;
2171 } else {
2172 req128_horz_wc_l = 1;
2173 req128_horz_wc_c = 1;
2174 }
2175
2176 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2177 req128_vert_wc_l = 0;
2178 req128_vert_wc_c = 0;
2179 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2180 req128_vert_wc_l = 0;
2181 req128_vert_wc_c = 1;
2182 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2183 req128_vert_wc_l = 1;
2184 req128_vert_wc_c = 0;
2185 } else {
2186 req128_vert_wc_l = 1;
2187 req128_vert_wc_c = 1;
2188 }
2189
2190 if (BytePerPixelY == 2) {
2191 segment_order_horz_contiguous_luma = 0;
2192 segment_order_vert_contiguous_luma = 1;
2193 } else {
2194 segment_order_horz_contiguous_luma = 1;
2195 segment_order_vert_contiguous_luma = 0;
2196 }
2197
2198 if (BytePerPixelC == 2) {
2199 segment_order_horz_contiguous_chroma = 0;
2200 segment_order_vert_contiguous_chroma = 1;
2201 } else {
2202 segment_order_horz_contiguous_chroma = 1;
2203 segment_order_vert_contiguous_chroma = 0;
2204 }
2205 #ifdef __DML_VBA_DEBUG__
2206 dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2207 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2208 dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2209 dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2210 dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2211 dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2212 dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2213 dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2214 dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2215 #endif
2216 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2217 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2218 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2219 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2220 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2221 } else {
2222 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2223 }
2224 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2225 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2226 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2227 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2228 } else {
2229 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2230 }
2231 } else if (!dml_is_vertical_rotation(RotationAngle)) {
2232 if (req128_horz_wc_l == 0) {
2233 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2234 } else if (segment_order_horz_contiguous_luma == 0) {
2235 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2236 } else {
2237 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2238 }
2239 if (req128_horz_wc_c == 0) {
2240 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2241 } else if (segment_order_horz_contiguous_chroma == 0) {
2242 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2243 } else {
2244 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2245 }
2246 } else {
2247 if (req128_vert_wc_l == 0) {
2248 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2249 } else if (segment_order_vert_contiguous_luma == 0) {
2250 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2251 } else {
2252 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2253 }
2254 if (req128_vert_wc_c == 0) {
2255 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2256 } else if (segment_order_vert_contiguous_chroma == 0) {
2257 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2258 } else {
2259 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2260 }
2261 }
2262
2263 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
2264 *MaxUncompressedBlockLuma = 256;
2265 *MaxCompressedBlockLuma = 256;
2266 *IndependentBlockLuma = 0;
2267 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
2268 *MaxUncompressedBlockLuma = 256;
2269 *MaxCompressedBlockLuma = 128;
2270 *IndependentBlockLuma = 128;
2271 } else {
2272 *MaxUncompressedBlockLuma = 256;
2273 *MaxCompressedBlockLuma = 64;
2274 *IndependentBlockLuma = 64;
2275 }
2276
2277 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
2278 *MaxUncompressedBlockChroma = 256;
2279 *MaxCompressedBlockChroma = 256;
2280 *IndependentBlockChroma = 0;
2281 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
2282 *MaxUncompressedBlockChroma = 256;
2283 *MaxCompressedBlockChroma = 128;
2284 *IndependentBlockChroma = 128;
2285 } else {
2286 *MaxUncompressedBlockChroma = 256;
2287 *MaxCompressedBlockChroma = 64;
2288 *IndependentBlockChroma = 64;
2289 }
2290
2291 if (DCCEnabled != true || BytePerPixelC == 0) {
2292 *MaxUncompressedBlockChroma = 0;
2293 *MaxCompressedBlockChroma = 0;
2294 *IndependentBlockChroma = 0;
2295 }
2296
2297 if (DCCEnabled != true) {
2298 *MaxUncompressedBlockLuma = 0;
2299 *MaxCompressedBlockLuma = 0;
2300 *IndependentBlockLuma = 0;
2301 }
2302
2303 #ifdef __DML_VBA_DEBUG__
2304 dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2305 dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2306 dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2307 dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2308 dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2309 dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2310 #endif
2311
2312 }
2313
calculate_mcache_row_bytes(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_row_bytes_params * p)2314 static void calculate_mcache_row_bytes(
2315 struct dml2_core_internal_scratch *scratch,
2316 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
2317 {
2318 unsigned int vmpg_bytes = 0;
2319 unsigned int blk_bytes = 0;
2320 float meta_per_mvmpg_per_channel = 0;
2321 unsigned int est_blk_per_vmpg = 2;
2322 unsigned int mvmpg_per_row_ub = 0;
2323 unsigned int full_vp_width_mvmpg_aligned = 0;
2324 unsigned int full_vp_height_mvmpg_aligned = 0;
2325 unsigned int meta_per_mvmpg_per_channel_ub = 0;
2326 unsigned int mvmpg_per_mcache;
2327
2328 #ifdef __DML_VBA_DEBUG__
2329 dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans);
2330 dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
2331 dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
2332 dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
2333 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2334 dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
2335 dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
2336 dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
2337 dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
2338 dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
2339 dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
2340 dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
2341 dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width);
2342 dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height);
2343 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
2344 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
2345 dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
2346 #endif
2347 DML2_ASSERT(p->mcache_line_size_bytes != 0);
2348 DML2_ASSERT(p->mcache_size_bytes != 0);
2349
2350 *p->mvmpg_width = 0;
2351 *p->mvmpg_height = 0;
2352
2353 if (p->full_vp_height == 0 && p->full_vp_width == 0) {
2354 *p->num_mcaches = 0;
2355 *p->mcache_row_bytes = 0;
2356 *p->mcache_row_bytes_per_channel = 0;
2357 } else {
2358 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
2359
2360 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size
2361 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
2362
2363 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
2364 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
2365 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
2366 *p->mvmpg_width = p->blk_width;
2367 *p->mvmpg_height = p->blk_height;
2368 if (p->gpuvm_enable) {
2369 if (vmpg_bytes >= blk_bytes) {
2370 *p->mvmpg_width = p->vmpg_width;
2371 *p->mvmpg_height = p->vmpg_height;
2372 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
2373 dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
2374 DML2_ASSERT(0);
2375 }
2376 }
2377
2378 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
2379 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
2380 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
2381
2382 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
2383
2384 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
2385 if (!p->surf_vert) { //horizontal access
2386 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
2387 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
2388 else
2389 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
2390 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
2391 } else { //vertical access
2392 if (p->vp_stationary == 1)
2393 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
2394 else
2395 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
2396 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
2397 }
2398
2399 if (p->gpuvm_enable) {
2400 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
2401
2402 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic
2403 if (p->surf_vert && vmpg_bytes > blk_bytes) {
2404 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
2405 }
2406
2407 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
2408 } else {
2409 meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
2410
2411 if (!p->surf_vert)
2412 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
2413 else
2414 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
2415 }
2416
2417 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
2418
2419 //but for 4KB vmpg with 64KB tile blk
2420 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
2421 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
2422
2423 // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
2424 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
2425 if (p->gpuvm_enable || p->surf_vert) {
2426 *p->mcache_row_bytes_per_channel = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
2427 *p->mcache_row_bytes = *p->mcache_row_bytes_per_channel * p->num_chans;
2428 } else { // horizontal and gpuvm disable
2429 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
2430 if (p->mcache_line_size_bytes != 0)
2431 *p->mcache_row_bytes_per_channel = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
2432 }
2433
2434 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
2435 if (p->mcache_size_bytes != 0)
2436 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes_per_channel / p->mcache_size_bytes, 1);
2437
2438 mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
2439 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
2440
2441 #ifdef __DML_VBA_DEBUG__
2442 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2443 dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
2444 dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
2445 dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
2446 dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
2447 dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
2448 dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
2449 dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
2450 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
2451 dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
2452 #endif
2453 }
2454
2455 #ifdef __DML_VBA_DEBUG__
2456 dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
2457 dml2_printf("DML::%s: mcache_row_bytes_per_channel = %u\n", __func__, *p->mcache_row_bytes_per_channel);
2458 dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
2459 #endif
2460 DML2_ASSERT(*p->num_mcaches > 0);
2461 }
2462
calculate_mcache_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_setting_params * p)2463 static void calculate_mcache_setting(
2464 struct dml2_core_internal_scratch *scratch,
2465 struct dml2_core_calcs_calculate_mcache_setting_params *p)
2466 {
2467 unsigned int n;
2468
2469 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
2470 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
2471
2472 *p->num_mcaches_l = 0;
2473 *p->mcache_row_bytes_l = 0;
2474 *p->mcache_row_bytes_per_channel_l = 0;
2475 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
2476 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
2477
2478 *p->num_mcaches_c = 0;
2479 *p->mcache_row_bytes_c = 0;
2480 *p->mcache_row_bytes_per_channel_c = 0;
2481 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
2482 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
2483
2484 *p->mall_comb_mcache_l = 0;
2485 *p->mall_comb_mcache_c = 0;
2486 *p->lc_comb_mcache = 0;
2487
2488 if (!p->dcc_enable)
2489 return;
2490
2491 l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
2492
2493 l->l_p.num_chans = p->num_chans;
2494 l->l_p.mem_word_bytes = p->mem_word_bytes;
2495 l->l_p.mcache_size_bytes = p->mcache_size_bytes;
2496 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2497 l->l_p.gpuvm_enable = p->gpuvm_enable;
2498 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2499 l->l_p.surf_vert = p->surf_vert;
2500 l->l_p.vp_stationary = p->vp_stationary;
2501 l->l_p.tiling_mode = p->tiling_mode;
2502 l->l_p.vp_start_x = p->vp_start_x_l;
2503 l->l_p.vp_start_y = p->vp_start_y_l;
2504 l->l_p.full_vp_width = p->full_vp_width_l;
2505 l->l_p.full_vp_height = p->full_vp_height_l;
2506 l->l_p.blk_width = p->blk_width_l;
2507 l->l_p.blk_height = p->blk_height_l;
2508 l->l_p.vmpg_width = p->vmpg_width_l;
2509 l->l_p.vmpg_height = p->vmpg_height_l;
2510 l->l_p.full_swath_bytes = p->full_swath_bytes_l;
2511 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
2512
2513 // output
2514 l->l_p.num_mcaches = p->num_mcaches_l;
2515 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
2516 l->l_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_l;
2517 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
2518 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
2519 l->l_p.mvmpg_width = &l->mvmpg_width_l;
2520 l->l_p.mvmpg_height = &l->mvmpg_height_l;
2521 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
2522 l->l_p.meta_row_width_ub = &l->meta_row_width_l;
2523 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
2524
2525 calculate_mcache_row_bytes(scratch, &l->l_p);
2526 DML2_ASSERT(*p->num_mcaches_l > 0);
2527
2528 if (l->is_dual_plane) {
2529 l->c_p.num_chans = p->num_chans;
2530 l->c_p.mem_word_bytes = p->mem_word_bytes;
2531 l->c_p.mcache_size_bytes = p->mcache_size_bytes;
2532 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2533 l->c_p.gpuvm_enable = p->gpuvm_enable;
2534 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2535 l->c_p.surf_vert = p->surf_vert;
2536 l->c_p.vp_stationary = p->vp_stationary;
2537 l->c_p.tiling_mode = p->tiling_mode;
2538 l->c_p.vp_start_x = p->vp_start_x_c;
2539 l->c_p.vp_start_y = p->vp_start_y_c;
2540 l->c_p.full_vp_width = p->full_vp_width_c;
2541 l->c_p.full_vp_height = p->full_vp_height_c;
2542 l->c_p.blk_width = p->blk_width_c;
2543 l->c_p.blk_height = p->blk_height_c;
2544 l->c_p.vmpg_width = p->vmpg_width_c;
2545 l->c_p.vmpg_height = p->vmpg_height_c;
2546 l->c_p.full_swath_bytes = p->full_swath_bytes_c;
2547 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
2548
2549 // output
2550 l->c_p.num_mcaches = p->num_mcaches_c;
2551 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
2552 l->c_p.mcache_row_bytes_per_channel = p->mcache_row_bytes_per_channel_c;
2553 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
2554 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
2555 l->c_p.mvmpg_width = &l->mvmpg_width_c;
2556 l->c_p.mvmpg_height = &l->mvmpg_height_c;
2557 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
2558 l->c_p.meta_row_width_ub = &l->meta_row_width_c;
2559 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
2560
2561 calculate_mcache_row_bytes(scratch, &l->c_p);
2562 DML2_ASSERT(*p->num_mcaches_c > 0);
2563 }
2564
2565 // Sharing for iMALL access
2566 l->mcache_remainder_l = *p->mcache_row_bytes_per_channel_l % p->mcache_size_bytes;
2567 l->mcache_remainder_c = *p->mcache_row_bytes_per_channel_c % p->mcache_size_bytes;
2568 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
2569 l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
2570
2571 if (p->imall_enable) {
2572 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
2573
2574 if (l->is_dual_plane)
2575 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
2576 }
2577
2578 if (!p->surf_vert) // horizonatal access
2579 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
2580 else // vertical access
2581 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
2582
2583 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
2584 if (*p->num_mcaches_l) {
2585 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
2586 }
2587 if (l->is_dual_plane) {
2588 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
2589
2590 /* if either remainder is 0, then mcache sharing is not needed or not possible due to full utilization */
2591 if (l->mcache_remainder_l && l->mcache_remainder_c) {
2592 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
2593 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
2594 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
2595 }
2596 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
2597 }
2598 }
2599
2600 #ifdef __DML_VBA_DEBUG__
2601 dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
2602 dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
2603 dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
2604 dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
2605 dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
2606 dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
2607 dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
2608 dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
2609 dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
2610 dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
2611
2612 if (l->is_dual_plane) {
2613 dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
2614 dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
2615 dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
2616 dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
2617 dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
2618 dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
2619 dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
2620 dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
2621 dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
2622 dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
2623 }
2624 #endif
2625 // calculate split_coordinate
2626 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
2627 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
2628
2629 for (n = 0; n < *p->num_mcaches_l - 1; n++) {
2630 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
2631 }
2632 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2633
2634 if (l->is_dual_plane) {
2635 for (n = 0; n < *p->num_mcaches_c - 1; n++) {
2636 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
2637 }
2638 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2639 }
2640 #ifdef __DML_VBA_DEBUG__
2641 for (n = 0; n < *p->num_mcaches_l; n++)
2642 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2643
2644 if (l->is_dual_plane) {
2645 for (n = 0; n < *p->num_mcaches_c; n++)
2646 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2647 }
2648 #endif
2649
2650 // Luma/Chroma combine in the last mcache
2651 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
2652 if (*p->lc_comb_mcache && l->is_dual_plane) {
2653 for (n = 0; n < *p->num_mcaches_l - 1; n++)
2654 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
2655 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2656
2657 for (n = 0; n < *p->num_mcaches_c - 1; n++)
2658 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
2659 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2660
2661 #ifdef __DML_VBA_DEBUG__
2662 for (n = 0; n < *p->num_mcaches_l; n++)
2663 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2664
2665 for (n = 0; n < *p->num_mcaches_c; n++)
2666 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2667 #endif
2668 }
2669
2670 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
2671 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
2672 }
2673
calculate_mall_bw_overhead_factor(double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes)2674 static void calculate_mall_bw_overhead_factor(
2675 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
2676 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
2677
2678 // input
2679 const struct dml2_display_cfg *display_cfg,
2680 unsigned int num_active_planes)
2681 {
2682 for (unsigned int k = 0; k < num_active_planes; ++k) {
2683 mall_prefetch_sdp_overhead_factor[k] = 1.0;
2684 mall_prefetch_dram_overhead_factor[k] = 1.0;
2685
2686 // SDP - on the return side
2687 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
2688 mall_prefetch_sdp_overhead_factor[k] = 1.25;
2689 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
2690 mall_prefetch_sdp_overhead_factor[k] = 0.25;
2691
2692 // DRAM
2693 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
2694 mall_prefetch_dram_overhead_factor[k] = 2.0;
2695
2696 #ifdef __DML_VBA_DEBUG__
2697 dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
2698 dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
2699 #endif
2700 }
2701 }
2702
dml_get_return_bandwidth_available(const struct dml2_soc_bb * soc,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool is_avg_bw,bool is_hvm_en,bool is_hvm_only,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2703 static double dml_get_return_bandwidth_available(
2704 const struct dml2_soc_bb *soc,
2705 enum dml2_core_internal_soc_state_type state_type,
2706 enum dml2_core_internal_bw_type bw_type,
2707 bool is_avg_bw,
2708 bool is_hvm_en,
2709 bool is_hvm_only,
2710 double dcfclk_mhz,
2711 double fclk_mhz,
2712 double dram_bw_mbps)
2713 {
2714 double return_bw_mbps = 0.;
2715 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz;
2716 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
2717 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
2718
2719 double derate_sdp_factor;
2720 double derate_fabric_factor;
2721 double derate_dram_factor;
2722
2723 double derate_sdp_bandwidth;
2724 double derate_fabric_bandwidth;
2725 double derate_dram_bandwidth;
2726
2727 if (is_avg_bw) {
2728 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2729 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
2730 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
2731 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
2732 } else { // just assume sys_active
2733 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
2734 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
2735 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
2736 }
2737 } else { // urgent bw
2738 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2739 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
2740 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
2741 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2742
2743 if (is_hvm_en) {
2744 if (is_hvm_only)
2745 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
2746 else
2747 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2748 } else {
2749 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2750 }
2751 } else { // just assume sys_active
2752 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
2753 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
2754
2755 if (is_hvm_en) {
2756 if (is_hvm_only)
2757 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
2758 else
2759 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2760 } else {
2761 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
2762 }
2763 }
2764 }
2765
2766 derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
2767 derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
2768 derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
2769
2770 if (bw_type == dml2_core_internal_bw_sdp)
2771 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth);
2772 else // dml2_core_internal_bw_dram
2773 return_bw_mbps = derate_dram_bandwidth;
2774
2775 #ifdef __DML_VBA_DEBUG__
2776 dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
2777 dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
2778 dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
2779 dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
2780 dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
2781 dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2782 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2783 dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
2784 dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
2785 dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
2786 dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
2787 dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
2788 dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
2789 dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
2790 #endif
2791 return return_bw_mbps;
2792 }
2793
calculate_bandwidth_available(double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_min[dml2_core_internal_soc_state_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],const struct dml2_soc_bb * soc,bool HostVMEnable,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2794 static noinline_for_stack void calculate_bandwidth_available(
2795 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
2796 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2797 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
2798 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2799 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
2800 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
2801
2802 const struct dml2_soc_bb *soc,
2803 bool HostVMEnable,
2804 double dcfclk_mhz,
2805 double fclk_mhz,
2806 double dram_bw_mbps)
2807 {
2808 unsigned int n, m;
2809
2810 dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2811 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2812 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
2813
2814 // Calculate all the bandwidth availabe
2815 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2816 for (n = 0; n < dml2_core_internal_bw_max; n++) {
2817 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
2818 m, // soc_state
2819 n, // bw_type
2820 1, // avg_bw
2821 HostVMEnable,
2822 0, // hvm_only
2823 dcfclk_mhz,
2824 fclk_mhz,
2825 dram_bw_mbps);
2826
2827 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2828
2829
2830 #ifdef __DML_VBA_DEBUG__
2831 dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
2832 dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
2833 #endif
2834
2835 // urg_bandwidth_available_vm_only is indexed by soc_state
2836 if (n == dml2_core_internal_bw_dram) {
2837 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2838 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2839 }
2840 }
2841
2842 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2843 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2844
2845 #ifdef __DML_VBA_DEBUG__
2846 dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
2847 dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
2848 dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]);
2849 #endif
2850 }
2851 }
2852
calculate_avg_bandwidth_required(double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,double ReadBandwidthLuma[],double ReadBandwidthChroma[],double cursor_bw[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double mall_prefetch_dram_overhead_factor[],double mall_prefetch_sdp_overhead_factor[])2853 static void calculate_avg_bandwidth_required(
2854 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2855
2856 // input
2857 const struct dml2_display_cfg *display_cfg,
2858 unsigned int num_active_planes,
2859 double ReadBandwidthLuma[],
2860 double ReadBandwidthChroma[],
2861 double cursor_bw[],
2862 double dcc_dram_bw_nom_overhead_factor_p0[],
2863 double dcc_dram_bw_nom_overhead_factor_p1[],
2864 double mall_prefetch_dram_overhead_factor[],
2865 double mall_prefetch_sdp_overhead_factor[])
2866 {
2867 unsigned int n, m, k;
2868 double sdp_overhead_factor;
2869 double dram_overhead_factor_p0;
2870 double dram_overhead_factor_p1;
2871
2872 // Average BW support check
2873 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2874 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
2875 avg_bandwidth_required[m][n] = 0;
2876 }
2877 }
2878
2879 // SysActive and SVP Prefetch AVG bandwidth Check
2880 for (k = 0; k < num_active_planes; ++k) {
2881 #ifdef __DML_VBA_DEBUG__
2882 dml2_printf("DML::%s: plane %0d\n", __func__, k);
2883 dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
2884 dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
2885 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
2886 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
2887 dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
2888 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
2889 #endif
2890
2891 sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
2892 dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
2893 dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
2894
2895 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
2896 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
2897 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
2898 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2899 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2900 }
2901 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2902 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2903
2904 #ifdef __DML_VBA_DEBUG__
2905 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
2906 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
2907 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
2908 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
2909 #endif
2910 }
2911 }
2912
CalculateVMRowAndSwath(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateVMRowAndSwath_params * p)2913 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
2914 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
2915 {
2916 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
2917
2918 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
2919
2920 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2921 if (p->display_cfg->gpuvm_enable == true) {
2922 p->vm_group_bytes[k] = 512;
2923 p->dpte_group_bytes[k] = 512;
2924 } else {
2925 p->vm_group_bytes[k] = 0;
2926 p->dpte_group_bytes[k] = 0;
2927 }
2928
2929 if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
2930 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
2931 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
2932 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
2933 } else {
2934 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
2935 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
2936 }
2937
2938 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2939 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2940 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2941 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
2942 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
2943 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2944 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2945 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
2946 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2947 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
2948 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
2949 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
2950 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
2951 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2952 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2953 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2954 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
2955 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
2956 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
2957 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
2958 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
2959 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
2960 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
2961
2962 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
2963 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
2964 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
2965 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
2966 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
2967 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
2968 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
2969 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
2970 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
2971 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
2972 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
2973 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
2974 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
2975 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
2976
2977 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
2978 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
2979 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
2980 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
2981 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
2982 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
2983
2984 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
2985
2986 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2987 p->myPipe[k].VRatioChroma,
2988 p->myPipe[k].VTapsChroma,
2989 p->myPipe[k].InterlaceEnable,
2990 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
2991 p->myPipe[k].SwathHeightC,
2992 p->myPipe[k].RotationAngle,
2993 p->myPipe[k].mirrored,
2994 p->myPipe[k].ViewportStationary,
2995 p->SwathWidthC[k],
2996 p->myPipe[k].ViewportHeightC,
2997 p->myPipe[k].ViewportXStartC,
2998 p->myPipe[k].ViewportYStartC,
2999
3000 // Output
3001 &p->VInitPreFillC[k],
3002 &p->MaxNumSwathC[k]);
3003 } else {
3004 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
3005 s->PTEBufferSizeInRequestsForChroma[k] = 0;
3006 s->PixelPTEBytesPerRowC[k] = 0;
3007 s->PixelPTEBytesPerRowStorageC[k] = 0;
3008 s->vm_bytes_c = 0;
3009 p->MaxNumSwathC[k] = 0;
3010 p->PrefetchSourceLinesC[k] = 0;
3011 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
3012 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
3013 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
3014 }
3015
3016 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
3017 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
3018 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
3019 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
3020 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
3021 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
3022 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
3023 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
3024 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
3025 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
3026 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
3027 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
3028 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
3029 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
3030 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
3031 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
3032 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
3033 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
3034 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
3035 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
3036 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
3037 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
3038 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
3039
3040 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
3041 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
3042 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
3043 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
3044 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
3045 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3046 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
3047 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
3048 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
3049 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
3050 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
3051 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
3052 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
3053 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
3054
3055 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
3056 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
3057 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
3058 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
3059 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
3060 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
3061
3062 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
3063
3064 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
3065 p->myPipe[k].VRatio,
3066 p->myPipe[k].VTaps,
3067 p->myPipe[k].InterlaceEnable,
3068 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
3069 p->myPipe[k].SwathHeightY,
3070 p->myPipe[k].RotationAngle,
3071 p->myPipe[k].mirrored,
3072 p->myPipe[k].ViewportStationary,
3073 p->SwathWidthY[k],
3074 p->myPipe[k].ViewportHeight,
3075 p->myPipe[k].ViewportXStart,
3076 p->myPipe[k].ViewportYStart,
3077
3078 // Output
3079 &p->VInitPreFillY[k],
3080 &p->MaxNumSwathY[k]);
3081
3082 #ifdef __DML_VBA_DEBUG__
3083 dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
3084 dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
3085 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
3086 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
3087 #endif
3088 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
3089 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
3090 p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
3091 p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
3092
3093 #ifdef __DML_VBA_DEBUG__
3094 dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
3095 dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
3096 #endif
3097 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
3098 p->PTEBufferSizeNotExceeded[k] = true;
3099 } else {
3100 p->PTEBufferSizeNotExceeded[k] = false;
3101 }
3102
3103 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
3104 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
3105 #ifdef __DML_VBA_DEBUG__
3106 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
3107 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3108 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3109 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
3110 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
3111 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
3112 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
3113 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3114
3115 dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
3116 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
3117 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
3118 dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
3119 }
3120 #endif
3121 }
3122
3123 CalculateMALLUseForStaticScreen(
3124 p->display_cfg,
3125 p->NumberOfActiveSurfaces,
3126 p->MALLAllocatedForDCN,
3127 p->SurfaceSizeInMALL,
3128 s->one_row_per_frame_fits_in_buffer,
3129 // Output
3130 p->is_using_mall_for_ss);
3131
3132 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3133 if (p->display_cfg->gpuvm_enable) {
3134 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
3135 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
3136 }
3137 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3138 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
3139 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
3140 } else {
3141 p->PTE_BUFFER_MODE[k] = 0;
3142 p->BIGK_FRAGMENT_SIZE[k] = 0;
3143 }
3144 }
3145
3146 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3147 p->DCCMetaBufferSizeNotExceeded[k] = true;
3148 #ifdef __DML_VBA_DEBUG__
3149 dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
3150 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
3151 #endif
3152 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3153 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
3154
3155 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
3156
3157 if (p->use_one_row_for_frame[k]) {
3158 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
3159 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
3160 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3161 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
3162 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
3163 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
3164 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
3165 }
3166
3167 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
3168 p->DCCMetaBufferSizeNotExceeded[k] = true;
3169 } else {
3170 p->DCCMetaBufferSizeNotExceeded[k] = false;
3171
3172 #ifdef __DML_VBA_DEBUG__
3173 dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
3174 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
3175 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
3176 #endif
3177 }
3178
3179 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
3180 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
3181 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
3182 p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
3183 p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
3184
3185 // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
3186 if (p->use_one_row_for_frame[k])
3187 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
3188
3189 CalculateRowBandwidth(
3190 p->display_cfg->gpuvm_enable,
3191 p->use_one_row_for_frame[k],
3192 p->myPipe[k].SourcePixelFormat,
3193 p->myPipe[k].VRatio,
3194 p->myPipe[k].VRatioChroma,
3195 p->myPipe[k].DCCEnable,
3196 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
3197 s->PixelPTEBytesPerRowY[k],
3198 s->PixelPTEBytesPerRowC[k],
3199 p->dpte_row_height_luma[k],
3200 p->dpte_row_height_chroma[k],
3201
3202 p->mrq_present,
3203 p->meta_row_bytes_per_row_ub_l[k],
3204 p->meta_row_bytes_per_row_ub_c[k],
3205 p->meta_row_height_luma[k],
3206 p->meta_row_height_chroma[k],
3207
3208 // Output
3209 &p->dpte_row_bw[k],
3210 &p->meta_row_bw[k]);
3211 #ifdef __DML_VBA_DEBUG__
3212 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
3213 dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
3214 dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
3215 dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
3216 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
3217 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3218 dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
3219 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
3220 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3221 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
3222 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3223 dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
3224 dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
3225 dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
3226 #endif
3227 }
3228 }
3229
CalculateUrgentLatency(double UrgentLatencyPixelDataOnly,double UrgentLatencyPixelMixedWithVMData,double UrgentLatencyVMDataOnly,bool DoUrgentLatencyAdjustment,double UrgentLatencyAdjustmentFabricClockComponent,double UrgentLatencyAdjustmentFabricClockReference,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int urgent_ramp_uclk_cycles,unsigned int df_qos_response_time_fclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_urgent_ramp_latency_margin,double fabric_max_transport_latency_margin)3230 static double CalculateUrgentLatency(
3231 double UrgentLatencyPixelDataOnly,
3232 double UrgentLatencyPixelMixedWithVMData,
3233 double UrgentLatencyVMDataOnly,
3234 bool DoUrgentLatencyAdjustment,
3235 double UrgentLatencyAdjustmentFabricClockComponent,
3236 double UrgentLatencyAdjustmentFabricClockReference,
3237 double FabricClock,
3238 double uclk_freq_mhz,
3239 enum dml2_qos_param_type qos_type,
3240 unsigned int urgent_ramp_uclk_cycles,
3241 unsigned int df_qos_response_time_fclk_cycles,
3242 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3243 unsigned int mall_overhead_fclk_cycles,
3244 double umc_urgent_ramp_latency_margin,
3245 double fabric_max_transport_latency_margin)
3246 {
3247 double urgent_latency = 0;
3248 if (qos_type == dml2_qos_param_type_dcn4x) {
3249 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
3250 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
3251 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
3252 } else {
3253 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
3254 if (DoUrgentLatencyAdjustment == true) {
3255 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
3256 }
3257 }
3258 #ifdef __DML_VBA_DEBUG__
3259 if (qos_type == dml2_qos_param_type_dcn4x) {
3260 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3261 dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
3262 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3263 dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
3264 } else {
3265 dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
3266 dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
3267 dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
3268 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
3269 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
3270 }
3271 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3272 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency);
3273 #endif
3274 return urgent_latency;
3275 }
3276
CalculateTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int trip_to_memory_uclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3277 static double CalculateTripToMemory(
3278 double UrgLatency,
3279 double FabricClock,
3280 double uclk_freq_mhz,
3281 enum dml2_qos_param_type qos_type,
3282 unsigned int trip_to_memory_uclk_cycles,
3283 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3284 unsigned int mall_overhead_fclk_cycles,
3285 double umc_max_latency_margin,
3286 double fabric_max_transport_latency_margin)
3287 {
3288 double trip_to_memory_us;
3289 if (qos_type == dml2_qos_param_type_dcn4x) {
3290 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
3291 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3292 + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3293 } else {
3294 trip_to_memory_us = UrgLatency;
3295 }
3296
3297 #ifdef __DML_VBA_DEBUG__
3298 if (qos_type == dml2_qos_param_type_dcn4x) {
3299 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3300 dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
3301 dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
3302 dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
3303 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3304 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3305 dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
3306 dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
3307 } else {
3308 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3309 }
3310 dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
3311 #endif
3312
3313
3314 return trip_to_memory_us;
3315 }
3316
CalculateMetaTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int meta_trip_to_memory_uclk_cycles,unsigned int meta_trip_to_memory_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3317 static double CalculateMetaTripToMemory(
3318 double UrgLatency,
3319 double FabricClock,
3320 double uclk_freq_mhz,
3321 enum dml2_qos_param_type qos_type,
3322 unsigned int meta_trip_to_memory_uclk_cycles,
3323 unsigned int meta_trip_to_memory_fclk_cycles,
3324 double umc_max_latency_margin,
3325 double fabric_max_transport_latency_margin)
3326 {
3327 double meta_trip_to_memory_us;
3328 if (qos_type == dml2_qos_param_type_dcn4x) {
3329 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3330 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3331 } else {
3332 meta_trip_to_memory_us = UrgLatency;
3333 }
3334
3335 #ifdef __DML_VBA_DEBUG__
3336 if (qos_type == dml2_qos_param_type_dcn4x) {
3337 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3338 dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
3339 dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
3340 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3341 } else {
3342 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3343 }
3344 dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
3345 #endif
3346
3347
3348 return meta_trip_to_memory_us;
3349 }
3350
/*
 * calculate_cursor_req_attributes() - derive cursor fetch sizing from the
 * cursor width (in pixels) and bits per pixel.
 *
 * Outputs:
 * @cursor_lines_per_chunk: lines per cursor chunk, from the bpp/width table
 *                          below; 0 when there is no cursor or the bpp is
 *                          unsupported.
 * @cursor_bytes_per_line:  line size in bytes, rounded up to the request size
 *                          (64, 128 or 256 bytes).
 * @cursor_bytes_per_chunk: cursor_bytes_per_line * cursor_lines_per_chunk.
 * @cursor_bytes:           cursor_bytes_per_line * cursor_width (the cursor is
 *                          assumed square for worst-case sizing).
 *
 * An unsupported cursor_bpp with a non-zero width is reported and asserted.
 */
static void calculate_cursor_req_attributes(
	unsigned int cursor_width,
	unsigned int cursor_bpp,

	// output
	unsigned int *cursor_lines_per_chunk,
	unsigned int *cursor_bytes_per_line,
	unsigned int *cursor_bytes_per_chunk,
	unsigned int *cursor_bytes)
{
	unsigned int cursor_bytes_per_req = 0;
	unsigned int cursor_width_bytes = 0;
	unsigned int cursor_height = 0;
#ifdef __DML_VBA_DEBUG__
	unsigned int cursor_pitch = 0;
#endif

	//The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows.

	cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
	if (cursor_width_bytes <= 64)
		cursor_bytes_per_req = 64;
	else if (cursor_width_bytes <= 128)
		cursor_bytes_per_req = 128;
	else
		cursor_bytes_per_req = 256;

	//If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line.
	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req);

	//Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines.
	if (cursor_bpp == 2) {
		*cursor_lines_per_chunk = 16;
	} else if (cursor_bpp == 32) {
		if (cursor_width <= 32)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 4;
		else
			*cursor_lines_per_chunk = 2;
	} else if (cursor_bpp == 64) {
		if (cursor_width <= 16)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 32)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 4;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 2;
		else
			*cursor_lines_per_chunk = 1;
	} else {
		if (cursor_width > 0) {
			dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
			DML2_ASSERT(0);
		}
		// No cursor, or unsupported bpp: give the output a defined value, because it is
		// dereferenced just below and was previously left as whatever the caller's storage held.
		*cursor_lines_per_chunk = 0;
	}

	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;

	// For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize.
	// Only cursor_width is provided for worst case sizing so assume that the cursor is square
	cursor_height = cursor_width;
	*cursor_bytes = *cursor_bytes_per_line * cursor_height;
#ifdef __DML_VBA_DEBUG__
	//SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply.
	//- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B
	//- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width
	// The pitch is only reported here, so it is computed in debug builds only; a zero width
	// is skipped because log(0) has no usable value to convert and shift by.
	if (cursor_bpp == 2)
		cursor_pitch = 256;
	else if (cursor_width > 0)
		cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1);

	dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp);
	dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width);
	dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes);
	dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk);
	dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes);
	dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch);
#endif
}
3435
/*
 * Derive the cursor urgent burst factor from how long the cursor buffer can
 * keep supplying lines.
 *
 * CursorBufferSize (kbytes) is converted to a whole number of cursor chunks,
 * then to lines (cursor_lines_per_chunk per chunk), then to time via LineTime.
 * If that time does not exceed UrgentLatency, *NotEnoughUrgentLatencyHiding is
 * set and the factor is pinned to 1; otherwise the factor is
 * time / (time - UrgentLatency).
 *
 * When CursorWidth is 0 both outputs are left untouched.
 */
static void calculate_cursor_urgent_burst_factor(
	unsigned int CursorBufferSize,
	unsigned int CursorWidth,
	unsigned int cursor_bytes_per_chunk,
	unsigned int cursor_lines_per_chunk,
	double LineTime,
	double UrgentLatency,

	double *UrgentBurstFactorCursor,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int chunks_in_buffer;
	unsigned int lines_in_buffer;
	double buffer_time;
	double hiding_margin;

	// Nothing to compute without a cursor; outputs keep their previous values.
	if (CursorWidth == 0)
		return;

	chunks_in_buffer = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1);
	lines_in_buffer = chunks_in_buffer * cursor_lines_per_chunk;

	buffer_time = lines_in_buffer * LineTime;
	hiding_margin = buffer_time - UrgentLatency;

	if (hiding_margin <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorCursor = 1;
	} else {
		*NotEnoughUrgentLatencyHiding = false;
		*UrgentBurstFactorCursor = buffer_time / hiding_margin;
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, lines_in_buffer);
	dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffer_time);
	dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
	dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3474
/*
 * Compute the luma and chroma urgent burst factors for one plane.
 *
 * For each of luma and chroma (chroma only when BytePerPixelInDETC > 0) the
 * DET capacity is expressed in lines, floored with math_floor2() against the
 * swath height, and turned into time using LineTime and the vertical ratio.
 * Phantom pipes use a fixed 1024 * 1024 byte capacity instead of the DET size.
 *
 * If the buffered time does not exceed UrgentLatency the factor is set to 1
 * and *NotEnoughUrgentLatencyHiding is raised; otherwise the factor is
 * time / (time - UrgentLatency). A plane without chroma reports a chroma
 * factor of 0.
 */
static void CalculateUrgentBurstFactor(
	const struct dml2_plane_parameters *plane_cfg,
	unsigned int swath_width_luma_ub,
	unsigned int swath_width_chroma_ub,
	unsigned int SwathHeightY,
	unsigned int SwathHeightC,
	double LineTime,
	double UrgentLatency,
	double VRatio,
	double VRatioC,
	double BytePerPixelInDETY,
	double BytePerPixelInDETC,
	unsigned int DETBufferSizeY,
	unsigned int DETBufferSizeC,
	// Output
	double *UrgentBurstFactorLuma,
	double *UrgentBurstFactorChroma,
	bool *NotEnoughUrgentLatencyHiding)
{
	double det_lines_l;
	double det_lines_c;
	double det_time_l;
	double det_time_c;
	double margin;

	// Defaults; the flag is only ever raised below, never cleared again.
	*UrgentBurstFactorChroma = 0;
	*UrgentBurstFactorLuma = 0;
	*NotEnoughUrgentLatencyHiding = false;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC);
	dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
	dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
	dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
	dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
	DML2_ASSERT(VRatio > 0);

	// Luma
	det_lines_l = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
	det_time_l = math_floor2(det_lines_l, SwathHeightY) * LineTime / VRatio;

	margin = det_time_l - UrgentLatency;
	if (margin <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorLuma = 1;
	} else {
		*UrgentBurstFactorLuma = det_time_l / margin;
	}

	// Chroma, only for formats that carry a second plane
	if (BytePerPixelInDETC > 0) {
		det_lines_c = (dml_is_phantom_pipe(plane_cfg) ? 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC / swath_width_chroma_ub;
		det_time_c = math_floor2(det_lines_c, SwathHeightC) * LineTime / VRatioC;

		margin = det_time_c - UrgentLatency;
		if (margin <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorChroma = 1;
		} else {
			*UrgentBurstFactorChroma = det_time_c / margin;
		}
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, det_lines_l);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, det_time_l);
	dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
	dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3545
CalculateDCFCLKDeepSleepTdlut(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double dispclk,unsigned int tdlut_bytes_to_deliver[],double prefetch_swath_time_us[],double * DCFClkDeepSleep)3546 static void CalculateDCFCLKDeepSleepTdlut(
3547 const struct dml2_display_cfg *display_cfg,
3548 unsigned int NumberOfActiveSurfaces,
3549 unsigned int BytePerPixelY[],
3550 unsigned int BytePerPixelC[],
3551 unsigned int SwathWidthY[],
3552 unsigned int SwathWidthC[],
3553 unsigned int DPPPerSurface[],
3554 double PSCL_THROUGHPUT[],
3555 double PSCL_THROUGHPUT_CHROMA[],
3556 double Dppclk[],
3557 double ReadBandwidthLuma[],
3558 double ReadBandwidthChroma[],
3559 unsigned int ReturnBusWidth,
3560
3561 double dispclk,
3562 unsigned int tdlut_bytes_to_deliver[],
3563 double prefetch_swath_time_us[],
3564
3565 // Output
3566 double *DCFClkDeepSleep)
3567 {
3568 double DisplayPipeLineDeliveryTimeLuma;
3569 double DisplayPipeLineDeliveryTimeChroma;
3570 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES];
3571 double ReadBandwidth = 0.0;
3572
3573 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3574 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
3575
3576 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
3577 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
3578 } else {
3579 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3580 }
3581 if (BytePerPixelC[k] == 0) {
3582 DisplayPipeLineDeliveryTimeChroma = 0;
3583 } else {
3584 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
3585 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
3586 } else {
3587 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3588 }
3589 }
3590
3591 if (BytePerPixelC[k] > 0) {
3592 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
3593 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
3594 } else {
3595 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
3596 }
3597 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
3598
3599 // adjust for 3dlut delivery time
3600 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && tdlut_bytes_to_deliver[k] > 0) {
3601 double tdlut_required_deepsleep_dcfclk = (double) tdlut_bytes_to_deliver[k] / 64.0 / prefetch_swath_time_us[k];
3602
3603 dml2_printf("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3604 dml2_printf("DML::%s: k=%d, tdlut_bytes_to_deliver = %d\n", __func__, k, tdlut_bytes_to_deliver[k]);
3605 dml2_printf("DML::%s: k=%d, prefetch_swath_time_us = %f\n", __func__, k, prefetch_swath_time_us[k]);
3606 dml2_printf("DML::%s: k=%d, tdlut_required_deepsleep_dcfclk = %f\n", __func__, k, tdlut_required_deepsleep_dcfclk);
3607
3608 // increase the deepsleep dcfclk to match the original dispclk throughput rate
3609 if (tdlut_required_deepsleep_dcfclk > DCFClkDeepSleepPerSurface[k]) {
3610 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], tdlut_required_deepsleep_dcfclk);
3611 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], dispclk / 4.0);
3612 }
3613 }
3614
3615 #ifdef __DML_VBA_DEBUG__
3616 dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
3617 dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3618 #endif
3619 }
3620
3621 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3622 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
3623 }
3624
3625 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth);
3626
3627 #ifdef __DML_VBA_DEBUG__
3628 dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
3629 dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
3630 dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
3631 dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
3632 #endif
3633
3634 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3635 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
3636 }
3637
3638 dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
3639 }
3640
CalculateDCFCLKDeepSleep(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)3641 static noinline_for_stack void CalculateDCFCLKDeepSleep(
3642 const struct dml2_display_cfg *display_cfg,
3643 unsigned int NumberOfActiveSurfaces,
3644 unsigned int BytePerPixelY[],
3645 unsigned int BytePerPixelC[],
3646 unsigned int SwathWidthY[],
3647 unsigned int SwathWidthC[],
3648 unsigned int DPPPerSurface[],
3649 double PSCL_THROUGHPUT[],
3650 double PSCL_THROUGHPUT_CHROMA[],
3651 double Dppclk[],
3652 double ReadBandwidthLuma[],
3653 double ReadBandwidthChroma[],
3654 unsigned int ReturnBusWidth,
3655
3656 // Output
3657 double *DCFClkDeepSleep)
3658 {
3659 double zero_double[DML2_MAX_PLANES];
3660 unsigned int zero_integer[DML2_MAX_PLANES];
3661
3662 memset(zero_double, 0, DML2_MAX_PLANES * sizeof(double));
3663 memset(zero_integer, 0, DML2_MAX_PLANES * sizeof(unsigned int));
3664
3665 CalculateDCFCLKDeepSleepTdlut(
3666 display_cfg,
3667 NumberOfActiveSurfaces,
3668 BytePerPixelY,
3669 BytePerPixelC,
3670 SwathWidthY,
3671 SwathWidthC,
3672 DPPPerSurface,
3673 PSCL_THROUGHPUT,
3674 PSCL_THROUGHPUT_CHROMA,
3675 Dppclk,
3676 ReadBandwidthLuma,
3677 ReadBandwidthChroma,
3678 ReturnBusWidth,
3679 0,
3680 zero_integer, //tdlut_bytes_to_deliver,
3681 zero_double, //prefetch_swath_time_us,
3682
3683 // Output
3684 DCFClkDeepSleep);
3685 }
3686
/*
 * Compute the writeback delay from the writeback scaler geometry.
 *
 * A vertical init value of (WritebackVRatio + WritebackVTaps + 1) / 2 and a
 * line length of max(dest width, ceil(dest width / 6) * WritebackVTaps) are
 * derived first. The last (unclamped) output line is then
 * dest height - 1 - ceil((source height - vinit) / WritebackVRatio).
 *
 * Returns 0 when that line index is negative, otherwise
 * last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not used.
 *
 * NOTE(review): "WritebackDestinationHeight - 1" and
 * "HTotal - WritebackDestinationWidth" are evaluated in unsigned arithmetic
 * before conversion to double, so they wrap for a zero height or a
 * destination wider than HTotal.
 */
static double CalculateWriteBackDelay(
	enum dml2_source_format_class WritebackPixelFormat,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackVTaps,
	unsigned int WritebackDestinationWidth,
	unsigned int WritebackDestinationHeight,
	unsigned int WritebackSourceHeight,
	unsigned int HTotal)
{
	const double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	const double line_length = math_max2((double)WritebackDestinationWidth,
		math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double scaled_lines = math_ceil2(((double)WritebackSourceHeight - v_init) / (double)WritebackVRatio, 1.0);
	const double last_output_line = WritebackDestinationHeight - 1 - scaled_lines;

	if (last_output_line < 0)
		return 0;

	return last_output_line * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
3712
CalculateMaxVStartup(bool ptoi_supported,unsigned int vblank_nom_default_us,const struct dml2_timing_cfg * timing,double write_back_delay_us)3713 static unsigned int CalculateMaxVStartup(
3714 bool ptoi_supported,
3715 unsigned int vblank_nom_default_us,
3716 const struct dml2_timing_cfg *timing,
3717 double write_back_delay_us)
3718 {
3719 unsigned int vblank_size = 0;
3720 unsigned int max_vstartup_lines = 0;
3721
3722 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
3723 unsigned int vblank_actual = timing->v_total - timing->v_active;
3724 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
3725 unsigned int vblank_avail = (timing->vblank_nom == 0) ? vblank_nom_default_in_line : (unsigned int)timing->vblank_nom;
3726
3727 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
3728
3729 if (timing->interlaced && !ptoi_supported)
3730 max_vstartup_lines = (unsigned int)(math_floor2((vblank_size - 1) / 2.0, 1.0));
3731 else
3732 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
3733 #ifdef __DML_VBA_DEBUG__
3734 dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom);
3735 dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
3736 dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us);
3737 dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
3738 dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
3739 dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
3740 #endif
3741 max_vstartup_lines = (unsigned int)math_min2(max_vstartup_lines, DML_MAX_VSTARTUP_START);
3742 return max_vstartup_lines;
3743 }
3744
// Choose swath heights, request sizes and the luma/chroma DET split for every
// active surface, and report whether each viewport fits.
//
// Steps, in order:
//  1. CalculateSwathWidth() fills the swath widths and maximum swath heights.
//  2. Full-height swath sizes in bytes are computed per plane.
//  3. The unbounded-request decision is taken via UnboundedRequest().
//  4. CalculateDETBufferSize() assigns a DET size per surface and the
//     compressed buffer size.
//  5. Per surface: swath heights / request sizes are picked so the swaths fit
//     in half of the DET, viewport support is evaluated, and the DET is split
//     between luma and chroma.
//  6. compbuf_reserved_space_64b and hw_debug5 are derived.
//
// scratch: only scratch->CalculateDETBufferSize_locals is used here.
// p:       parameter bundle holding both the inputs and the output pointers/arrays.
static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
{
	unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
	unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };

	unsigned int TotalActiveDPP = 0;
	bool NoChromaOrLinear = true;
	unsigned int SurfaceDoingUnboundedRequest = 0;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

	const long TTUFIFODEPTH = 8;
	const long MAXIMUMCOMPRESSION = 4;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
	}
#endif
	CalculateSwathWidth(
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		p->ODMMode,
		p->BytePerPixY,
		p->BytePerPixC,
		p->Read256BytesBlockHeightY,
		p->Read256BytesBlockHeightC,
		p->Read256BytesBlockWidthY,
		p->Read256BytesBlockWidthC,
		p->surf_linear128_l,
		p->surf_linear128_c,
		p->DPPPerSurface,

		// Output
		p->req_per_swath_ub_l,
		p->req_per_swath_ub_c,
		SwathWidthSingleDPP,
		SwathWidthSingleDPPChroma,
		p->SwathWidth,
		p->SwathWidthChroma,
		MaximumSwathHeightY,
		MaximumSwathHeightC,
		p->swath_width_luma_ub,
		p->swath_width_chroma_ub);

	// Size in bytes of one maximum-height swath per plane: width_ub * bytes-per-pixel-in-DET * max height
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
		p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
		dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
		dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
		dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
		dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
		dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
#endif
		// Only the dml2_420_10 format gets its swath sizes passed through math_ceil2(..., 256)
		// (presumably rounding up to a multiple of 256 bytes -- confirm against lib_float_math)
		if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
			p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
			p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
		}
	}

	// Gather the inputs of the unbounded-request decision.
	// SurfaceDoingUnboundedRequest ends up as the highest index with DPPPerSurface > 0.
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
		if (p->DPPPerSurface[k] > 0)
			SurfaceDoingUnboundedRequest = k;
		if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
			|| p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			NoChromaOrLinear = false;
		}
	}

	*p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);

	CalculateDETBufferSize(
		&scratch->CalculateDETBufferSize_locals,
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		*p->UnboundedRequestEnabled,
		p->nomDETInKByte,
		p->MaxTotalDETInKByte,
		p->ConfigReturnBufferSizeInKByte,
		p->MinCompressedBufferSizeInKByte,
		p->ConfigReturnBufferSegmentSizeInkByte,
		p->CompressedBufferSegmentSizeInkByte,
		p->ReadBandwidthLuma,
		p->ReadBandwidthChroma,
		p->full_swath_bytes_l,
		p->full_swath_bytes_c,
		p->DPPPerSurface,

		// Output
		p->DETBufferSizeInKByte, // per hubp pipe
		p->CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
	dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
#endif

	// Assume support; the first surface that does not fit clears the global flag
	*p->ViewportSizeSupport = true;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {

		// Phantom pipes are checked against a fixed 1024 KByte instead of their assigned DET size
		DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
#endif
		if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			// Linear surfaces: always full-height swaths; request size follows the linear128 flags
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];

			if (p->surf_linear128_l[k])
				p->request_size_bytes_luma[k] = 128;
			else
				p->request_size_bytes_luma[k] = 256;

			if (p->surf_linear128_c[k])
				p->request_size_bytes_chroma[k] = 128;
			else
				p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Both full-height swaths fit in half of the DET
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Luma dominates: halve the luma swath height only
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			// Chroma is comparatively large: halve the chroma swath height only
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;

		} else {
			// Last resort: halve both swath heights (may still not fit; checked below)
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
		}

		// No chroma swath means no chroma requests
		if (p->SwathHeightC[k] == 0)
			p->request_size_bytes_chroma[k] = 0;

		// Unsupported when even half-height swaths overflow half of the DET, or a swath is wider than allowed
		if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
			p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
			*p->ViewportSizeSupport = false;
			dml2_printf("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
			dml2_printf("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
			dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
			dml2_printf("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
			dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
			dml2_printf("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
			dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
			p->ViewportSizeSupportPerSurface[k] = false;
		} else {
			p->ViewportSizeSupportPerSurface[k] = true;
		}

		// Split the surface's real DET allocation (not the phantom override) between the planes
		if (p->SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
			p->DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
#endif
			// Luma share goes through math_floor2(..., 1024); chroma receives the remainder
			p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
		dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
		dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
#endif

	}

	// Baseline reservation: two pixel chunks, expressed in 64-byte units
	*p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
	if (*p->UnboundedRequestEnabled) {
		// With unbounded requesting, reserve at least the ROB size (64-byte units) minus
		// TTUFIFODEPTH luma swaths of the unbounded surface (divided by MAXIMUMCOMPRESSION when mrq_present)
		*p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
			(double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
		dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
#endif
	}
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
#endif

	*p->hw_debug5 = false;
	// ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE is #defined near the top of this file, so the
	// first branch is the one compiled: hw_debug5 is set whenever more than one surface is active.
#ifdef ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE
	if (p->NumberOfActiveSurfaces > 1)
		*p->hw_debug5 = true;
#else
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
			&& p->display_cfg->plane_descriptors[k].surface.dcc.enable
			&& ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
				+ *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
			*p->hw_debug5 = true;
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
		dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
		dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
		dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
#endif
	}
#endif
}
4002
DecideODMMode(unsigned int HActive,double MaxDispclk,unsigned int MaximumPixelsPerLinePerDSCUnit,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne)4003 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
4004 double MaxDispclk,
4005 unsigned int MaximumPixelsPerLinePerDSCUnit,
4006 enum dml2_output_format_class OutFormat,
4007 bool UseDSC,
4008 unsigned int NumberOfDSCSlices,
4009 double SurfaceRequiredDISPCLKWithoutODMCombine,
4010 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4011 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4012 double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
4013 {
4014 enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock;
4015 enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive;
4016 enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive;
4017 enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass;
4018
4019 MinimumRequiredODMModeForMaxDispClock =
4020 (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass :
4021 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 :
4022 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4023 if (ODMMode < MinimumRequiredODMModeForMaxDispClock)
4024 ODMMode = MinimumRequiredODMModeForMaxDispClock;
4025
4026 if (UseDSC) {
4027 MinimumRequiredODMModeForMaxDSCHActive =
4028 (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass :
4029 (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 :
4030 (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4031 if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive)
4032 ODMMode = MinimumRequiredODMModeForMaxDSCHActive;
4033 }
4034
4035 if (OutFormat == dml2_420) {
4036 MinimumRequiredODMModeForMax420HActive =
4037 (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass :
4038 (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_2to1 :
4039 (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
4040 if (ODMMode < MinimumRequiredODMModeForMax420HActive)
4041 ODMMode = MinimumRequiredODMModeForMax420HActive;
4042 }
4043
4044 if (UseDSC) {
4045 if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
4046 ODMMode = dml2_odm_mode_combine_2to1;
4047 if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
4048 ODMMode = dml2_odm_mode_combine_3to1;
4049 if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
4050 ODMMode = dml2_odm_mode_combine_4to1;
4051 }
4052
4053 return ODMMode;
4054 }
4055
CalculateODMConstraints(enum dml2_odm_mode ODMUse,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne,unsigned int MaximumPixelsPerLinePerDSCUnit,double * DISPCLKRequired,unsigned int * NumberOfDPPRequired,unsigned int * MaxHActiveForDSC,unsigned int * MaxDSCSlices,unsigned int * MaxHActiveFor420)4056 static void CalculateODMConstraints(
4057 enum dml2_odm_mode ODMUse,
4058 double SurfaceRequiredDISPCLKWithoutODMCombine,
4059 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4060 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4061 double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4062 unsigned int MaximumPixelsPerLinePerDSCUnit,
4063 /* Output */
4064 double *DISPCLKRequired,
4065 unsigned int *NumberOfDPPRequired,
4066 unsigned int *MaxHActiveForDSC,
4067 unsigned int *MaxDSCSlices,
4068 unsigned int *MaxHActiveFor420)
4069 {
4070 switch (ODMUse) {
4071 case dml2_odm_mode_combine_2to1:
4072 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4073 *NumberOfDPPRequired = 2;
4074 break;
4075 case dml2_odm_mode_combine_3to1:
4076 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4077 *NumberOfDPPRequired = 3;
4078 break;
4079 case dml2_odm_mode_combine_4to1:
4080 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4081 *NumberOfDPPRequired = 4;
4082 break;
4083 case dml2_odm_mode_auto:
4084 case dml2_odm_mode_split_1to2:
4085 case dml2_odm_mode_mso_1to2:
4086 case dml2_odm_mode_mso_1to4:
4087 case dml2_odm_mode_bypass:
4088 default:
4089 *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
4090 *NumberOfDPPRequired = 1;
4091 break;
4092 }
4093 *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
4094 *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
4095 *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
4096 }
4097
ValidateODMMode(enum dml2_odm_mode ODMMode,double MaxDispclk,unsigned int HActive,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double DISPCLKRequired,unsigned int NumberOfDPPRequired,unsigned int MaxHActiveForDSC,unsigned int MaxDSCSlices,unsigned int MaxHActiveFor420)4098 static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
4099 double MaxDispclk,
4100 unsigned int HActive,
4101 enum dml2_output_format_class OutFormat,
4102 bool UseDSC,
4103 unsigned int NumberOfDSCSlices,
4104 unsigned int TotalNumberOfActiveDPP,
4105 unsigned int MaxNumDPP,
4106 double DISPCLKRequired,
4107 unsigned int NumberOfDPPRequired,
4108 unsigned int MaxHActiveForDSC,
4109 unsigned int MaxDSCSlices,
4110 unsigned int MaxHActiveFor420)
4111 {
4112 bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true;
4113 bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1);
4114 unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1;
4115 unsigned int h_timing_div_mode =
4116 (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 :
4117 (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle;
4118
4119 if (DISPCLKRequired > MaxDispclk)
4120 return false;
4121 if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP)
4122 return false;
4123 if (are_odm_segments_symmetrical) {
4124 if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle))
4125 return false;
4126 }
4127 if (HActive % h_timing_div_mode)
4128 /*
4129 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
4130 * OTG_H_SYNC_A_START/END all need to be visible by h timing div
4131 * mode. This logic only checks H active.
4132 */
4133 return false;
4134
4135 if (UseDSC) {
4136 if (HActive > MaxHActiveForDSC)
4137 return false;
4138 if (NumberOfDSCSlices > MaxDSCSlices)
4139 return false;
4140 if (HActive % NumberOfDSCSlices)
4141 return false;
4142 if (NumberOfDSCSlices % NumberOfDPPRequired)
4143 return false;
4144 if (is_max_dsc_slice_required) {
4145 if (NumberOfDSCSlices != MaxDSCSlices)
4146 return false;
4147 }
4148 }
4149
4150 if (OutFormat == dml2_420) {
4151 if (HActive > MaxHActiveFor420)
4152 return false;
4153 }
4154
4155 return true;
4156 }
4157
CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum dml2_output_format_class OutFormat,enum dml2_output_encoder_class Output,enum dml2_odm_mode ODMUse,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum dml2_odm_mode * ODMMode,double * RequiredDISPCLKPerSurface)4158 static noinline_for_stack void CalculateODMMode(
4159 unsigned int MaximumPixelsPerLinePerDSCUnit,
4160 unsigned int HActive,
4161 enum dml2_output_format_class OutFormat,
4162 enum dml2_output_encoder_class Output,
4163 enum dml2_odm_mode ODMUse,
4164 double MaxDispclk,
4165 bool DSCEnable,
4166 unsigned int TotalNumberOfActiveDPP,
4167 unsigned int MaxNumDPP,
4168 double PixelClock,
4169 unsigned int NumberOfDSCSlices,
4170
4171 // Output
4172 bool *TotalAvailablePipesSupport,
4173 unsigned int *NumberOfDPP,
4174 enum dml2_odm_mode *ODMMode,
4175 double *RequiredDISPCLKPerSurface)
4176 {
4177 double SurfaceRequiredDISPCLKWithoutODMCombine;
4178 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4179 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4180 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4181 double DISPCLKRequired;
4182 unsigned int NumberOfDPPRequired;
4183 unsigned int MaxHActiveForDSC;
4184 unsigned int MaxDSCSlices;
4185 unsigned int MaxHActiveFor420;
4186 bool success;
4187 bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0);
4188 enum dml2_odm_mode DecidedODMMode;
4189
4190 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock);
4191 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock);
4192 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock);
4193 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock);
4194 #ifdef __DML_VBA_DEBUG__
4195 dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse);
4196 dml2_printf("DML::%s: Output = %d\n", __func__, Output);
4197 dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
4198 dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
4199 dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
4200 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine);
4201 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne);
4202 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne);
4203 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4204 #endif
4205 if (ODMUse == dml2_odm_mode_auto)
4206 DecidedODMMode = DecideODMMode(HActive,
4207 MaxDispclk,
4208 MaximumPixelsPerLinePerDSCUnit,
4209 OutFormat,
4210 UseDSC,
4211 NumberOfDSCSlices,
4212 SurfaceRequiredDISPCLKWithoutODMCombine,
4213 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4214 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4215 SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4216 else
4217 DecidedODMMode = ODMUse;
4218 CalculateODMConstraints(DecidedODMMode,
4219 SurfaceRequiredDISPCLKWithoutODMCombine,
4220 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4221 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4222 SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4223 MaximumPixelsPerLinePerDSCUnit,
4224 &DISPCLKRequired,
4225 &NumberOfDPPRequired,
4226 &MaxHActiveForDSC,
4227 &MaxDSCSlices,
4228 &MaxHActiveFor420);
4229 success = ValidateODMMode(DecidedODMMode,
4230 MaxDispclk,
4231 HActive,
4232 OutFormat,
4233 UseDSC,
4234 NumberOfDSCSlices,
4235 TotalNumberOfActiveDPP,
4236 MaxNumDPP,
4237 DISPCLKRequired,
4238 NumberOfDPPRequired,
4239 MaxHActiveForDSC,
4240 MaxDSCSlices,
4241 MaxHActiveFor420);
4242
4243 *ODMMode = DecidedODMMode;
4244 *TotalAvailablePipesSupport = success;
4245 *NumberOfDPP = NumberOfDPPRequired;
4246 *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0;
4247 #ifdef __DML_VBA_DEBUG__
4248 dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
4249 dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
4250 dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
4251 dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
4252 #endif
4253 }
4254
CalculateOutputLink(struct dml2_core_internal_scratch * s,double PHYCLK,double PHYCLKD18,double PHYCLKD32,double Downspreading,enum dml2_output_encoder_class Output,enum dml2_output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,enum dml2_dsc_enable_option DSCEnable,unsigned int OutputLinkDPLanes,enum dml2_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,bool * RequiresFEC,double * OutBpp,enum dml2_core_internal_output_type * OutputType,enum dml2_core_internal_output_type_rate * OutputRate,unsigned int * RequiredSlots)4255 static noinline_for_stack void CalculateOutputLink(
4256 struct dml2_core_internal_scratch *s,
4257 double PHYCLK,
4258 double PHYCLKD18,
4259 double PHYCLKD32,
4260 double Downspreading,
4261 enum dml2_output_encoder_class Output,
4262 enum dml2_output_format_class OutputFormat,
4263 unsigned int HTotal,
4264 unsigned int HActive,
4265 double PixelClockBackEnd,
4266 double ForcedOutputLinkBPP,
4267 unsigned int DSCInputBitPerComponent,
4268 unsigned int NumberOfDSCSlices,
4269 double AudioSampleRate,
4270 unsigned int AudioSampleLayout,
4271 enum dml2_odm_mode ODMModeNoDSC,
4272 enum dml2_odm_mode ODMModeDSC,
4273 enum dml2_dsc_enable_option DSCEnable,
4274 unsigned int OutputLinkDPLanes,
4275 enum dml2_output_link_dp_rate OutputLinkDPRate,
4276
4277 // Output
4278 bool *RequiresDSC,
4279 bool *RequiresFEC,
4280 double *OutBpp,
4281 enum dml2_core_internal_output_type *OutputType,
4282 enum dml2_core_internal_output_type_rate *OutputRate,
4283 unsigned int *RequiredSlots)
4284 {
4285 bool LinkDSCEnable;
4286 unsigned int dummy;
4287 *RequiresDSC = false;
4288 *RequiresFEC = false;
4289 *OutBpp = 0;
4290
4291 *OutputType = dml2_core_internal_output_type_unknown;
4292 *OutputRate = dml2_core_internal_output_rate_unknown;
4293
4294 #ifdef __DML_VBA_DEBUG__
4295 dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
4296 dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
4297 dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4298 dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
4299 dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
4300 dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
4301 dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
4302 dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
4303 dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
4304 dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output);
4305 dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
4306 #endif
4307 {
4308 if (Output == dml2_hdmi) {
4309 *RequiresDSC = false;
4310 *RequiresFEC = false;
4311 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
4312 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4313 //OutputTypeAndRate = "HDMI";
4314 *OutputType = dml2_core_internal_output_type_hdmi;
4315 } else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
4316 if (DSCEnable == dml2_dsc_enable) {
4317 *RequiresDSC = true;
4318 LinkDSCEnable = true;
4319 if (Output == dml2_dp || Output == dml2_dp2p0) {
4320 *RequiresFEC = true;
4321 } else {
4322 *RequiresFEC = false;
4323 }
4324 } else {
4325 *RequiresDSC = false;
4326 LinkDSCEnable = false;
4327 if (Output == dml2_dp2p0) {
4328 *RequiresFEC = true;
4329 } else {
4330 *RequiresFEC = false;
4331 }
4332 }
4333 if (Output == dml2_dp2p0) {
4334 *OutBpp = 0;
4335 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
4336 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4337 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4338 if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4339 *RequiresDSC = true;
4340 LinkDSCEnable = true;
4341 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4342 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4343 }
4344 //OutputTypeAndRate = Output & " UHBR10";
4345 *OutputType = dml2_core_internal_output_type_dp2p0;
4346 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
4347 }
4348 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
4349 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4350 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4351
4352 if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4353 *RequiresDSC = true;
4354 LinkDSCEnable = true;
4355 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4356 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4357 }
4358 //OutputTypeAndRate = Output & " UHBR13p5";
4359 *OutputType = dml2_core_internal_output_type_dp2p0;
4360 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
4361 }
4362 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
4363 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4364 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4365 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4366 *RequiresDSC = true;
4367 LinkDSCEnable = true;
4368 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4369 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4370 }
4371 //OutputTypeAndRate = Output & " UHBR20";
4372 *OutputType = dml2_core_internal_output_type_dp2p0;
4373 *OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
4374 }
4375 } else { // output is dp or edp
4376 *OutBpp = 0;
4377 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
4378 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4379 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4380 if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4381 *RequiresDSC = true;
4382 LinkDSCEnable = true;
4383 if (Output == dml2_dp) {
4384 *RequiresFEC = true;
4385 }
4386 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4387 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4388 }
4389 //OutputTypeAndRate = Output & " HBR";
4390 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4391 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
4392 }
4393 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
4394 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4395 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4396
4397 if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4398 *RequiresDSC = true;
4399 LinkDSCEnable = true;
4400 if (Output == dml2_dp) {
4401 *RequiresFEC = true;
4402 }
4403 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4404 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4405 }
4406 //OutputTypeAndRate = Output & " HBR2";
4407 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4408 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
4409 }
4410 if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
4411 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4412 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4413
4414 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4415 *RequiresDSC = true;
4416 LinkDSCEnable = true;
4417 if (Output == dml2_dp) {
4418 *RequiresFEC = true;
4419 }
4420 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
4421 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
4422 }
4423 //OutputTypeAndRate = Output & " HBR3";
4424 *OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
4425 *OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
4426 }
4427 }
4428 } else if (Output == dml2_hdmifrl) {
4429 if (DSCEnable == dml2_dsc_enable) {
4430 *RequiresDSC = true;
4431 LinkDSCEnable = true;
4432 *RequiresFEC = true;
4433 } else {
4434 *RequiresDSC = false;
4435 LinkDSCEnable = false;
4436 *RequiresFEC = false;
4437 }
4438 *OutBpp = 0;
4439 if (PHYCLKD18 >= 3000.0 / 18) {
4440 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4441 //OutputTypeAndRate = Output & "3x3";
4442 *OutputType = dml2_core_internal_output_type_hdmifrl;
4443 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
4444 }
4445 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
4446 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4447 //OutputTypeAndRate = Output & "6x3";
4448 *OutputType = dml2_core_internal_output_type_hdmifrl;
4449 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
4450 }
4451 if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
4452 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4453 //OutputTypeAndRate = Output & "6x4";
4454 *OutputType = dml2_core_internal_output_type_hdmifrl;
4455 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
4456 }
4457 if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
4458 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4459 //OutputTypeAndRate = Output & "8x4";
4460 *OutputType = dml2_core_internal_output_type_hdmifrl;
4461 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
4462 }
4463 if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
4464 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4465 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
4466 *RequiresDSC = true;
4467 LinkDSCEnable = true;
4468 *RequiresFEC = true;
4469 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4470 }
4471 //OutputTypeAndRate = Output & "10x4";
4472 *OutputType = dml2_core_internal_output_type_hdmifrl;
4473 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
4474 }
4475 if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
4476 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4477 if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
4478 *RequiresDSC = true;
4479 LinkDSCEnable = true;
4480 *RequiresFEC = true;
4481 *OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
4482 }
4483 //OutputTypeAndRate = Output & "12x4";
4484 *OutputType = dml2_core_internal_output_type_hdmifrl;
4485 *OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
4486 }
4487 }
4488 }
4489 #ifdef __DML_VBA_DEBUG__
4490 dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
4491 dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
4492 dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
4493 #endif
4494 }
4495
/*
 * Compute the DISPCLK needed by a writeback pipe.
 *
 * Three candidate requirements are derived from PixelClock (one from the
 * horizontal taps and ratio, one from the vertical taps and destination
 * width over a full line, one from the line buffer size over the source
 * width) and the largest of the three is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used.
 */
static double CalculateWriteBackDISPCLK(
	enum dml2_source_format_class WritebackPixelFormat,
	double PixelClock,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackHTaps,
	unsigned int WritebackVTaps,
	unsigned int WritebackSourceWidth,
	unsigned int WritebackDestinationWidth,
	unsigned int HTotal,
	unsigned int WritebackLineBufferSize)
{
	/* Horizontal scaler: taps are grouped by 8. */
	const double horz_clk = PixelClock * math_ceil2((double)WritebackHTaps / 8.0, 1) / WritebackHRatio;

	/* Vertical scaler: destination pixels are grouped by 6, spread over HTotal. */
	const double vert_clk = PixelClock * (WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0) / (double)HTotal;

	/* Line-buffer bound, relative to the source width. */
	const double linebuf_clk = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / (double)WritebackSourceWidth;

	return math_max3(horz_clk, vert_clk, linebuf_clk);
}
4515
RequiredDTBCLK(bool DSCEnable,double PixelClock,enum dml2_output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)4516 static double RequiredDTBCLK(
4517 bool DSCEnable,
4518 double PixelClock,
4519 enum dml2_output_format_class OutputFormat,
4520 double OutputBpp,
4521 unsigned int DSCSlices,
4522 unsigned int HTotal,
4523 unsigned int HActive,
4524 unsigned int AudioRate,
4525 unsigned int AudioLayout)
4526 {
4527 if (DSCEnable != true) {
4528 return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
4529 } else {
4530 double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
4531 double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
4532 double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
4533 double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
4534 double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
4535 return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
4536 }
4537 }
4538
DSCDelayRequirement(bool DSCEnabled,enum dml2_odm_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum dml2_output_format_class OutputFormat,enum dml2_output_encoder_class Output,double PixelClock,double PixelClockBackEnd)4539 static unsigned int DSCDelayRequirement(
4540 bool DSCEnabled,
4541 enum dml2_odm_mode ODMMode,
4542 unsigned int DSCInputBitPerComponent,
4543 double OutputBpp,
4544 unsigned int HActive,
4545 unsigned int HTotal,
4546 unsigned int NumberOfDSCSlices,
4547 enum dml2_output_format_class OutputFormat,
4548 enum dml2_output_encoder_class Output,
4549 double PixelClock,
4550 double PixelClockBackEnd)
4551 {
4552 unsigned int DSCDelayRequirement_val = 0;
4553 unsigned int NumberOfDSCSlicesFactor = 1;
4554
4555 if (DSCEnabled == true && OutputBpp != 0) {
4556
4557 if (ODMMode == dml2_odm_mode_combine_4to1)
4558 NumberOfDSCSlicesFactor = 4;
4559 else if (ODMMode == dml2_odm_mode_combine_3to1)
4560 NumberOfDSCSlicesFactor = 3;
4561 else if (ODMMode == dml2_odm_mode_combine_2to1)
4562 NumberOfDSCSlicesFactor = 2;
4563
4564 DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)),
4565 (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
4566
4567 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0));
4568 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
4569
4570 } else {
4571 DSCDelayRequirement_val = 0;
4572 }
4573 #ifdef __DML_VBA_DEBUG__
4574 dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
4575 dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode);
4576 dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
4577 dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
4578 dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
4579 dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
4580 dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4581 dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
4582 dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
4583 dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
4584 dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
4585 #endif
4586
4587 return DSCDelayRequirement_val;
4588 }
4589
CalculateSurfaceSizeInMall(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int BytesPerPixelY[],unsigned int BytesPerPixelC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)4590 static void CalculateSurfaceSizeInMall(
4591 const struct dml2_display_cfg *display_cfg,
4592 unsigned int NumberOfActiveSurfaces,
4593 unsigned int MALLAllocatedForDCN,
4594 unsigned int BytesPerPixelY[],
4595 unsigned int BytesPerPixelC[],
4596 unsigned int Read256BytesBlockWidthY[],
4597 unsigned int Read256BytesBlockWidthC[],
4598 unsigned int Read256BytesBlockHeightY[],
4599 unsigned int Read256BytesBlockHeightC[],
4600 unsigned int ReadBlockWidthY[],
4601 unsigned int ReadBlockWidthC[],
4602 unsigned int ReadBlockHeightY[],
4603 unsigned int ReadBlockHeightC[],
4604
4605 // Output
4606 unsigned int SurfaceSizeInMALL[],
4607 bool *ExceededMALLSize)
4608 {
4609 unsigned int TotalSurfaceSizeInMALLForSS = 0;
4610 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
4611 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
4612
4613 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4614 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
4615 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
4616
4617 if (composition->viewport.stationary) {
4618 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
4619 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
4620 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
4621 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
4622 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
4623 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
4624
4625 if (ReadBlockWidthC[k] > 0) {
4626 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4627 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
4628 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
4629 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) *
4630 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
4631 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
4632 math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4633 }
4634 } else {
4635 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
4636 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4637 if (ReadBlockWidthC[k] > 0) {
4638 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4639 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4640 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4641 }
4642 }
4643 }
4644
4645 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4646 /* SS and Subvp counted separate as they are never used at the same time */
4647 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
4648 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
4649 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
4650 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
4651 }
4652
4653 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
4654 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
4655
4656 #ifdef __DML_VBA_DEBUG__
4657 dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
4658 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
4659 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
4660 dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
4661 #endif
4662 }
4663
calculate_tdlut_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_tdlut_setting_params * p)4664 static void calculate_tdlut_setting(
4665 struct dml2_core_internal_scratch *scratch,
4666 struct dml2_core_calcs_calculate_tdlut_setting_params *p)
4667 {
4668 // locals
4669 unsigned int tdlut_bpe = 8;
4670 unsigned int tdlut_width;
4671 unsigned int tdlut_pitch_bytes;
4672 unsigned int tdlut_footprint_bytes;
4673 unsigned int vmpg_bytes;
4674 unsigned int tdlut_vmpg_per_frame;
4675 unsigned int tdlut_pte_req_per_frame;
4676 unsigned int tdlut_bytes_per_line;
4677 unsigned int tdlut_delivery_cycles;
4678 double tdlut_drain_rate;
4679 unsigned int tdlut_mpc_width;
4680 unsigned int tdlut_bytes_per_group_simple;
4681
4682 if (!p->setup_for_tdlut) {
4683 *p->tdlut_groups_per_2row_ub = 0;
4684 *p->tdlut_opt_time = 0;
4685 *p->tdlut_drain_time = 0;
4686 *p->tdlut_bytes_to_deliver = 0;
4687 *p->tdlut_bytes_per_group = 0;
4688 *p->tdlut_pte_bytes_per_frame = 0;
4689 *p->tdlut_bytes_per_frame = 0;
4690 return;
4691 }
4692
4693 if (p->tdlut_mpc_width_flag) {
4694 tdlut_mpc_width = 33;
4695 tdlut_bytes_per_group_simple = 39*256;
4696 } else {
4697 tdlut_mpc_width = 17;
4698 tdlut_bytes_per_group_simple = 10*256;
4699 }
4700
4701 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
4702
4703 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) {
4704 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4705 tdlut_width = 4916;
4706 else
4707 tdlut_width = 35940;
4708 } else {
4709 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4710 tdlut_width = 17;
4711 else // dml2_tdlut_width_33_cube
4712 tdlut_width = 33;
4713 }
4714
4715 if (p->is_gfx11)
4716 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment
4717 else
4718 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment
4719
4720 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear)
4721 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width;
4722 else
4723 tdlut_footprint_bytes = tdlut_pitch_bytes;
4724
4725 if (!p->gpuvm_enable) {
4726 tdlut_vmpg_per_frame = 0;
4727 tdlut_pte_req_per_frame = 0;
4728 } else {
4729 tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1;
4730 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1;
4731 }
4732 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request
4733 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64;
4734
4735 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) {
4736 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
4737 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width;
4738 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width;
4739 //the delivery cycles is DispClk cycles per line * number of lines * number of slices
4740 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width;
4741 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1);
4742 } else {
4743 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
4744 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256);
4745 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple;
4746 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1);
4747 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
4748 }
4749
4750 //the tdlut is fetched during the 2 row times of prefetch.
4751 if (p->setup_for_tdlut) {
4752 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
4753 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
4754 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
4755 *p->tdlut_bytes_to_deliver = (unsigned int) (p->cursor_buffer_size * 1024.0);
4756 }
4757
4758 #ifdef __DML_VBA_DEBUG__
4759 dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
4760 dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes);
4761 dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame);
4762 dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame);
4763
4764 dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
4765 dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width);
4766 dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
4767 dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes);
4768 dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes);
4769 dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
4770 dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line);
4771 dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
4772 dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate);
4773 dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles);
4774 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
4775 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
4776 dml2_printf("DML::%s: tdlut_bytes_to_deliver = %d\n", __func__, *p->tdlut_bytes_to_deliver);
4777 dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
4778 #endif
4779 }
4780
CalculateTarb(const struct dml2_display_cfg * display_cfg,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,double ReturnBW,unsigned int MetaChunkSize,double * Tarb,double * Tarb_prefetch)4781 static void CalculateTarb(
4782 const struct dml2_display_cfg *display_cfg,
4783 unsigned int PixelChunkSizeInKByte,
4784 unsigned int NumberOfActiveSurfaces,
4785 unsigned int NumberOfDPP[],
4786 unsigned int dpte_group_bytes[],
4787 unsigned int tdlut_bytes_per_group[],
4788 double HostVMInefficiencyFactor,
4789 double HostVMInefficiencyFactorPrefetch,
4790 unsigned int HostVMMinPageSize,
4791 double ReturnBW,
4792 unsigned int MetaChunkSize,
4793
4794 // output
4795 double *Tarb,
4796 double *Tarb_prefetch)
4797 {
4798 double extra_bytes = 0;
4799 double extra_bytes_prefetch = 0;
4800 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
4801
4802 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4803 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
4804
4805 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
4806 extra_bytes = extra_bytes + (MetaChunkSize * 1024);
4807
4808 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
4809 extra_bytes = extra_bytes + tdlut_bytes_per_group[k];
4810 }
4811
4812 extra_bytes_prefetch = extra_bytes;
4813
4814 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4815 if (display_cfg->gpuvm_enable == true) {
4816 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
4817 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch;
4818 }
4819 }
4820 *Tarb = extra_bytes / ReturnBW;
4821 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW;
4822 #ifdef __DML_VBA_DEBUG__
4823 dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
4824 dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
4825 dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes);
4826 dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch);
4827 #endif
4828 }
4829
/*
 * Return TWait = max(reserved_vblank_time_ns / 1000.0, g6_temp_read_blackout_us)
 *                + max(UrgentLatency, Ttrip).
 *
 * reserved_vblank_time_ns is divided by 1000.0 before being compared with
 * g6_temp_read_blackout_us.
 */
static double CalculateTWait(
	long reserved_vblank_time_ns,
	double UrgentLatency,
	double Ttrip,
	double g6_temp_read_blackout_us)
{
	double TWait;
	double t_urg_trip = math_max2(UrgentLatency, Ttrip);
	TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip;

#ifdef __DML_VBA_DEBUG__
	/* reserved_vblank_time_ns is a long, so it needs the 'l' length modifier. */
	dml2_printf("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip);
	dml2_printf("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
}
4848
4849
/*
 * Compute the VUpdate/VReady pixel offsets and the dynamic metadata timing
 * terms, storing each result through its output pointer.
 *
 *   repeater delay    = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk)
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk,
 *                                repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (sum of the three pixel counts above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = VBlank * HTotal / PixelClock / 2 when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0, otherwise
 *                       DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
 *                       halved when InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
	unsigned int MaxInterDCNTileRepeaters,
	double Dppclk,
	double Dispclk,
	double DCFClkDeepSleep,
	double PixelClock,
	unsigned int HTotal,
	unsigned int VBlank,
	unsigned int DynamicMetadataTransmittedBytes,
	unsigned int DynamicMetadataLinesBeforeActiveRequired,
	unsigned int InterlaceEnable,
	bool ProgressiveToInterlaceUnitInOPP,

	// Output
	double *TSetup,
	double *Tdmbf,
	double *Tdmec,
	double *Tdmsks,
	unsigned int *VUpdateOffsetPix,
	unsigned int *VUpdateWidthPix,
	unsigned int *VReadyOffsetPix)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	double skew_time;

	*VUpdateWidthPix = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
	*VReadyOffsetPix = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
	*VUpdateOffsetPix = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));

	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* No explicit line requirement: fall back to half of the vblank period. */
	if (DynamicMetadataLinesBeforeActiveRequired != 0)
		skew_time = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	else
		skew_time = VBlank * HTotal / PixelClock / 2.0;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		skew_time = skew_time / 2;

	*Tdmsks = skew_time;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);

	dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
4906
get_urgent_bandwidth_required(struct dml2_core_shared_get_urgent_bandwidth_required_locals * l,const struct dml2_display_cfg * display_cfg,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool inc_flip_bw,bool use_qual_row_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double PrefetchBandwidthOto[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double surface_required_bw[],double surface_peak_required_bw[])4907 static double get_urgent_bandwidth_required(
4908 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
4909 const struct dml2_display_cfg *display_cfg,
4910 enum dml2_core_internal_soc_state_type state_type,
4911 enum dml2_core_internal_bw_type bw_type,
4912 bool inc_flip_bw, // including flip bw
4913 bool use_qual_row_bw,
4914 unsigned int NumberOfActiveSurfaces,
4915 unsigned int NumberOfDPP[],
4916 double dcc_dram_bw_nom_overhead_factor_p0[],
4917 double dcc_dram_bw_nom_overhead_factor_p1[],
4918 double dcc_dram_bw_pref_overhead_factor_p0[],
4919 double dcc_dram_bw_pref_overhead_factor_p1[],
4920 double mall_prefetch_sdp_overhead_factor[],
4921 double mall_prefetch_dram_overhead_factor[],
4922 double ReadBandwidthLuma[],
4923 double ReadBandwidthChroma[],
4924 double PrefetchBandwidthLuma[],
4925 double PrefetchBandwidthChroma[],
4926 double PrefetchBandwidthOto[],
4927 double excess_vactive_fill_bw_l[],
4928 double excess_vactive_fill_bw_c[],
4929 double cursor_bw[],
4930 double dpte_row_bw[],
4931 double meta_row_bw[],
4932 double prefetch_cursor_bw[],
4933 double prefetch_vmrow_bw[],
4934 double flip_bw[],
4935 double UrgentBurstFactorLuma[],
4936 double UrgentBurstFactorChroma[],
4937 double UrgentBurstFactorCursor[],
4938 double UrgentBurstFactorLumaPre[],
4939 double UrgentBurstFactorChromaPre[],
4940 double UrgentBurstFactorCursorPre[],
4941 /* outputs */
4942 double surface_required_bw[],
4943 double surface_peak_required_bw[])
4944 {
4945 // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
4946 // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
4947
4948 memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
4949
4950 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4951 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
4952 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4953 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4954 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4955 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4956
4957 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
4958 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
4959 l->adj_factor_cur = UrgentBurstFactorCursor[k];
4960 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
4961 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
4962 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
4963
4964 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
4965 bool exclude_this_plane = 0;
4966
4967 // Exclude phantom pipe in bw calculation for non svp prefetch state
4968 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
4969 exclude_this_plane = 1;
4970
4971 // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
4972 // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
4973 if (use_qual_row_bw) {
4974 if (display_cfg->hostvm_enable)
4975 l->per_plane_flip_bw[k] = 0; // qual_row_bw
4976 else if (!display_cfg->plane_descriptors[k].immediate_flip)
4977 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4978 } else {
4979 // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
4980 if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw)
4981 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4982 else
4983 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
4984 }
4985
4986 if (!exclude_this_plane) {
4987 l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
4988 l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
4989 l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4990 l->flip_and_prefetch_bw_oto = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthOto[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4991 l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
4992 surface_required_bw[k] = math_max5(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw, l->flip_and_prefetch_bw_oto);
4993
4994 /* export peak required bandwidth for the surface */
4995 surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
4996
4997 #ifdef __DML_VBA_DEBUG__
4998 dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
4999 dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
5000 dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
5001 dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
5002 dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
5003 dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
5004 #endif
5005 } else {
5006 surface_required_bw[k] = 0.0;
5007 }
5008
5009 l->required_bandwidth_mbps += surface_required_bw[k];
5010
5011 #ifdef __DML_VBA_DEBUG__
5012 dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
5013 dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
5014 dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
5015 dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
5016 dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
5017 dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
5018 dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
5019
5020 dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
5021 dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
5022 dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
5023
5024 dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
5025 dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
5026 dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
5027 dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
5028 dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
5029 dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
5030 dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
5031
5032 dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
5033 dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
5034 dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
5035 dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
5036 dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
5037 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
5038 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
5039 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
5040 #endif
5041 }
5042
5043 return l->required_bandwidth_mbps;
5044 }
5045
CalculateExtraLatency(const struct dml2_display_cfg * display_cfg,unsigned int ROBBufferSizeInKByte,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,double FabricClock,unsigned int PixelChunkSizeInKByte,double ReturnBW,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,enum dml2_qos_param_type qos_type,bool max_outstanding_when_urgent_expected,unsigned int max_outstanding_requests,unsigned int request_size_bytes_luma[],unsigned int request_size_bytes_chroma[],unsigned int MetaChunkSize,unsigned int dchub_arb_to_ret_delay,double Ttrip,unsigned int hostvm_mode,double * ExtraLatency,double * ExtraLatency_sr,double * ExtraLatencyPrefetch)5046 static void CalculateExtraLatency(
5047 const struct dml2_display_cfg *display_cfg,
5048 unsigned int ROBBufferSizeInKByte,
5049 unsigned int RoundTripPingLatencyCycles,
5050 unsigned int ReorderingBytes,
5051 double DCFCLK,
5052 double FabricClock,
5053 unsigned int PixelChunkSizeInKByte,
5054 double ReturnBW,
5055 unsigned int NumberOfActiveSurfaces,
5056 unsigned int NumberOfDPP[],
5057 unsigned int dpte_group_bytes[],
5058 unsigned int tdlut_bytes_per_group[],
5059 double HostVMInefficiencyFactor,
5060 double HostVMInefficiencyFactorPrefetch,
5061 unsigned int HostVMMinPageSize,
5062 enum dml2_qos_param_type qos_type,
5063 bool max_outstanding_when_urgent_expected,
5064 unsigned int max_outstanding_requests,
5065 unsigned int request_size_bytes_luma[],
5066 unsigned int request_size_bytes_chroma[],
5067 unsigned int MetaChunkSize,
5068 unsigned int dchub_arb_to_ret_delay,
5069 double Ttrip,
5070 unsigned int hostvm_mode,
5071
5072 // output
5073 double *ExtraLatency, // Tex
5074 double *ExtraLatency_sr, // Tex_sr
5075 double *ExtraLatencyPrefetch)
5076
5077 {
5078 double Tarb;
5079 double Tarb_prefetch;
5080 double Tex_trips;
5081 unsigned int max_request_size_bytes = 0;
5082
5083 CalculateTarb(
5084 display_cfg,
5085 PixelChunkSizeInKByte,
5086 NumberOfActiveSurfaces,
5087 NumberOfDPP,
5088 dpte_group_bytes,
5089 tdlut_bytes_per_group,
5090 HostVMInefficiencyFactor,
5091 HostVMInefficiencyFactorPrefetch,
5092 HostVMMinPageSize,
5093 ReturnBW,
5094 MetaChunkSize,
5095 // output
5096 &Tarb,
5097 &Tarb_prefetch);
5098
5099 Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0;
5100
5101 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
5102 if (request_size_bytes_luma[k] > max_request_size_bytes)
5103 max_request_size_bytes = request_size_bytes_luma[k];
5104 if (request_size_bytes_chroma[k] > max_request_size_bytes)
5105 max_request_size_bytes = request_size_bytes_chroma[k];
5106 }
5107
5108 if (qos_type == dml2_qos_param_type_dcn4x) {
5109 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
5110 *ExtraLatency = *ExtraLatency_sr;
5111 if (max_outstanding_when_urgent_expected)
5112 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW;
5113 } else {
5114 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
5115 *ExtraLatency = *ExtraLatency_sr;
5116 }
5117 *ExtraLatency = *ExtraLatency + Tex_trips;
5118 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch;
5119 *ExtraLatency = *ExtraLatency + Tarb;
5120 *ExtraLatency_sr = *ExtraLatency_sr + Tarb;
5121
5122 #ifdef __DML_VBA_DEBUG__
5123 dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type);
5124 dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
5125 dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips);
5126 dml2_printf("DML::%s: max_outstanding_when_urgent_expected=%u\n", __func__, max_outstanding_when_urgent_expected);
5127 dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock);
5128 dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
5129 dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
5130 dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
5131 dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
5132 dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb);
5133 dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
5134 dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
5135 dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
5136 #endif
5137 }
5138
CalculatePrefetchSchedule(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculatePrefetchSchedule_params * p)5139 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
5140 {
5141 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
5142 bool dcc_mrq_enable;
5143
5144 unsigned int vm_bytes;
5145 unsigned int extra_tdpe_bytes;
5146 unsigned int tdlut_row_bytes;
5147 unsigned int Lo;
5148
5149 s->NoTimeToPrefetch = false;
5150 s->DPPCycles = 0;
5151 s->DISPCLKCycles = 0;
5152 s->DSTTotalPixelsAfterScaler = 0.0;
5153 s->LineTime = 0.0;
5154 s->dst_y_prefetch_equ = 0.0;
5155 s->prefetch_bw_oto = 0.0;
5156 s->Tvm_oto = 0.0;
5157 s->Tr0_oto = 0.0;
5158 s->Tvm_oto_lines = 0.0;
5159 s->Tr0_oto_lines = 0.0;
5160 s->dst_y_prefetch_oto = 0.0;
5161 s->TimeForFetchingVM = 0.0;
5162 s->TimeForFetchingRowInVBlank = 0.0;
5163 s->LinesToRequestPrefetchPixelData = 0.0;
5164 s->HostVMDynamicLevelsTrips = 0;
5165 s->trip_to_mem = 0.0;
5166 *p->Tvm_trips = 0.0;
5167 *p->Tr0_trips = 0.0;
5168 s->Tvm_trips_rounded = 0.0;
5169 s->Tr0_trips_rounded = 0.0;
5170 s->max_Tsw = 0.0;
5171 s->Lsw_oto = 0.0;
5172 *p->Tpre_rounded = 0.0;
5173 s->prefetch_bw_equ = 0.0;
5174 s->Tvm_equ = 0.0;
5175 s->Tr0_equ = 0.0;
5176 s->Tdmbf = 0.0;
5177 s->Tdmec = 0.0;
5178 s->Tdmsks = 0.0;
5179 *p->prefetch_sw_bytes = 0.0;
5180 s->prefetch_bw_pr = 0.0;
5181 s->bytes_pp = 0.0;
5182 s->dep_bytes = 0.0;
5183 s->min_Lsw_oto = 0.0;
5184 s->min_Lsw_equ = 0.0;
5185 s->Tsw_est1 = 0.0;
5186 s->Tsw_est2 = 0.0;
5187 s->Tsw_est3 = 0.0;
5188 s->cursor_prefetch_bytes = 0;
5189 *p->prefetch_cursor_bw = 0;
5190 *p->RequiredPrefetchBWOTO = 0.0;
5191
5192 dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
5193
5194 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
5195
5196 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
5197 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
5198 } else {
5199 s->HostVMDynamicLevelsTrips = 0;
5200 }
5201 #ifdef __DML_VBA_DEBUG__
5202 dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
5203 dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
5204 dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
5205 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
5206 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5207 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
5208 dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup);
5209 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
5210 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5211 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
5212 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5213 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5214 dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5215 dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
5216 #endif
5217 CalculateVUpdateAndDynamicMetadataParameters(
5218 p->MaxInterDCNTileRepeaters,
5219 p->myPipe->Dppclk,
5220 p->myPipe->Dispclk,
5221 p->myPipe->DCFClkDeepSleep,
5222 p->myPipe->PixelClock,
5223 p->myPipe->HTotal,
5224 p->myPipe->VBlank,
5225 p->DynamicMetadataTransmittedBytes,
5226 p->DynamicMetadataLinesBeforeActiveRequired,
5227 p->myPipe->InterlaceEnable,
5228 p->myPipe->ProgressiveToInterlaceUnitInOPP,
5229 p->TSetup,
5230
5231 // Output
5232 &s->Tdmbf,
5233 &s->Tdmec,
5234 &s->Tdmsks,
5235 p->VUpdateOffsetPix,
5236 p->VUpdateWidthPix,
5237 p->VReadyOffsetPix);
5238
5239 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
5240 s->trip_to_mem = p->Ttrip;
5241 *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
5242 if (dcc_mrq_enable)
5243 *p->Tvm_trips_flip = *p->Tvm_trips;
5244 else
5245 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
5246
5247 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
5248 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
5249
5250 if (p->DynamicMetadataVMEnabled == true) {
5251 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
5252 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
5253 } else {
5254 *p->Tdmdl_vm = 0;
5255 *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
5256 }
5257
5258 if (p->DynamicMetadataEnable == true) {
5259 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
5260 *p->NotEnoughTimeForDynamicMetadata = true;
5261 dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
5262 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5263 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5264 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5265 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5266 } else {
5267 *p->NotEnoughTimeForDynamicMetadata = false;
5268 }
5269 } else {
5270 *p->NotEnoughTimeForDynamicMetadata = false;
5271 }
5272
5273 if (p->myPipe->ScalerEnabled)
5274 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
5275 else
5276 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
5277
5278 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
5279
5280 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
5281
5282 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
5283 return true;
5284
5285 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
5286 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
5287 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
5288 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
5289
5290 #ifdef __DML_VBA_DEBUG__
5291 dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
5292 dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
5293 dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
5294 dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5295 dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
5296 dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
5297 dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
5298 dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
5299 dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
5300 dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
5301
5302 dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
5303 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
5304 dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
5305 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time);
5306 #endif
5307
5308 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
5309 *p->DSTYAfterScaler = 1;
5310 else
5311 *p->DSTYAfterScaler = 0;
5312
5313 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
5314 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
5315 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
5316 #ifdef __DML_VBA_DEBUG__
5317 dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
5318 dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
5319 #endif
5320
5321 #ifdef __DML_VBA_DEBUG__
5322 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5323 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5324 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5325 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5326 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5327 dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
5328 #endif
5329 if (p->display_cfg->gpuvm_enable) {
5330 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5331 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5332 } else {
5333 if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
5334 s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
5335 else
5336 s->Tvm_trips_rounded = s->LineTime / 4.0;
5337 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
5338 }
5339
5340 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
5341 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
5342
5343 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
5344 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5345 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5346 } else {
5347 s->Tr0_trips_rounded = s->LineTime / 4.0;
5348 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
5349 }
5350 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
5351 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
5352
5353 if (p->display_cfg->gpuvm_enable == true) {
5354 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
5355 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
5356 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
5357 *p->Tno_bw = p->ExtraLatencyPrefetch;
5358 } else {
5359 *p->Tno_bw = 0;
5360 }
5361 } else {
5362 *p->Tno_bw = 0;
5363 }
5364
5365 if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
5366 *p->Tno_bw_flip = *p->Tno_bw;
5367 else
5368 *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
5369
5370 if (dml_is_420(p->myPipe->SourcePixelFormat)) {
5371 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
5372 } else {
5373 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
5374 }
5375
5376 *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
5377 *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
5378
5379 vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
5380 extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
5381
5382 if (p->setup_for_tdlut)
5383 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
5384
5385 tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
5386
5387 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
5388 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
5389 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
5390
5391 // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto
5392 // Note: in prefetch calculation, accounting is done mostly per-pipe.
5393 // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time
5394 s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface;
5395
5396 // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1)
5397 s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime;
5398
5399 if (p->myPipe->BytePerPixelC > 0) {
5400 s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface;
5401 s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime;
5402 }
5403
5404 /* oto prefetch bw should always be less than total vactive bw */
5405 //DML2_ASSERT(s->prefetch_bw_oto < s->per_pipe_vactive_sw_bw * p->myPipe->DPPPerSurface);
5406
5407 s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor;
5408
5409 s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime));
5410
5411 s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0;
5412
5413 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
5414 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5415 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5416
5417 /* oto bw needs to be outputted even if the oto schedule isn't being used to avoid ms/mp mismatch.
5418 * mp will fail if ms decides to use equ schedule and mp decides to use oto schedule
5419 * and the required bandwidth increases when going from ms to mp
5420 */
5421 *p->RequiredPrefetchBWOTO = s->prefetch_bw_oto;
5422
5423 #ifdef __DML_VBA_DEBUG__
5424 dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l);
5425 dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c);
5426 dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw);
5427 #endif
5428
5429 if (p->display_cfg->gpuvm_enable == true) {
5430 s->Tvm_oto = math_max3(
5431 *p->Tvm_trips,
5432 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
5433 s->LineTime / 4.0);
5434
5435 #ifdef __DML_VBA_DEBUG__
5436 dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
5437 dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
5438 dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
5439 #endif
5440 } else {
5441 s->Tvm_oto = s->Tvm_trips_rounded;
5442 }
5443
5444 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
5445 s->Tr0_oto = math_max3(
5446 *p->Tr0_trips,
5447 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
5448 s->LineTime / 4.0);
5449 #ifdef __DML_VBA_DEBUG__
5450 dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
5451 dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
5452 dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
5453 #endif
5454 } else
5455 s->Tr0_oto = s->LineTime / 4.0;
5456
5457 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
5458 s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
5459 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
5460
5461 #ifdef DML_GLOBAL_PREFETCH_CHECK
5462 dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre);
5463 if (p->impacted_dst_y_pre > 0) {
5464 dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5465 s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre);
5466 dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto);
5467 }
5468 #endif
5469 *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime;
5470
5471 //To (time for delay after scaler) in line time
5472 Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
5473
5474 s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
5475 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
5476 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
5477 //Tpre_equ in line time
5478 if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
5479 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
5480 else
5481 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
5482
5483 #ifdef DML_GLOBAL_PREFETCH_CHECK
5484 s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ);
5485
5486 s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5487
5488 if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ)
5489 s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ;
5490 #endif
5491
5492 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5493
5494 #ifdef __DML_VBA_DEBUG__
5495 dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
5496 dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
5497 dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5498 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5499 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
5500 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5501 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5502 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
5503 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5504 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5505 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5506 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
5507 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5508 dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
5509 dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes);
5510 dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
5511 dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
5512 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
5513 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5514 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5515 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5516 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5517 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
5518 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
5519 dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
5520 dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
5521 dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
5522 dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
5523 dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
5524 dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
5525 dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
5526 dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5527 dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
5528 dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
5529 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
5530 #endif
5531 double Tpre = s->dst_y_prefetch_equ * s->LineTime;
5532 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
5533 *p->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
5534
5535 #ifdef __DML_VBA_DEBUG__
5536 dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
5537 dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime);
5538 dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup);
5539 dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
5540 dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
5541 dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
5542 dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
5543 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5544 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5545 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5546 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
5547 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5548 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5549 dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
5550 dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
5551 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5552 dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
5553 dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
5554 dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
5555 dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
5556 dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
5557 dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
5558 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5559 dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre));
5560 dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5561 #endif
5562
5563 *p->dst_y_per_vm_vblank = 0;
5564 *p->dst_y_per_row_vblank = 0;
5565 *p->VRatioPrefetchY = 0;
5566 *p->VRatioPrefetchC = 0;
5567 *p->RequiredPrefetchPixelDataBWLuma = 0;
5568
5569 // Derive bandwidth by finding how much data to move within the time constraint
5570 // Tpre_rounded is Tpre rounding to 2-bit fraction
5571 // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
5572 // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
5573 // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
5574 bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
5575 bool tpre_gt_req_latency = true;
5576 #if 0
5577 // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained.
5578 // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages.
5579 // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary.
5580 tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch);
5581 #endif
5582
5583 if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) {
5584 s->prefetch_bw1 = 0.;
5585 s->prefetch_bw2 = 0.;
5586 s->prefetch_bw3 = 0.;
5587 s->prefetch_bw4 = 0.;
5588
5589 // prefetch_bw1: VM + 2*R0 + SW
5590 if (*p->Tpre_rounded - *p->Tno_bw > 0) {
5591 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
5592 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
5593 + *p->prefetch_sw_bytes)
5594 / (*p->Tpre_rounded - *p->Tno_bw);
5595 s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1;
5596 } else
5597 s->prefetch_bw1 = 0;
5598
5599 dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
5600 if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
5601 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
5602 (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
5603 #ifdef __DML_VBA_DEBUG__
5604 dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
5605 dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded);
5606 dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
5607 dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5608 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5609 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5610 dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
5611 dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
5612 #endif
5613 }
5614
5615 // prefetch_bw2: VM + SW
5616 if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
5617 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) /
5618 (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
5619 s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2;
5620 } else
5621 s->prefetch_bw2 = 0;
5622
5623 dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
5624 if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
5625 s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
5626 dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
5627 }
5628
5629 // prefetch_bw3: 2*R0 + SW
5630 if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) {
5631 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) /
5632 (*p->Tpre_rounded - s->Tvm_trips_rounded);
5633 s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3;
5634 } else
5635 s->prefetch_bw3 = 0;
5636
5637 dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
5638 if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
5639 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
5640 dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
5641 }
5642
5643 // prefetch_bw4: SW
5644 if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
5645 s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
5646 else
5647 s->prefetch_bw4 = 0;
5648
5649 #ifdef __DML_VBA_DEBUG__
5650 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5651 dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre));
5652 dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5653 dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
5654 dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
5655 dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
5656 dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
5657 dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
5658 dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
5659 dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
5660 dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
5661 #endif
5662 {
5663 bool Case1OK = false;
5664 bool Case2OK = false;
5665 bool Case3OK = false;
5666
5667 // get "equalized" bw among all stages (vm, r0, sw): the base case is that all 3 stages are just above the latency-based requirement,
5668 // so that no particular stage is disproportionately favored; next is r0 being more aggressive, then vm being more aggressive; the worst case is all stages being aggressive
5669 // vs the latency-based number
5670
5671 // prefetch_bw1: VM + 2*R0 + SW
5672 // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency based time to fetch the data)
5673 // here is to make sure equ bw won't be more aggressive than the latency-based requirement.
5674 // check vm time >= vm_trips
5675 // check r0 time >= r0_trips
5676
5677 double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
5678
5679 dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
5680 dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
5681
5682 if (s->prefetch_bw1 > 0) {
5683 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
5684 double row_transfer_time = total_row_bytes / s->prefetch_bw1;
5685 dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5686 dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
5687 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5688 Case1OK = true;
5689 }
5690 }
5691
5692 // prefetch_bw2: VM + SW
5693 // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - 2*Tr0_trips_rounded - Tno_bw)
5694 // check vm time >= vm_trips
5695 // check r0 time < r0_trips
5696 if (s->prefetch_bw2 > 0) {
5697 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
5698 double row_transfer_time = total_row_bytes / s->prefetch_bw2;
5699 dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5700 dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
5701 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
5702 Case2OK = true;
5703 }
5704 }
5705
5706 // prefetch_bw3: VM + 2*R0
5707 // check vm time < vm_trips
5708 // check r0 time >= r0_trips
5709 if (s->prefetch_bw3 > 0) {
5710 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
5711 double row_transfer_time = total_row_bytes / s->prefetch_bw3;
5712 dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5713 dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
5714 if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5715 Case3OK = true;
5716 }
5717 }
5718
5719 if (Case1OK) {
5720 s->prefetch_bw_equ = s->prefetch_bw1;
5721 } else if (Case2OK) {
5722 s->prefetch_bw_equ = s->prefetch_bw2;
5723 } else if (Case3OK) {
5724 s->prefetch_bw_equ = s->prefetch_bw3;
5725 } else {
5726 s->prefetch_bw_equ = s->prefetch_bw4;
5727 }
5728
5729 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
5730 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5731 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5732 #ifdef __DML_VBA_DEBUG__
5733 dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK);
5734 dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK);
5735 dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK);
5736 dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
5737 #endif
5738
5739 if (s->prefetch_bw_equ > 0) {
5740 if (p->display_cfg->gpuvm_enable == true) {
5741 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
5742 } else {
5743 s->Tvm_equ = s->LineTime / 4;
5744 }
5745
5746 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
5747 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
5748 *p->Tr0_trips,
5749 s->LineTime / 4);
5750 } else {
5751 s->Tr0_equ = s->LineTime / 4;
5752 }
5753 } else {
5754 s->Tvm_equ = 0;
5755 s->Tr0_equ = 0;
5756 dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
5757 }
5758 }
5759 #ifdef __DML_VBA_DEBUG__
5760 dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
5761 dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
5762 #endif
5763 // Use the more stressful prefetch schedule
5764 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
5765 *p->dst_y_prefetch = s->dst_y_prefetch_oto;
5766 s->TimeForFetchingVM = s->Tvm_oto;
5767 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
5768
5769 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5770 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5771 #ifdef __DML_VBA_DEBUG__
5772 dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__);
5773 #endif
5774
5775 } else {
5776 *p->dst_y_prefetch = s->dst_y_prefetch_equ;
5777
5778 if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted)
5779 *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted;
5780
5781 s->TimeForFetchingVM = s->Tvm_equ;
5782 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
5783
5784 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5785 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5786
5787 #ifdef __DML_VBA_DEBUG__
5788 dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
5789 #endif
5790 }
5791
5792 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
5793 s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw
5794
5795 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
5796 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
5797 *p->prefetch_swath_time_us = (s->LinesToRequestPrefetchPixelData * s->LineTime);
5798
5799 #ifdef __DML_VBA_DEBUG__
5800 dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
5801 dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
5802 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5803 dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
5804 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5805 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5806 dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
5807 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5808 dml2_printf("DML::%s: prefetch_swath_time_us = %f\n", __func__, *p->prefetch_swath_time_us);
5809
5810 dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
5811 dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
5812 dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
5813 dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
5814 #endif
5815 DML2_ASSERT(*p->dst_y_prefetch < 64);
5816
5817 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
5818 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
5819 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
5820 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
5821 #ifdef __DML_VBA_DEBUG__
5822 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5823 dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
5824 dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
5825 #endif
5826 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
5827 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
5828 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
5829 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
5830 } else {
5831 s->NoTimeToPrefetch = true;
5832 dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
5833 *p->VRatioPrefetchY = 0;
5834 }
5835 #ifdef __DML_VBA_DEBUG__
5836 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5837 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5838 dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
5839 #endif
5840 }
5841
5842 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
5843 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
5844
5845 #ifdef __DML_VBA_DEBUG__
5846 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5847 dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
5848 dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
5849 #endif
5850 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
5851 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
5852 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
5853 } else {
5854 s->NoTimeToPrefetch = true;
5855 dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
5856 *p->VRatioPrefetchC = 0;
5857 }
5858 #ifdef __DML_VBA_DEBUG__
5859 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5860 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5861 dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
5862 #endif
5863 }
5864
5865 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
5866 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
5867
5868 #ifdef __DML_VBA_DEBUG__
5869 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5870 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5871 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5872 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5873 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5874 #endif
5875 } else {
5876 s->NoTimeToPrefetch = true;
5877 dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
5878 dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
5879 *p->VRatioPrefetchY = 0;
5880 *p->VRatioPrefetchC = 0;
5881 *p->RequiredPrefetchPixelDataBWLuma = 0;
5882 *p->RequiredPrefetchPixelDataBWChroma = 0;
5883 }
5884 dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
5885 dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
5886 dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
5887 dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
5888 dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
5889 dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
5890 dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
5891 dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
5892
5893 } else {
5894 dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
5895 dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
5896 __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
5897 dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n",
5898 __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch);
5899 s->NoTimeToPrefetch = true;
5900 s->TimeForFetchingVM = 0;
5901 s->TimeForFetchingRowInVBlank = 0;
5902 *p->dst_y_per_vm_vblank = 0;
5903 *p->dst_y_per_row_vblank = 0;
5904 s->LinesToRequestPrefetchPixelData = 0;
5905 *p->VRatioPrefetchY = 0;
5906 *p->VRatioPrefetchC = 0;
5907 *p->RequiredPrefetchPixelDataBWLuma = 0;
5908 *p->RequiredPrefetchPixelDataBWChroma = 0;
5909 }
5910
5911 {
5912 double prefetch_vm_bw;
5913 double prefetch_row_bw;
5914
5915 if (vm_bytes == 0) {
5916 prefetch_vm_bw = 0;
5917 } else if (*p->dst_y_per_vm_vblank > 0) {
5918 #ifdef __DML_VBA_DEBUG__
5919 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5920 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5921 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5922 #endif
5923 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime);
5924 #ifdef __DML_VBA_DEBUG__
5925 dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
5926 #endif
5927 } else {
5928 prefetch_vm_bw = 0;
5929 s->NoTimeToPrefetch = true;
5930 dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
5931 }
5932
5933 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
5934 prefetch_row_bw = 0;
5935 } else if (*p->dst_y_per_row_vblank > 0) {
5936 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime);
5937
5938 #ifdef __DML_VBA_DEBUG__
5939 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5940 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5941 dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
5942 #endif
5943 } else {
5944 prefetch_row_bw = 0;
5945 s->NoTimeToPrefetch = true;
5946 dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
5947 }
5948
5949 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
5950 }
5951
5952 if (s->NoTimeToPrefetch) {
5953 s->TimeForFetchingVM = 0;
5954 s->TimeForFetchingRowInVBlank = 0;
5955 *p->dst_y_per_vm_vblank = 0;
5956 *p->dst_y_per_row_vblank = 0;
5957 *p->dst_y_prefetch = 0;
5958 s->LinesToRequestPrefetchPixelData = 0;
5959 *p->VRatioPrefetchY = 0;
5960 *p->VRatioPrefetchC = 0;
5961 *p->RequiredPrefetchPixelDataBWLuma = 0;
5962 *p->RequiredPrefetchPixelDataBWChroma = 0;
5963 *p->prefetch_vmrow_bw = 0;
5964 }
5965
5966 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
5967 dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
5968 dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
5969 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5970 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5971 dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
5972
5973 return s->NoTimeToPrefetch;
5974 }
5975
// Number of source lines the line buffer can hold for one pipe.
//
// The viewport extent (vp_width, or vp_height when the rotation is vertical)
// is split across num_pipes with integer division. That per-pipe width is
// divided by max(h_ratio, 1.0) to get the pixels stored per line, and the line
// buffer capacity (line_buffer_size_bits at 57 bits per pixel) is divided by it.
// The result is floored via math_floor2(.., 1.0) and capped at
// max_line_buffer_lines.
//
// NOTE(review): num_pipes is used as a divisor without a check; callers are
// presumably expected to pass a non-zero value - confirm.
static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines,
	unsigned int line_buffer_size_bits,
	unsigned int num_pipes,
	unsigned int vp_width,
	unsigned int vp_height,
	double h_ratio,
	enum dml2_rotation_angle rotation_angle)
{
	const double bits_per_lb_pixel = 57.0;
	unsigned int pipe_src_width;
	double pixels_per_lb_line;
	double lines_that_fit;

	// vertical rotations scan along the viewport height instead of the width
	if (dml_is_vertical_rotation(rotation_angle))
		pipe_src_width = vp_height / num_pipes;
	else
		pipe_src_width = vp_width / num_pipes;

	// horizontal downscaling (h_ratio > 1) shrinks what has to be stored per line
	pixels_per_lb_line = pipe_src_width / math_max2(h_ratio, 1.0);
	lines_that_fit = math_floor2(line_buffer_size_bits / bits_per_lb_pixel / pixels_per_lb_line, 1.0);

	return (unsigned int) math_min2((double) max_line_buffer_lines, lines_that_fit);
}
5996
// Return the index of the plane, other than this_plane_idx, that has the
// largest entry in Trpd_dcfclk_cycles[0..num_planes-1].
//
// Ties go to the lowest index (a later plane replaces the current pick only
// when its value is strictly larger).
//
// When there is no other plane to choose from (e.g. num_planes <= 1), the
// assertion fires and this_plane_idx is returned as a fallback.
//
// The fallback is taken only when nothing was found. The previous "<= 0" test
// also discarded a legitimate result of index 0, so plane 0 could never be
// reported as the most impacting plane.
static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[])
{
	unsigned int max_value = 0;
	int max_idx = -1; // -1 means "no candidate seen yet"

	for (unsigned int i = 0; i < num_planes; i++) {
		if (i == this_plane_idx)
			continue;

		// the first candidate is always taken; later ones only if strictly larger
		if (max_idx < 0 || Trpd_dcfclk_cycles[i] > max_value) {
			max_value = Trpd_dcfclk_cycles[i];
			max_idx = (int) i;
		}
	}

	// a caller asking for the impact on a plane is expected to have other planes
	DML2_ASSERT(max_idx >= 0);
	if (max_idx < 0)
		return this_plane_idx;

	return (unsigned int) max_idx;
}
6014
// Sum prefetch_swath_bytes[] over every plane except exclude_plane_idx and
// divide the total by bw_mbps.
//
// An exclude_plane_idx outside [0, num_planes) excludes nothing. bw_mbps is
// used as a divisor without a check.
// NOTE(review): presumably bytes / MBps yields microseconds - confirm units
// against the caller.
static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps)
{
	double total_bytes = 0.0;
	unsigned int plane;

	for (plane = 0; plane < num_planes; plane++) {
		if (plane == exclude_plane_idx)
			continue;

		total_bytes += prefetch_swath_bytes[plane];
	}

	return total_bytes / bw_mbps;
}
6025
6026 // a global check against the aggregate effect of the per plane prefetch schedule
// Checks whether each plane's prefetch schedule still holds once the return
// path impact of the other planes is taken into account.
//
// Steps:
//  1. For every active plane, estimate the burst needed to fill its detile
//     buffer (plus line-buffer lines worth of swaths) and convert that to a
//     number of return path dcfclk cycles, clamped to the rob + compressed
//     buffer worst case.
//  2. For every active plane i, pick the other plane k with the largest cycle
//     count and derive p->impacted_dst_y_pre[i] (in lines) from plane k's
//     cycles plus the swath transfer time of all planes except k.
//  3. If both p->Tpre_rounded and p->Tpre_oto are provided, fail the check and
//     request a recalculation when any impacted_dst_y_pre[i] exceeds
//     p->dst_y_prefetch[i]. Otherwise report success without recalculation.
//
// Outputs: p->impacted_dst_y_pre[] (always written), *p->recalc_prefetch_schedule.
// Returns s->prefetch_global_check_passed (1 = pass, 0 = fail).
//
// NOTE(review): p->chunk_bytes_l/c, p->swath_height_l/c[], p->estimated_dcfclk_mhz,
// p->estimated_urg_bandwidth_required_mbps and p->line_time[] are used as
// divisors without checks; presumably callers guarantee non-zero values - confirm.
static noinline_for_stack bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p)
{
	struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals;
	unsigned int i, k;

	// start from a clean scratch area on every call
	memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals));

	// default result: check passes, no recalculation needed
	*p->recalc_prefetch_schedule = 0;
	s->prefetch_global_check_passed = 1;
	// worst case if the rob and cdb are fully hogged: total buffered bytes
	// (compressed buffer scaled by the max compression ratio) in 64B units
	s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0);
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes);
	dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes);
	dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes);
	dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps);
	dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz);
	dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles);
#endif

	// calculate the return impact from each plane, request is 256B per dcfclk
	for (i = 0; i < p->num_active_planes; i++) {
		// work on local copies so the 420_10 scaling below does not touch the inputs
		s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i];
		s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i];
		s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i];
		s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i];

		// 420_10 sizes are scaled by 1.5 (result truncated to unsigned int)
		if (p->pixel_format[i] == dml2_420_10) {
			s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5);
			s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5);
			s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5);
			s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5);
		}

		// luma: detile buffer rounded down to whole chunks, plus one swath for
		// every whole swath height that fits in the line buffer source lines
		s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l);
		s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]);

#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]);
		dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l);
		dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]);
		dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]);
		dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]);
		dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det);
#endif

		if (s->src_swath_bytes_c[i] > 0) { // dual_plane
			// chroma: detile buffer rounded down to whole chunks
			s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c);

			// the chroma line buffer term is only added for the 422 planar formats
			if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) {
				s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]);
			}

#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, i, p->chunk_bytes_c);
			dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]);
			dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]);
			dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]);
#endif
		}

		s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate
		s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk

#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det);
		dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us);
		dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]);
#endif
		// clamp to the worst case delay, which is the one that occupies the full rob+cdb
		if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles)
			s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles;
	}

	// Figure out the impacted prefetch time for each plane
	// if impacted_Tpre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw
	for (i = 0; i < p->num_active_planes; i++) {
		k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i
		// the rest of planes (except for k) compete for bw
		p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz;
		p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps);
		// convert the time to lines of plane i; math_ceil2(.., 0.25) presumably rounds up to a quarter line
		p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25);

#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k);
#endif
	}

	// Tpre_rounded / Tpre_oto are only read for the debug prints here; their
	// presence decides whether the comparison against dst_y_prefetch is done at all
	if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) {
		for (i = 0; i < p->num_active_planes; i++) {
			// the impacted prefetch needs more lines than the schedule provides
			if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) {
				s->prefetch_global_check_passed = 0;
				*p->recalc_prefetch_schedule = 1;
			}
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]);
			dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]);
#endif
		}
	} else {
		// likely a mode programming call; assume support, and no recalc - not used anyway
		s->prefetch_global_check_passed = 1;
		*p->recalc_prefetch_schedule = 0;
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed);
	dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule);
#endif

	return s->prefetch_global_check_passed;
}
6140
calculate_peak_bandwidth_required(struct dml2_core_internal_scratch * s,struct dml2_core_calcs_calculate_peak_bandwidth_required_params * p)6141 static void calculate_peak_bandwidth_required(
6142 struct dml2_core_internal_scratch *s,
6143 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
6144 {
6145 unsigned int n;
6146 unsigned int m;
6147
6148 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
6149
6150 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
6151
6152 #ifdef __DML_VBA_DEBUG__
6153 dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
6154 dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
6155 #endif
6156
6157 for (unsigned int k = 0; k < p->num_active_planes; ++k) {
6158 l->unity_array[k] = 1.0;
6159 l->zero_array[k] = 0.0;
6160 }
6161
6162 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
6163 for (n = 0; n < dml2_core_internal_bw_max; n++) {
6164 get_urgent_bandwidth_required(
6165 &s->get_urgent_bandwidth_required_locals,
6166 p->display_cfg,
6167 m,
6168 n,
6169 0, //inc_flip_bw,
6170 0, //use_qual_row_bw
6171 p->num_active_planes,
6172 p->num_of_dpp,
6173 p->dcc_dram_bw_nom_overhead_factor_p0,
6174 p->dcc_dram_bw_nom_overhead_factor_p1,
6175 p->dcc_dram_bw_pref_overhead_factor_p0,
6176 p->dcc_dram_bw_pref_overhead_factor_p1,
6177 p->mall_prefetch_sdp_overhead_factor,
6178 p->mall_prefetch_dram_overhead_factor,
6179 p->surface_read_bandwidth_l,
6180 p->surface_read_bandwidth_c,
6181 l->zero_array, //PrefetchBandwidthLuma,
6182 l->zero_array, //PrefetchBandwidthChroma,
6183 l->zero_array, //PrefetchBWOTO
6184 l->zero_array,
6185 l->zero_array,
6186 l->zero_array,
6187 p->dpte_row_bw,
6188 p->meta_row_bw,
6189 l->zero_array, //prefetch_cursor_bw,
6190 l->zero_array, //prefetch_vmrow_bw,
6191 l->zero_array, //flip_bw,
6192 l->zero_array,
6193 l->zero_array,
6194 l->zero_array,
6195 l->zero_array,
6196 l->zero_array,
6197 l->zero_array,
6198 p->surface_avg_vactive_required_bw[m][n],
6199 p->surface_peak_required_bw[m][n]);
6200
6201 p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6202 &s->get_urgent_bandwidth_required_locals,
6203 p->display_cfg,
6204 m,
6205 n,
6206 0, //inc_flip_bw,
6207 0, //use_qual_row_bw
6208 p->num_active_planes,
6209 p->num_of_dpp,
6210 p->dcc_dram_bw_nom_overhead_factor_p0,
6211 p->dcc_dram_bw_nom_overhead_factor_p1,
6212 p->dcc_dram_bw_pref_overhead_factor_p0,
6213 p->dcc_dram_bw_pref_overhead_factor_p1,
6214 p->mall_prefetch_sdp_overhead_factor,
6215 p->mall_prefetch_dram_overhead_factor,
6216 p->surface_read_bandwidth_l,
6217 p->surface_read_bandwidth_c,
6218 l->zero_array, //PrefetchBandwidthLuma,
6219 l->zero_array, //PrefetchBandwidthChroma,
6220 l->zero_array, //PrefetchBWOTO
6221 p->excess_vactive_fill_bw_l,
6222 p->excess_vactive_fill_bw_c,
6223 p->cursor_bw,
6224 p->dpte_row_bw,
6225 p->meta_row_bw,
6226 l->zero_array, //prefetch_cursor_bw,
6227 l->zero_array, //prefetch_vmrow_bw,
6228 l->zero_array, //flip_bw,
6229 p->urgent_burst_factor_l,
6230 p->urgent_burst_factor_c,
6231 p->urgent_burst_factor_cursor,
6232 p->urgent_burst_factor_prefetch_l,
6233 p->urgent_burst_factor_prefetch_c,
6234 p->urgent_burst_factor_prefetch_cursor,
6235 l->surface_dummy_bw,
6236 p->surface_peak_required_bw[m][n]);
6237
6238 p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6239 &s->get_urgent_bandwidth_required_locals,
6240 p->display_cfg,
6241 m,
6242 n,
6243 p->inc_flip_bw,
6244 0, //use_qual_row_bw
6245 p->num_active_planes,
6246 p->num_of_dpp,
6247 p->dcc_dram_bw_nom_overhead_factor_p0,
6248 p->dcc_dram_bw_nom_overhead_factor_p1,
6249 p->dcc_dram_bw_pref_overhead_factor_p0,
6250 p->dcc_dram_bw_pref_overhead_factor_p1,
6251 p->mall_prefetch_sdp_overhead_factor,
6252 p->mall_prefetch_dram_overhead_factor,
6253 p->surface_read_bandwidth_l,
6254 p->surface_read_bandwidth_c,
6255 p->prefetch_bandwidth_l,
6256 p->prefetch_bandwidth_c,
6257 p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw
6258 p->excess_vactive_fill_bw_l,
6259 p->excess_vactive_fill_bw_c,
6260 p->cursor_bw,
6261 p->dpte_row_bw,
6262 p->meta_row_bw,
6263 p->prefetch_cursor_bw,
6264 p->prefetch_vmrow_bw,
6265 p->flip_bw,
6266 p->urgent_burst_factor_l,
6267 p->urgent_burst_factor_c,
6268 p->urgent_burst_factor_cursor,
6269 p->urgent_burst_factor_prefetch_l,
6270 p->urgent_burst_factor_prefetch_c,
6271 p->urgent_burst_factor_prefetch_cursor,
6272 l->surface_dummy_bw,
6273 p->surface_peak_required_bw[m][n]);
6274
6275 p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
6276 &s->get_urgent_bandwidth_required_locals,
6277 p->display_cfg,
6278 m,
6279 n,
6280 0, //inc_flip_bw
6281 1, //use_qual_row_bw
6282 p->num_active_planes,
6283 p->num_of_dpp,
6284 p->dcc_dram_bw_nom_overhead_factor_p0,
6285 p->dcc_dram_bw_nom_overhead_factor_p1,
6286 p->dcc_dram_bw_pref_overhead_factor_p0,
6287 p->dcc_dram_bw_pref_overhead_factor_p1,
6288 p->mall_prefetch_sdp_overhead_factor,
6289 p->mall_prefetch_dram_overhead_factor,
6290 p->surface_read_bandwidth_l,
6291 p->surface_read_bandwidth_c,
6292 p->prefetch_bandwidth_l,
6293 p->prefetch_bandwidth_c,
6294 p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw
6295 p->excess_vactive_fill_bw_l,
6296 p->excess_vactive_fill_bw_c,
6297 p->cursor_bw,
6298 p->dpte_row_bw,
6299 p->meta_row_bw,
6300 p->prefetch_cursor_bw,
6301 p->prefetch_vmrow_bw,
6302 p->flip_bw,
6303 p->urgent_burst_factor_l,
6304 p->urgent_burst_factor_c,
6305 p->urgent_burst_factor_cursor,
6306 p->urgent_burst_factor_prefetch_l,
6307 p->urgent_burst_factor_prefetch_c,
6308 p->urgent_burst_factor_prefetch_cursor,
6309 l->surface_dummy_bw,
6310 p->surface_peak_required_bw[m][n]);
6311
6312 p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
6313 &s->get_urgent_bandwidth_required_locals,
6314 p->display_cfg,
6315 m,
6316 n,
6317 p->inc_flip_bw,
6318 0, //use_qual_row_bw
6319 p->num_active_planes,
6320 p->num_of_dpp,
6321 p->dcc_dram_bw_nom_overhead_factor_p0,
6322 p->dcc_dram_bw_nom_overhead_factor_p1,
6323 p->dcc_dram_bw_pref_overhead_factor_p0,
6324 p->dcc_dram_bw_pref_overhead_factor_p1,
6325 p->mall_prefetch_sdp_overhead_factor,
6326 p->mall_prefetch_dram_overhead_factor,
6327 p->surface_read_bandwidth_l,
6328 p->surface_read_bandwidth_c,
6329 p->prefetch_bandwidth_l,
6330 p->prefetch_bandwidth_c,
6331 p->prefetch_bandwidth_oto, // to prevent ms/mp mismatch when oto bw > total vactive bw
6332 p->excess_vactive_fill_bw_l,
6333 p->excess_vactive_fill_bw_c,
6334 p->cursor_bw,
6335 p->dpte_row_bw,
6336 p->meta_row_bw,
6337 p->prefetch_cursor_bw,
6338 p->prefetch_vmrow_bw,
6339 p->flip_bw,
6340 l->unity_array,
6341 l->unity_array,
6342 l->unity_array,
6343 l->unity_array,
6344 l->unity_array,
6345 l->unity_array,
6346 l->surface_dummy_bw,
6347 p->surface_peak_required_bw[m][n]);
6348
6349 #ifdef __DML_VBA_DEBUG__
6350 dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
6351 dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6352 dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6353 dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
6354 #endif
6355 DML2_ASSERT(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
6356 }
6357 }
6358 }
6359
check_urgent_bandwidth_support(double * frac_urg_bandwidth_nom,double * frac_urg_bandwidth_mall,bool * vactive_bandwidth_support_ok,bool * bandwidth_support_ok,unsigned int mall_allocated_for_dcn_mbytes,double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6360 static void check_urgent_bandwidth_support(
6361 double *frac_urg_bandwidth_nom,
6362 double *frac_urg_bandwidth_mall,
6363 bool *vactive_bandwidth_support_ok, // vactive ok
6364 bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok
6365
6366 unsigned int mall_allocated_for_dcn_mbytes,
6367 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6368 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6369 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6370 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6371 {
6372 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6373 double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6374 double frac_urg_bandwidth_mall_sdp;
6375 double frac_urg_bandwidth_mall_dram;
6376 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0)
6377 frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6378 else
6379 frac_urg_bandwidth_mall_sdp = 0.0;
6380 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0)
6381 frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6382 else
6383 frac_urg_bandwidth_mall_dram = 0.0;
6384
6385 *bandwidth_support_ok = 1;
6386 *vactive_bandwidth_support_ok = 1;
6387
6388 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
6389 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
6390 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
6391 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
6392
6393 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6394 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6395
6396 if (mall_allocated_for_dcn_mbytes > 0) {
6397 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6398 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6399 }
6400
6401 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
6402 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
6403
6404 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
6405
6406 if (mall_allocated_for_dcn_mbytes > 0)
6407 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
6408
6409 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6410 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6411 if (mall_allocated_for_dcn_mbytes > 0) {
6412 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6413 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6414 }
6415
6416 #ifdef __DML_VBA_DEBUG__
6417 dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
6418 dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
6419 dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
6420
6421 dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
6422 dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
6423 dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
6424 dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
6425
6426 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6427 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6428 dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6429 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6430 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]);
6431 }
6432 }
6433 #endif
6434 }
6435
get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6436 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
6437 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
6438 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6439 {
6440 double flip_bw_available_mbps;
6441 double flip_bw_available_sdp_mbps;
6442 double flip_bw_available_dram_mbps;
6443
6444 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp];
6445 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram];
6446 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps;
6447
6448 #ifdef __DML_VBA_DEBUG__
6449 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6450 dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]);
6451 dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]);
6452 dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]);
6453 dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]);
6454 dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps);
6455 dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps);
6456 dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps);
6457 #endif
6458
6459 return flip_bw_available_mbps;
6460 }
6461
calculate_immediate_flip_bandwidth_support(double * frac_urg_bandwidth_flip,bool * flip_bandwidth_support_ok,enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6462 static void calculate_immediate_flip_bandwidth_support(
6463 // Output
6464 double *frac_urg_bandwidth_flip,
6465 bool *flip_bandwidth_support_ok,
6466
6467 // Input
6468 enum dml2_core_internal_soc_state_type eval_state,
6469 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6470 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6471 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6472 {
6473 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp];
6474 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram];
6475
6476 *flip_bandwidth_support_ok = true;
6477 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
6478 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n];
6479
6480 #ifdef __DML_VBA_DEBUG__
6481 dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n));
6482 dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]);
6483 dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]);
6484 dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
6485 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6486 #endif
6487 DML2_ASSERT(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
6488 }
6489
6490 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
6491 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0);
6492
6493 #ifdef __DML_VBA_DEBUG__
6494 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6495 dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
6496 dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
6497 dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
6498 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6499
6500 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6501 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6502 dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6503 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6504 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]);
6505 }
6506 }
6507 #endif
6508 }
6509
CalculateFlipSchedule(struct dml2_core_internal_scratch * s,bool iflip_enable,bool use_lb_flip_bw,double HostVMInefficiencyFactor,double Tvm_trips_flip,double Tr0_trips_flip,double Tvm_trips_flip_rounded,double Tr0_trips_flip_rounded,bool GPUVMEnable,double vm_bytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum dml2_source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw_flip,unsigned int dpte_row_height,unsigned int dpte_row_height_chroma,bool use_one_row_for_frame_flip,unsigned int max_flip_time_us,unsigned int max_flip_time_lines,unsigned int per_pipe_flip_bytes,unsigned int meta_row_bytes,unsigned int meta_row_height,unsigned int meta_row_height_chroma,bool dcc_mrq_enable,double * dst_y_per_vm_flip,double * dst_y_per_row_flip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)6510 static void CalculateFlipSchedule(
6511 struct dml2_core_internal_scratch *s,
6512 bool iflip_enable,
6513 bool use_lb_flip_bw,
6514 double HostVMInefficiencyFactor,
6515 double Tvm_trips_flip,
6516 double Tr0_trips_flip,
6517 double Tvm_trips_flip_rounded,
6518 double Tr0_trips_flip_rounded,
6519 bool GPUVMEnable,
6520 double vm_bytes, // vm_bytes
6521 double DPTEBytesPerRow, // dpte_row_bytes
6522 double BandwidthAvailableForImmediateFlip,
6523 unsigned int TotImmediateFlipBytes,
6524 enum dml2_source_format_class SourcePixelFormat,
6525 double LineTime,
6526 double VRatio,
6527 double VRatioChroma,
6528 double Tno_bw_flip,
6529 unsigned int dpte_row_height,
6530 unsigned int dpte_row_height_chroma,
6531 bool use_one_row_for_frame_flip,
6532 unsigned int max_flip_time_us,
6533 unsigned int max_flip_time_lines,
6534 unsigned int per_pipe_flip_bytes,
6535 unsigned int meta_row_bytes,
6536 unsigned int meta_row_height,
6537 unsigned int meta_row_height_chroma,
6538 bool dcc_mrq_enable,
6539
6540 // Output
6541 double *dst_y_per_vm_flip,
6542 double *dst_y_per_row_flip,
6543 double *final_flip_bw,
6544 bool *ImmediateFlipSupportedForPipe)
6545 {
6546 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
6547
6548 l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
6549 l->dpte_row_bytes = DPTEBytesPerRow;
6550
6551 #ifdef __DML_VBA_DEBUG__
6552 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
6553 dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
6554 dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
6555 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6556 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
6557 dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
6558 dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
6559 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
6560 dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
6561 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
6562 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
6563 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
6564 dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
6565 dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
6566 dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
6567 dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
6568 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
6569 dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
6570 dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
6571 dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
6572 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
6573 #endif
6574
6575 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
6576 if (l->dual_plane) {
6577 if (dcc_mrq_enable & GPUVMEnable) {
6578 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6579 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
6580 } else if (GPUVMEnable) {
6581 l->min_row_height = dpte_row_height;
6582 l->min_row_height_chroma = dpte_row_height_chroma;
6583 } else {
6584 l->min_row_height = meta_row_height;
6585 l->min_row_height_chroma = meta_row_height_chroma;
6586 }
6587 l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
6588 } else {
6589 if (dcc_mrq_enable & GPUVMEnable)
6590 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6591 else if (GPUVMEnable)
6592 l->min_row_height = dpte_row_height;
6593 else
6594 l->min_row_height = meta_row_height;
6595
6596 l->min_row_time = l->min_row_height * LineTime / VRatio;
6597 }
6598 #ifdef __DML_VBA_DEBUG__
6599 dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
6600 #endif
6601 DML2_ASSERT(l->min_row_time > 0);
6602
6603 if (use_lb_flip_bw) {
6604 // For mode check, calculation the flip bw requirement with worst case flip time
6605 l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
6606 math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
6607
6608 //The lower bound on flip bandwidth
6609 // Note: The get_urgent_bandwidth_required already consider dpte_row_bw and meta_row_bw in bandwidth calculation, so leave final_flip_bw = 0 if iflip not required
6610 l->lb_flip_bw = 0;
6611
6612 if (iflip_enable) {
6613 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
6614 l->num_rows = 2;
6615 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
6616 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
6617 l->lb_flip_bw = math_max3(
6618 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
6619 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
6620 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6621 #ifdef __DML_VBA_DEBUG__
6622 dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
6623 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
6624 dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
6625 dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
6626 dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
6627 dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
6628 dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6629
6630 if (l->lb_flip_bw > 0) {
6631 dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
6632 dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
6633 dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime);
6634 dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
6635 dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
6636 }
6637 #endif
6638 l->lb_flip_bw = math_max3(l->lb_flip_bw,
6639 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
6640 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6641
6642 #ifdef __DML_VBA_DEBUG__
6643 dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
6644 dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6645 #endif
6646 }
6647
6648 *final_flip_bw = l->lb_flip_bw;
6649
6650 *dst_y_per_vm_flip = 1; // not used
6651 *dst_y_per_row_flip = 1; // not used
6652 *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
6653 } else {
6654 if (iflip_enable) {
6655 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
6656 double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes;
6657
6658 #ifdef __DML_VBA_DEBUG__
6659 dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
6660 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6661 dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
6662 dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion);
6663 #endif
6664 if (l->ImmediateFlipBW == 0) {
6665 l->Tvm_flip = 0;
6666 l->Tr0_flip = 0;
6667 } else {
6668 l->Tvm_flip = math_max3(Tvm_trips_flip,
6669 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
6670 LineTime / 4.0);
6671
6672 l->Tr0_flip = math_max3(Tr0_trips_flip,
6673 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
6674 LineTime / 4.0);
6675 }
6676 #ifdef __DML_VBA_DEBUG__
6677 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
6678 dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
6679
6680 dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
6681 dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
6682 #endif
6683 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
6684 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
6685
6686 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
6687 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
6688
6689 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
6690 *ImmediateFlipSupportedForPipe = false;
6691 } else {
6692 *ImmediateFlipSupportedForPipe = iflip_enable;
6693 }
6694 } else {
6695 l->Tvm_flip = 0;
6696 l->Tr0_flip = 0;
6697 *dst_y_per_vm_flip = 0;
6698 *dst_y_per_row_flip = 0;
6699 *final_flip_bw = 0;
6700 *ImmediateFlipSupportedForPipe = iflip_enable;
6701 }
6702 }
6703 } else {
6704 l->Tvm_flip = 0;
6705 l->Tr0_flip = 0;
6706 *dst_y_per_vm_flip = 0;
6707 *dst_y_per_row_flip = 0;
6708 *final_flip_bw = 0;
6709 *ImmediateFlipSupportedForPipe = iflip_enable;
6710 }
6711
6712 #ifdef __DML_VBA_DEBUG__
6713 if (!use_lb_flip_bw) {
6714 dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
6715 dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
6716 dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
6717 dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
6718 dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
6719 }
6720 dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
6721 dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
6722 #endif
6723 }
6724
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params * p)6725 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
6726 struct dml2_core_internal_scratch *scratch,
6727 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
6728 {
6729 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
6730
6731 enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
6732 double reserved_vblank_time_us;
6733 bool FoundCriticalSurface = false;
6734
6735 s->TotalActiveWriteback = 0;
6736 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
6737
6738 #ifdef __DML_VBA_DEBUG__
6739 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6740 #endif
6741
6742 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
6743 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
6744 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
6745 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6746 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6747 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6748 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6749 if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
6750 p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6751 p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6752 p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6753 p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6754 }
6755 p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
6756
6757 #ifdef __DML_VBA_DEBUG__
6758 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
6759 dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
6760 dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
6761 dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
6762 dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
6763 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6764 dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
6765 dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
6766 dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
6767 dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
6768 dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
6769 dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
6770 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
6771 dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us);
6772 #endif
6773
6774 s->TotalActiveWriteback = 0;
6775 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6776 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
6777 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
6778 }
6779 }
6780
6781 if (s->TotalActiveWriteback <= 1) {
6782 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
6783 } else {
6784 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6785 }
6786 if (p->USRRetrainingRequired)
6787 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
6788
6789 if (s->TotalActiveWriteback <= 1) {
6790 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
6791 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
6792 } else {
6793 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6794 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
6795 }
6796
6797 if (p->USRRetrainingRequired)
6798 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6799
6800 if (p->USRRetrainingRequired)
6801 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6802
6803 #ifdef __DML_VBA_DEBUG__
6804 dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
6805 dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
6806 dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
6807 dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
6808 dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
6809 #endif
6810
6811 s->TotalPixelBW = 0.0;
6812 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6813 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6814 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6815 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6816 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6817 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
6818 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
6819 }
6820
6821 *p->global_fclk_change_supported = true;
6822 *p->global_dram_clock_change_supported = true;
6823
6824 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6825 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6826 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6827 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6828 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6829 double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
6830 double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
6831 double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
6832 double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
6833 double LBBitPerPixel = 57;
6834
6835 s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
6836 s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
6837
6838 #ifdef __DML_VBA_DEBUG__
6839 dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines);
6840 dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
6841 dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel);
6842 dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
6843 dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
6844 #endif
6845
6846 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
6847 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
6848
6849 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
6850 if (p->UnboundedRequestEnabled) {
6851 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
6852 }
6853
6854 s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
6855 s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
6856 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
6857
6858 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
6859
6860 if (p->NumberOfActiveSurfaces > 1) {
6861 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
6862 }
6863
6864 if (p->BytePerPixelDETC[k] > 0) {
6865 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
6866 s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
6867 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
6868 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
6869 if (p->NumberOfActiveSurfaces > 1) {
6870 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
6871 }
6872 s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
6873 } else {
6874 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
6875 }
6876
6877 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
6878 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
6879 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
6880 s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us;
6881
6882 if (p->VActiveLatencyHidingMargin)
6883 p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
6884
6885 if (p->VActiveLatencyHidingUs)
6886 p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
6887
6888 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
6889 s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0
6890 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
6891 * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
6892 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0);
6893 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
6894 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
6895 }
6896 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
6897
6898 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
6899
6900 s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
6901 s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
6902 }
6903 p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
6904
6905 uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
6906 reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
6907
6908 p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported;
6909 if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
6910 p->FCLKChangeSupport[k] = dml2_pstate_change_vactive;
6911 else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
6912 p->FCLKChangeSupport[k] = dml2_pstate_change_vblank;
6913
6914 if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported)
6915 *p->global_fclk_change_supported = false;
6916
6917 p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported;
6918 if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
6919 if (p->display_cfg->overrides.all_streams_blanked ||
6920 (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
6921 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive;
6922 else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6923 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
6924 else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6925 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
6926 } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6927 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive;
6928 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6929 p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank;
6930 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
6931 p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr;
6932 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
6933 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp;
6934 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
6935 p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame;
6936
6937 if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported)
6938 *p->global_dram_clock_change_supported = false;
6939
6940 s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
6941 s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
6942 s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
6943 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
6944
6945 #ifdef __DML_VBA_DEBUG__
6946 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
6947 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
6948 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
6949 dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
6950 dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
6951 dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
6952 dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
6953 dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
6954 dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
6955 dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
6956 #endif
6957 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
6958
6959 if (p->BytePerPixelDETC[k] > 0) {
6960 s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
6961 s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
6962 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
6963
6964 if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
6965 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
6966 else
6967 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
6968
6969 #ifdef __DML_VBA_DEBUG__
6970 dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
6971 dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
6972 dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
6973 dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
6974 #endif
6975 }
6976 }
6977
6978 *p->g6_temp_read_support = true;
6979 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6980 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
6981 (s->g6_temp_read_latency_margin[k] < 0)) {
6982 *p->g6_temp_read_support = false;
6983 }
6984 }
6985
6986 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6987 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
6988 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
6989 FoundCriticalSurface = true;
6990 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
6991 }
6992 }
6993
6994 #ifdef __DML_VBA_DEBUG__
6995 dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
6996 dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
6997 dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
6998 dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
6999 #endif
7000 }
7001
calculate_bytes_to_fetch_required_to_hide_latency(struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params * p)7002 static void calculate_bytes_to_fetch_required_to_hide_latency(
7003 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
7004 {
7005 unsigned int dst_lines_to_hide;
7006 unsigned int src_lines_to_hide_l;
7007 unsigned int src_lines_to_hide_c;
7008 unsigned int plane_index;
7009 unsigned int stream_index;
7010
7011 for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
7012 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
7013 continue;
7014
7015 stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
7016
7017 dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us /
7018 ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
7019 (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
7020
7021 src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
7022 p->swath_height_l[plane_index]);
7023 p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
7024
7025 src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
7026 p->swath_height_c[plane_index]);
7027 p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
7028
7029 if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
7030 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
7031 if (p->meta_row_height_c[plane_index]) {
7032 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
7033 }
7034 }
7035
7036 if (p->display_cfg->gpuvm_enable == true) {
7037 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
7038 if (p->dpte_row_height_c[plane_index]) {
7039 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
7040 }
7041 }
7042 }
7043 }
7044
calculate_vactive_det_fill_latency(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double surface_read_bw_l[],double surface_read_bw_c[],double (* surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double (* surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double vactive_det_fill_delay_us[])7045 static noinline_for_stack void calculate_vactive_det_fill_latency(
7046 const struct dml2_display_cfg *display_cfg,
7047 unsigned int num_active_planes,
7048 unsigned int bytes_required_l[],
7049 unsigned int bytes_required_c[],
7050 double dcc_dram_bw_nom_overhead_factor_p0[],
7051 double dcc_dram_bw_nom_overhead_factor_p1[],
7052 double surface_read_bw_l[],
7053 double surface_read_bw_c[],
7054 double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7055 double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
7056 /* output */
7057 double vactive_det_fill_delay_us[])
7058 {
7059 double effective_excess_bandwidth;
7060 double effective_excess_bandwidth_l;
7061 double effective_excess_bandwidth_c;
7062 double adj_factor;
7063 unsigned int plane_index;
7064 unsigned int soc_state;
7065 unsigned int bw_type;
7066
7067 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7068 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7069 continue;
7070
7071 vactive_det_fill_delay_us[plane_index] = 0.0;
7072 for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
7073 for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
7074 effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
7075
7076 /* luma */
7077 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
7078
7079 effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7080 if (effective_excess_bandwidth_l > 0.0) {
7081 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
7082 }
7083
7084 /* chroma */
7085 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
7086
7087 effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
7088 if (effective_excess_bandwidth_c > 0.0) {
7089 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
7090 }
7091 }
7092 }
7093 }
7094 }
7095
calculate_excess_vactive_bandwidth_required(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[])7096 static void calculate_excess_vactive_bandwidth_required(
7097 const struct dml2_display_cfg *display_cfg,
7098 unsigned int num_active_planes,
7099 unsigned int bytes_required_l[],
7100 unsigned int bytes_required_c[],
7101 /* outputs */
7102 double excess_vactive_fill_bw_l[],
7103 double excess_vactive_fill_bw_c[])
7104 {
7105 unsigned int plane_index;
7106
7107 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
7108 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
7109 continue;
7110
7111 excess_vactive_fill_bw_l[plane_index] = 0.0;
7112 excess_vactive_fill_bw_c[plane_index] = 0.0;
7113
7114 if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us > 0) {
7115 excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
7116 excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
7117 }
7118 }
7119 }
7120
uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz,const struct dml2_dram_params * dram_config)7121 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
7122 {
7123 double bw_mbps = 0;
7124 bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7125
7126 return bw_mbps;
7127 }
7128
dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps,const struct dml2_dram_params * dram_config)7129 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
7130 {
7131 double uclk_mhz = 0;
7132
7133 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
7134
7135 return uclk_mhz;
7136 }
7137
get_qos_param_index(unsigned long uclk_freq_khz,const struct dml2_dcn4_uclk_dpm_dependent_qos_params * per_uclk_dpm_params)7138 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
7139 {
7140 unsigned int i;
7141 unsigned int index = 0;
7142
7143 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7144 dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
7145
7146 if (i == 0)
7147 index = 0;
7148 else
7149 index = i - 1;
7150
7151 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
7152 per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
7153 break;
7154 }
7155 }
7156 #if defined(__DML_VBA_DEBUG__)
7157 dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz);
7158 dml2_printf("DML::%s: index = %d\n", __func__, index);
7159 #endif
7160 return index;
7161 }
7162
get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz,const struct dml2_soc_state_table * clk_table)7163 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
7164 {
7165 unsigned int i;
7166 bool clk_entry_found = 0;
7167
7168 for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
7169 dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
7170
7171 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
7172 clk_entry_found = 1;
7173 break;
7174 }
7175 }
7176
7177 if (!clk_entry_found)
7178 DML2_ASSERT(clk_entry_found);
7179 #if defined(__DML_VBA_DEBUG__)
7180 dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
7181 dml2_printf("DML::%s: index = %d\n", __func__, i);
7182 #endif
7183 return i;
7184 }
7185
/*
 * Sum the bytes one pipe has to fetch for a flip.
 *
 * Two parts are truncated to unsigned int independently and then added:
 *  - vm_bytes scaled by hostvm_inefficiency_factor, plus twice meta_row_bytes
 *  - twice dpte_row_bytes, scaled by hostvm_inefficiency_factor
 */
static unsigned int get_pipe_flip_bytes(
	double hostvm_inefficiency_factor,
	unsigned int vm_bytes,
	unsigned int dpte_row_bytes,
	unsigned int meta_row_bytes)
{
	const unsigned int vm_and_meta_bytes =
		(unsigned int)((vm_bytes * hostvm_inefficiency_factor) + 2 * meta_row_bytes);
	const unsigned int dpte_bytes =
		(unsigned int)(2 * dpte_row_bytes * hostvm_inefficiency_factor);

	return vm_and_meta_bytes + dpte_bytes;
}
7199
/*
 * Compute the HostVM inefficiency factors.
 *
 * Both outputs are 1 unless gpuvm_enable and hostvm_enable are both set. In
 * that case:
 *  - *HostVMInefficiencyFactor is the ratio
 *    urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only
 *  - *HostVMInefficiencyFactorPrefetch starts at the same ratio and is raised
 *    to 4 when it is below 4 and remote_iommu_outstanding_translations is
 *    smaller than max_outstanding_reqs
 */
static void calculate_hostvm_inefficiency_factor(
	double *HostVMInefficiencyFactor,
	double *HostVMInefficiencyFactorPrefetch,

	bool gpuvm_enable,
	bool hostvm_enable,
	unsigned int remote_iommu_outstanding_translations,
	unsigned int max_outstanding_reqs,
	double urg_bandwidth_avail_active_pixel_and_vm,
	double urg_bandwidth_avail_active_vm_only)
{
	*HostVMInefficiencyFactor = 1;
	*HostVMInefficiencyFactorPrefetch = 1;

	/* without GPUVM + HostVM there is nothing to derate */
	if (!gpuvm_enable || !hostvm_enable)
		return;

	*HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
	*HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor;

	/* apply a floor of 4 to the prefetch factor while translations are below the request limit */
	if (remote_iommu_outstanding_translations < max_outstanding_reqs &&
		*HostVMInefficiencyFactorPrefetch < 4) {
		*HostVMInefficiencyFactorPrefetch = 4;
	}
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
	dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
	dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
	dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
#endif
}
7228
7229 struct dml2_core_internal_g6_temp_read_blackouts_table {
7230 struct {
7231 unsigned int uclk_khz;
7232 unsigned int blackout_us;
7233 } entries[DML_MAX_CLK_TABLE_SIZE];
7234 };
7235
7236 struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = {
7237 .entries = {
7238 {
7239 .uclk_khz = 96000,
7240 .blackout_us = 23,
7241 },
7242 {
7243 .uclk_khz = 435000,
7244 .blackout_us = 10,
7245 },
7246 {
7247 .uclk_khz = 521000,
7248 .blackout_us = 10,
7249 },
7250 {
7251 .uclk_khz = 731000,
7252 .blackout_us = 8,
7253 },
7254 {
7255 .uclk_khz = 822000,
7256 .blackout_us = 8,
7257 },
7258 {
7259 .uclk_khz = 962000,
7260 .blackout_us = 5,
7261 },
7262 {
7263 .uclk_khz = 1069000,
7264 .blackout_us = 5,
7265 },
7266 {
7267 .uclk_khz = 1187000,
7268 .blackout_us = 5,
7269 },
7270 },
7271 };
7272
get_g6_temp_read_blackout_us(struct dml2_soc_bb * soc,unsigned int uclk_freq_khz,unsigned int min_clk_index)7273 static double get_g6_temp_read_blackout_us(
7274 struct dml2_soc_bb *soc,
7275 unsigned int uclk_freq_khz,
7276 unsigned int min_clk_index)
7277 {
7278 unsigned int i;
7279 unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7280
7281 if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
7282 /* overrides are present in the SoC BB */
7283 return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
7284 }
7285
7286 /* use internal table */
7287 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
7288
7289 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
7290 if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
7291 core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
7292 break;
7293 }
7294
7295 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
7296 }
7297
7298 return (double)blackout_us;
7299 }
7300
get_max_urgent_latency_us(struct dml2_dcn4x_soc_qos_params * dcn4x,double uclk_freq_mhz,double FabricClock,unsigned int min_clk_index)7301 static double get_max_urgent_latency_us(
7302 struct dml2_dcn4x_soc_qos_params *dcn4x,
7303 double uclk_freq_mhz,
7304 double FabricClock,
7305 unsigned int min_clk_index)
7306 {
7307 double latency;
7308 latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
7309 * (1 + dcn4x->umc_max_latency_margin / 100.0)
7310 + dcn4x->mall_overhead_fclk_cycles / FabricClock
7311 + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
7312 * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
7313 return latency;
7314 }
7315
calculate_pstate_keepout_dst_lines(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_watermarks * watermarks,unsigned int pstate_keepout_dst_lines[])7316 static void calculate_pstate_keepout_dst_lines(
7317 const struct dml2_display_cfg *display_cfg,
7318 const struct dml2_core_internal_watermarks *watermarks,
7319 unsigned int pstate_keepout_dst_lines[])
7320 {
7321 const struct dml2_stream_parameters *stream_descriptor;
7322 unsigned int i;
7323
7324 for (i = 0; i < display_cfg->num_planes; i++) {
7325 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
7326 stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
7327
7328 pstate_keepout_dst_lines[i] =
7329 (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
7330
7331 if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
7332 pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
7333 }
7334 }
7335 }
7336 }
7337
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex * in_out_params)7338 static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
7339 {
7340 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
7341 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
7342 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
7343
7344 #if defined(__DML_VBA_DEBUG__)
7345 double old_ReadBandwidthLuma;
7346 double old_ReadBandwidthChroma;
7347 #endif
7348 double outstanding_latency_us = 0;
7349 double min_return_bw_for_latency;
7350
7351 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7352 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
7353 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
7354 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
7355 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
7356 #ifdef DML_GLOBAL_PREFETCH_CHECK
7357 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
7358 #endif
7359 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
7360 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
7361 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
7362 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
7363 unsigned int k, m, n;
7364
7365 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
7366 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
7367
7368 mode_lib->ms.num_active_planes = display_cfg->num_planes;
7369 get_stream_output_bpp(s->OutputBpp, display_cfg);
7370
7371 mode_lib->ms.state_idx = in_out_params->min_clk_index;
7372 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
7373 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
7374 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
7375 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
7376 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
7377 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000;
7378 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
7379 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000;
7380 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
7381 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
7382 mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
7383 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
7384 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
7385
7386 #if defined(__DML_VBA_DEBUG__)
7387 dml2_printf("DML::%s: --- START --- \n", __func__);
7388 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
7389 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
7390 dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
7391 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
7392 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
7393 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
7394 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
7395 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
7396 dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
7397 dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
7398 dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
7399 dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
7400 dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
7401 dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
7402 dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
7403
7404 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7405 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
7406 #endif
7407
7408 CalculateMaxDETAndMinCompressedBufferSize(
7409 mode_lib->ip.config_return_buffer_size_in_kbytes,
7410 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
7411 mode_lib->ip.rob_buffer_size_kbytes,
7412 mode_lib->ip.max_num_dpp,
7413 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
7414 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
7415 mode_lib->ip.dcn_mrq_present,
7416
7417 /* Output */
7418 &mode_lib->ms.MaxTotalDETInKByte,
7419 &mode_lib->ms.NomDETInKByte,
7420 &mode_lib->ms.MinCompressedBufferSizeInKByte);
7421
7422 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
7423
7424 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
7425
7426 /*Scale Ratio, taps Support Check*/
7427 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
7428 // Many core tests are still setting scaling parameters "incorrectly"
7429 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7430 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
7431 && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
7432 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
7433 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
7434 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
7435 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
7436 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
7437 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
7438 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
7439 || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
7440 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
7441 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
7442 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
7443 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
7444 || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
7445 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
7446 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
7447 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
7448 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
7449 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
7450 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
7451 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
7452 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
7453 }
7454 }
7455
7456 /*Source Format, Pixel Format and Scan Support Check*/
7457 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
7458 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7459 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
7460 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
7461 }
7462 }
7463
7464 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7465 CalculateBytePerPixelAndBlockSizes(
7466 display_cfg->plane_descriptors[k].pixel_format,
7467 display_cfg->plane_descriptors[k].surface.tiling,
7468 display_cfg->plane_descriptors[k].surface.plane0.pitch,
7469 display_cfg->plane_descriptors[k].surface.plane1.pitch,
7470
7471 /* Output */
7472 &mode_lib->ms.BytePerPixelY[k],
7473 &mode_lib->ms.BytePerPixelC[k],
7474 &mode_lib->ms.BytePerPixelInDETY[k],
7475 &mode_lib->ms.BytePerPixelInDETC[k],
7476 &mode_lib->ms.Read256BlockHeightY[k],
7477 &mode_lib->ms.Read256BlockHeightC[k],
7478 &mode_lib->ms.Read256BlockWidthY[k],
7479 &mode_lib->ms.Read256BlockWidthC[k],
7480 &mode_lib->ms.MacroTileHeightY[k],
7481 &mode_lib->ms.MacroTileHeightC[k],
7482 &mode_lib->ms.MacroTileWidthY[k],
7483 &mode_lib->ms.MacroTileWidthC[k],
7484 &mode_lib->ms.surf_linear128_l[k],
7485 &mode_lib->ms.surf_linear128_c[k]);
7486 }
7487
7488 /*Bandwidth Support Check*/
7489 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7490 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
7491 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
7492 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
7493 } else {
7494 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
7495 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
7496 }
7497 }
7498
7499 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7500 mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7501 mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
7502
7503 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
7504 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
7505
7506 #ifdef __DML_VBA_DEBUG__
7507 old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7508 old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0;
7509 dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma);
7510 dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma);
7511 dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]);
7512 dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]);
7513 #endif
7514 }
7515
7516 // Writeback bandwidth
7517 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7518 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) {
7519 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
7520 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
7521 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
7522 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
7523 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
7524 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
7525 mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height
7526 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width
7527 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height
7528 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
7529 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
7530 } else {
7531 mode_lib->ms.WriteBandwidth[k][0] = 0.0;
7532 }
7533 }
7534
7535 /*Writeback Latency support check*/
7536 mode_lib->ms.support.WritebackLatencySupport = true;
7537 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7538 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 &&
7539 (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
7540 mode_lib->ms.support.WritebackLatencySupport = false;
7541 }
7542 }
7543
7544
7545 /* Writeback Scale Ratio and Taps Support Check */
7546 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
7547 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7548 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
7549 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio
7550 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio
7551 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < mode_lib->ip.writeback_min_hscl_ratio
7552 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio
7553 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
7554 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
7555 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps
7556 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps
7557 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) {
7558 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
7559 }
7560 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
7561 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
7562 }
7563 }
7564 }
7565
7566 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7567 CalculateSinglePipeDPPCLKAndSCLThroughput(
7568 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
7569 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
7570 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
7571 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
7572 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
7573 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
7574 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7575 display_cfg->plane_descriptors[k].pixel_format,
7576 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
7577 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
7578 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
7579 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
7580 /* Output */
7581 &mode_lib->ms.PSCL_FACTOR[k],
7582 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
7583 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
7584 }
7585
7586 // Max Viewport Size support
7587 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7588 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
7589 s->MaximumSwathWidthSupportLuma = 15360;
7590 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
7591 s->MaximumSwathWidthSupportLuma = 7680 + 16;
7592 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
7593 s->MaximumSwathWidthSupportLuma = 4320 + 16;
7594 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
7595 s->MaximumSwathWidthSupportLuma = 5120 + 16;
7596 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
7597 s->MaximumSwathWidthSupportLuma = 3072 + 16;
7598 } else {
7599 s->MaximumSwathWidthSupportLuma = 6144 + 16;
7600 }
7601
7602 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
7603 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
7604 } else {
7605 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
7606 }
7607
7608 unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
7609 unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
7610
7611 /*
7612 #if defined(DV_BUILD)
7613 // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
7614 if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
7615 lb_buffer_size_bits_luma = 34620 * 57;
7616 lb_buffer_size_bits_chroma = 13560 * 57;
7617 }
7618 #endif
7619 */
7620 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
7621 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
7622 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
7623 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
7624 } else {
7625 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
7626 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
7627 }
7628
7629 mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7630 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7631
7632 dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
7633 dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
7634 dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7635
7636 dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
7637 dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
7638 dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7639 }
7640
7641 /* Cursor Support Check */
7642 mode_lib->ms.support.CursorSupport = true;
7643 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7644 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
7645 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
7646 mode_lib->ms.support.CursorSupport = false;
7647 }
7648 }
7649
7650 /* Valid Pitch Check */
7651 mode_lib->ms.support.PitchSupport = true;
7652 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7653
7654 // data pitch
7655 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
7656
7657 if (mode_lib->ms.surf_linear128_l[k])
7658 alignment_l = alignment_l / 2;
7659
7660 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
7661 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7662 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
7663
7664 if (mode_lib->ms.surf_linear128_c[k])
7665 alignment_c = alignment_c / 2;
7666 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
7667 } else {
7668 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
7669 }
7670
7671 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
7672 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
7673 mode_lib->ms.support.PitchSupport = false;
7674 #if defined(__DML_VBA_DEBUG__)
7675 dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
7676 dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
7677 dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
7678 dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
7679 dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
7680 #endif
7681 }
7682
7683 // meta pitch
7684 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
7685 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
7686 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
7687
7688 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
7689 mode_lib->ms.support.PitchSupport = false;
7690
7691 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7692 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
7693 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
7694
7695 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
7696 mode_lib->ms.support.PitchSupport = false;
7697 }
7698 } else {
7699 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
7700 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
7701 }
7702 }
7703
7704 mode_lib->ms.support.ViewportExceedsSurface = false;
7705 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
7706 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7707 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
7708 display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
7709 mode_lib->ms.support.ViewportExceedsSurface = true;
7710 #if defined(__DML_VBA_DEBUG__)
7711 dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
7712 dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
7713 dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
7714 dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
7715 dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
7716 #endif
7717 }
7718 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7719 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
7720 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
7721 mode_lib->ms.support.ViewportExceedsSurface = true;
7722 }
7723 }
7724 }
7725 }
7726
7727 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
7728 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
7729 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7730 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7731 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7732 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
7733 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7734 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
7735 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
7736 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7737 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7738 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
7739 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
7740 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l;
7741 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c;
7742 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7743 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7744 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7745 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7746 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7747 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7748 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
7749 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
7750 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
7751 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7752 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7753 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7754 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7755 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
7756 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
7757
7758 // output
7759 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
7760 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
7761 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
7762 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
7763 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
7764 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
7765 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
7766 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
7767 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
7768 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
7769 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
7770 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
7771 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
7772 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
7773 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
7774 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
7775 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
7776 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
7777 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
7778 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
7779 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
7780
7781 // This call is just to find out if there is enough DET space to support full vp in 1 pipe.
7782 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
7783
7784 mode_lib->ms.TotalNumberOfActiveDPP = 0;
7785 mode_lib->ms.support.TotalAvailablePipesSupport = true;
7786
7787 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7788 /*Number Of DSC Slices*/
7789 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
7790 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
7791
7792 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
7793 mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
7794 else {
7795 if (s->PixelClockBackEnd[k] > 4800) {
7796 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
7797 } else if (s->PixelClockBackEnd[k] > 2400) {
7798 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
7799 } else if (s->PixelClockBackEnd[k] > 1200) {
7800 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
7801 } else if (s->PixelClockBackEnd[k] > 340) {
7802 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
7803 } else {
7804 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
7805 }
7806 }
7807 } else {
7808 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
7809 }
7810
7811 CalculateODMMode(
7812 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
7813 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7814 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7815 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7816 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
7817 mode_lib->ms.max_dispclk_freq_mhz,
7818 false, // DSCEnable
7819 mode_lib->ms.TotalNumberOfActiveDPP,
7820 mode_lib->ip.max_num_dpp,
7821 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7822 mode_lib->ms.support.NumberOfDSCSlices[k],
7823
7824 /* Output */
7825 &s->TotalAvailablePipesSupportNoDSC,
7826 &s->NumberOfDPPNoDSC,
7827 &s->ODMModeNoDSC,
7828 &s->RequiredDISPCLKPerSurfaceNoDSC);
7829
7830 CalculateODMMode(
7831 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
7832 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7833 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7834 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7835 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
7836 mode_lib->ms.max_dispclk_freq_mhz,
7837 true, // DSCEnable
7838 mode_lib->ms.TotalNumberOfActiveDPP,
7839 mode_lib->ip.max_num_dpp,
7840 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7841 mode_lib->ms.support.NumberOfDSCSlices[k],
7842
7843 /* Output */
7844 &s->TotalAvailablePipesSupportDSC,
7845 &s->NumberOfDPPDSC,
7846 &s->ODMModeDSC,
7847 &s->RequiredDISPCLKPerSurfaceDSC);
7848
7849 CalculateOutputLink(
7850 &mode_lib->scratch,
7851 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
7852 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
7853 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
7854 mode_lib->soc.phy_downspread_percent,
7855 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7856 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7857 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7858 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7859 s->PixelClockBackEnd[k],
7860 s->OutputBpp[k],
7861 mode_lib->ip.maximum_dsc_bits_per_component,
7862 mode_lib->ms.support.NumberOfDSCSlices[k],
7863 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
7864 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
7865 s->ODMModeNoDSC,
7866 s->ODMModeDSC,
7867 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
7868 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
7869 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
7870
7871 /* Output */
7872 &mode_lib->ms.RequiresDSC[k],
7873 &mode_lib->ms.RequiresFEC[k],
7874 &mode_lib->ms.OutputBpp[k],
7875 &mode_lib->ms.OutputType[k],
7876 &mode_lib->ms.OutputRate[k],
7877 &mode_lib->ms.RequiredSlots[k]);
7878
7879 if (s->OutputBpp[k] == 0.0) {
7880 s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
7881 }
7882
7883 if (mode_lib->ms.RequiresDSC[k] == false) {
7884 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
7885 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
7886 if (!s->TotalAvailablePipesSupportNoDSC)
7887 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7888 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
7889 } else {
7890 mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
7891 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
7892 if (!s->TotalAvailablePipesSupportDSC)
7893 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7894 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
7895 }
7896 #if defined(__DML_VBA_DEBUG__)
7897 dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7898 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
7899 #endif
7900
7901 // ensure the number of dsc slices is an integer multiple based on ODM mode
7902 mode_lib->ms.support.DSCSlicesODMModeSupported = true;
7903 if (mode_lib->ms.RequiresDSC[k]) {
7904 // fail a ms check if the override num_slices doesn't align with odm mode setting
7905 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
7906 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7907 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
7908 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7909 mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
7910 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7911 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
7912 #if defined(__DML_VBA_DEBUG__)
7913 if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
7914 dml2_printf("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
7915 dml2_printf("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
7916 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
7917 }
7918 #endif
7919 } else {
7920 // safeguard to ensure the dml derived dsc slices and odm setting are compatible
7921 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7922 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
7923 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7924 mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
7925 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7926 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
7927 }
7928
7929 } else {
7930 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
7931 }
7932 }
7933
7934 mode_lib->ms.support.incorrect_imall_usage = 0;
7935 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7936 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
7937 mode_lib->ms.support.incorrect_imall_usage = 1;
7938 }
7939
7940 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7941 mode_lib->ms.MPCCombine[k] = false;
7942 mode_lib->ms.NoOfDPP[k] = 1;
7943
7944 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
7945 mode_lib->ms.MPCCombine[k] = false;
7946 mode_lib->ms.NoOfDPP[k] = 4;
7947 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
7948 mode_lib->ms.MPCCombine[k] = false;
7949 mode_lib->ms.NoOfDPP[k] = 3;
7950 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
7951 mode_lib->ms.MPCCombine[k] = false;
7952 mode_lib->ms.NoOfDPP[k] = 2;
7953 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
7954 mode_lib->ms.MPCCombine[k] = true;
7955 mode_lib->ms.NoOfDPP[k] = 2;
7956 mode_lib->ms.TotalNumberOfActiveDPP++;
7957 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
7958 mode_lib->ms.MPCCombine[k] = false;
7959 mode_lib->ms.NoOfDPP[k] = 1;
7960 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
7961 dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
7962 }
7963 } else {
7964 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
7965 mode_lib->ms.MPCCombine[k] = true;
7966 mode_lib->ms.NoOfDPP[k] = 2;
7967 mode_lib->ms.TotalNumberOfActiveDPP++;
7968 }
7969 }
7970 #if defined(__DML_VBA_DEBUG__)
7971 dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
7972 #endif
7973 }
7974
7975 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
7976 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7977
7978
7979 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
7980 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
7981 if (mode_lib->ms.NoOfDPP[k] == 1)
7982 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
7983 }
7984
7985 //DISPCLK/DPPCLK
7986 mode_lib->ms.WritebackRequiredDISPCLK = 0;
7987 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7988 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
7989 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
7990 CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
7991 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7992 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
7993 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
7994 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps,
7995 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
7996 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width,
7997 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
7998 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7999 mode_lib->ip.writeback_line_buffer_buffer_size));
8000 }
8001 }
8002
8003 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
8004 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8005 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
8006 }
8007
8008 mode_lib->ms.GlobalDPPCLK = 0;
8009 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8010 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
8011 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
8012 }
8013
8014 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
8015
8016 /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
8017 s->TotalNumberOfActiveOTG = 0;
8018 s->TotalNumberOfActiveHDMIFRL = 0;
8019 s->TotalNumberOfActiveDP2p0 = 0;
8020 s->TotalNumberOfActiveDP2p0Outputs = 0;
8021 s->TotalNumberOfActiveWriteback = 0;
8022 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8023
8024 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8025 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8026 if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8027 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8028
8029 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0)
8030 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
8031
8032 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
8033 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
8034 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
8035 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
8036 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
8037 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
8038 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
8039 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
8040 //}
8041 }
8042 }
8043 }
8044 }
8045
8046 /* Writeback Mode Support Check */
8047 mode_lib->ms.support.EnoughWritebackUnits = 1;
8048 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
8049 mode_lib->ms.support.EnoughWritebackUnits = false;
8050 }
8051 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
8052 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
8053 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
8054
8055
8056 mode_lib->ms.support.ExceededMultistreamSlots = false;
8057 mode_lib->ms.support.LinkCapacitySupport = true;
8058 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8059 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
8060 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8061 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
8062 mode_lib->ms.support.LinkCapacitySupport = false;
8063 }
8064 }
8065
8066 mode_lib->ms.support.P2IWith420 = false;
8067 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
8068 mode_lib->ms.support.DSC422NativeNotSupported = false;
8069 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
8070 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
8071 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
8072 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
8073 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
8074 mode_lib->ms.support.NotEnoughLanesForMSO = false;
8075
8076 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8077 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8078 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8079 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
8080 mode_lib->ms.support.P2IWith420 = true;
8081
8082 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
8083 mode_lib->ms.support.DSC422NativeNotSupported = true;
8084
8085 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
8086 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
8087 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
8088 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
8089 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
8090 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
8091 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
8092
8093 // FIXME_STAGE2
8094 //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
8095 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
8096 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
8097 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
8098 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8099 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8100 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
8101 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
8102 // }
8103 //}
8104
8105 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8106 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
8107 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
8108 // FIXME_STAGE2
8109 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
8110 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8111 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
8112 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
8113 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
8114 //}
8115 }
8116 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
8117 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
8118 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
8119
8120 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
8121 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
8122 mode_lib->ms.support.NotEnoughLanesForMSO = true;
8123 }
8124 }
8125
8126 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
8127 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8128 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
8129 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
8130 mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
8131 mode_lib->ms.RequiresDSC[k],
8132 s->PixelClockBackEnd[k],
8133 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8134 mode_lib->ms.OutputBpp[k],
8135 mode_lib->ms.support.NumberOfDSCSlices[k],
8136 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8137 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8138 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
8139 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
8140
8141 if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) {
8142 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
8143 }
8144 } else {
8145 /* Phantom DTBCLK can be calculated differently from main because phantom has no DSC and thus
8146 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
8147 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
8148 * required - by setting phantom dtbclk to 0 we ignore it.
8149 */
8150 mode_lib->ms.RequiredDTBCLK[k] = 0;
8151 }
8152 }
8153
8154 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
8155 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8156 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
8157 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
8158 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
8159 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8160 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
8161 s->DSCFormatFactor = 2;
8162 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
8163 s->DSCFormatFactor = 1;
8164 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
8165 s->DSCFormatFactor = 2;
8166 } else {
8167 s->DSCFormatFactor = 1;
8168 }
8169 #ifdef __DML_VBA_DEBUG__
8170 dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
8171 #endif
8172 if (mode_lib->ms.RequiresDSC[k] == true) {
8173 s->PixelClockBackEndFactor = 3.0;
8174
8175 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8176 s->PixelClockBackEndFactor = 12.0;
8177 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8178 s->PixelClockBackEndFactor = 9.0;
8179 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8180 s->PixelClockBackEndFactor = 6.0;
8181
8182 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
8183 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
8184 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
8185 }
8186
8187 #ifdef __DML_VBA_DEBUG__
8188 dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
8189 dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
8190 dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
8191 dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
8192 #endif
8193 }
8194 }
8195 }
8196
8197 /* Check DSC Unit and Slices Support */
8198 mode_lib->ms.support.NotEnoughDSCSlices = false;
8199 s->TotalDSCUnitsRequired = 0;
8200 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
8201 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
8202
8203 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8204 if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
8205 s->NumDSCUnitRequired = 1;
8206
8207 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
8208 s->NumDSCUnitRequired = 4;
8209 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
8210 s->NumDSCUnitRequired = 3;
8211 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
8212 s->NumDSCUnitRequired = 2;
8213
8214 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
8215 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
8216 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
8217
8218 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
8219 mode_lib->ms.support.NotEnoughDSCSlices = true;
8220 }
8221 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
8222 }
8223
8224 mode_lib->ms.support.NotEnoughDSCUnits = false;
8225 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
8226 mode_lib->ms.support.NotEnoughDSCUnits = true;
8227 }
8228
8229 /*DSC Delay per state*/
8230 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8231 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
8232 mode_lib->ms.ODMMode[k],
8233 mode_lib->ip.maximum_dsc_bits_per_component,
8234 s->OutputBpp[k],
8235 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
8236 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
8237 mode_lib->ms.support.NumberOfDSCSlices[k],
8238 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
8239 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
8240 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
8241 s->PixelClockBackEnd[k]);
8242 }
8243
8244 // Figure out the swath and DET configuration after the num dpp per plane is figured out
8245 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
8246 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
8247 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
8248
8249 // output
8250 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
8251 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
8252 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
8253 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
8254 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
8255 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
8256 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
8257 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
8258 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
8259 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
8260 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
8261 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
8262 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
8263 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
8264 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
8265 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
8266 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
8267 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
8268 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
8269
8270 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
8271
8272 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
8273 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8274 mode_lib->ms.SurfaceSizeInMALL[k] = 0;
8275 mode_lib->ms.support.ExceededMALLSize = 0;
8276 } else {
8277 CalculateSurfaceSizeInMall(
8278 display_cfg,
8279 mode_lib->ms.num_active_planes,
8280 mode_lib->soc.mall_allocated_for_dcn_mbytes,
8281
8282 mode_lib->ms.BytePerPixelY,
8283 mode_lib->ms.BytePerPixelC,
8284 mode_lib->ms.Read256BlockWidthY,
8285 mode_lib->ms.Read256BlockWidthC,
8286 mode_lib->ms.Read256BlockHeightY,
8287 mode_lib->ms.Read256BlockHeightC,
8288 mode_lib->ms.MacroTileWidthY,
8289 mode_lib->ms.MacroTileWidthC,
8290 mode_lib->ms.MacroTileHeightY,
8291 mode_lib->ms.MacroTileHeightC,
8292
8293 /* Output */
8294 mode_lib->ms.SurfaceSizeInMALL,
8295 &mode_lib->ms.support.ExceededMALLSize);
8296 }
8297
8298 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
8299 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8300 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
8301 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
8302 }
8303 }
8304
8305 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8306 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8307 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
8308 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
8309 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8310 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8311 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
8312 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
8313 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
8314 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
8315 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
8316 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
8317 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
8318 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
8319 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8320 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8321 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8322 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8323 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
8324 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8325 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8326 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8327 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8328 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8329 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8330 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8331 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
8332 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8333 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8334 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8335 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8336 s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8337 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8338 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
8339 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
8340 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
8341
8342 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
8343 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
8344 }
8345
8346 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
8347 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8348 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
8349 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
8350 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
8351 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
8352 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
8353 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
8354 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
8355 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
8356 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
8357 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8358
8359 // output
8360 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
8361 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
8362 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
8363 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
8364 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
8365 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
8366 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
8367 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
8368 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
8369 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
8370 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
8371 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
8372 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
8373 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
8374 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
8375 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
8376 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
8377 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
8378 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
8379 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
8380 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
8381 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
8382 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
8383 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
8384 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
8385 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
8386 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
8387 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
8388 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
8389 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
8390 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
8391 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
8392 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
8393 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
8394 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
8395 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
8396 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
8397 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
8398 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
8399 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
8400 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
8401 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
8402 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
8403 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
8404 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
8405 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
8406 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
8407 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
8408 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
8409 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
8410 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
8411 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
8412
8413 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
8414
8415 mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
8416 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
8417
8418 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8419 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
8420 mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
8421
8422 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
8423 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
8424
8425 #ifdef __DML_VBA_DEBUG__
8426 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
8427 dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
8428 #endif
8429 }
8430 #ifdef __DML_VBA_DEBUG__
8431 dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
8432 dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
8433 #endif
8434
8435 /* VActive bytes to fetch for UCLK P-State */
8436 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
8437 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8438
8439 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
8440 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
8441 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
8442 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
8443 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
8444 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
8445 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
8446 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
8447 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
8448 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
8449 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
8450 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
8451 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
8452 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
8453 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
8454 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
8455 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
8456
8457 /* outputs */
8458 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
8459 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
8460
8461 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
8462
8463 /* Excess VActive bandwidth required to fill DET */
8464 calculate_excess_vactive_bandwidth_required(
8465 display_cfg,
8466 mode_lib->ms.num_active_planes,
8467 s->pstate_bytes_required_l,
8468 s->pstate_bytes_required_c,
8469 /* outputs */
8470 mode_lib->ms.excess_vactive_fill_bw_l,
8471 mode_lib->ms.excess_vactive_fill_bw_c);
8472
8473 mode_lib->ms.UrgLatency = CalculateUrgentLatency(
8474 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
8475 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
8476 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
8477 mode_lib->soc.do_urgent_latency_adjustment,
8478 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
8479 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
8480 mode_lib->ms.FabricClock,
8481 mode_lib->ms.uclk_freq_mhz,
8482 mode_lib->soc.qos_parameters.qos_type,
8483 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
8484 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
8485 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
8486 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
8487 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
8488 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
8489
8490 mode_lib->ms.TripToMemory = CalculateTripToMemory(
8491 mode_lib->ms.UrgLatency,
8492 mode_lib->ms.FabricClock,
8493 mode_lib->ms.uclk_freq_mhz,
8494 mode_lib->soc.qos_parameters.qos_type,
8495 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
8496 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
8497 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
8498 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
8499 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
8500
8501 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
8502
8503 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8504 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8505 bool cursor_not_enough_urgent_latency_hiding = 0;
8506
8507 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
8508 calculate_cursor_req_attributes(
8509 display_cfg->plane_descriptors[k].cursor.cursor_width,
8510 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
8511
8512 // output
8513 &s->cursor_lines_per_chunk[k],
8514 &s->cursor_bytes_per_line[k],
8515 &s->cursor_bytes_per_chunk[k],
8516 &s->cursor_bytes[k]);
8517
8518 calculate_cursor_urgent_burst_factor(
8519 mode_lib->ip.cursor_buffer_size,
8520 display_cfg->plane_descriptors[k].cursor.cursor_width,
8521 s->cursor_bytes_per_chunk[k],
8522 s->cursor_lines_per_chunk[k],
8523 line_time_us,
8524 mode_lib->ms.UrgLatency,
8525
8526 // output
8527 &mode_lib->ms.UrgentBurstFactorCursor[k],
8528 &cursor_not_enough_urgent_latency_hiding);
8529 }
8530
8531 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
8532
8533 #ifdef __DML_VBA_DEBUG__
8534 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
8535 dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
8536 dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
8537 #endif
8538
8539 CalculateUrgentBurstFactor(
8540 &display_cfg->plane_descriptors[k],
8541 mode_lib->ms.swath_width_luma_ub[k],
8542 mode_lib->ms.swath_width_chroma_ub[k],
8543 mode_lib->ms.SwathHeightY[k],
8544 mode_lib->ms.SwathHeightC[k],
8545 line_time_us,
8546 mode_lib->ms.UrgLatency,
8547 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
8548 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
8549 mode_lib->ms.BytePerPixelInDETY[k],
8550 mode_lib->ms.BytePerPixelInDETC[k],
8551 mode_lib->ms.DETBufferSizeY[k],
8552 mode_lib->ms.DETBufferSizeC[k],
8553
8554 // Output
8555 &mode_lib->ms.UrgentBurstFactorLuma[k],
8556 &mode_lib->ms.UrgentBurstFactorChroma[k],
8557 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
8558
8559 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
8560 }
8561
8562 CalculateDCFCLKDeepSleep(
8563 display_cfg,
8564 mode_lib->ms.num_active_planes,
8565 mode_lib->ms.BytePerPixelY,
8566 mode_lib->ms.BytePerPixelC,
8567 mode_lib->ms.SwathWidthY,
8568 mode_lib->ms.SwathWidthC,
8569 mode_lib->ms.NoOfDPP,
8570 mode_lib->ms.PSCL_FACTOR,
8571 mode_lib->ms.PSCL_FACTOR_CHROMA,
8572 mode_lib->ms.RequiredDPPCLK,
8573 mode_lib->ms.vactive_sw_bw_l,
8574 mode_lib->ms.vactive_sw_bw_c,
8575 mode_lib->soc.return_bus_width_bytes,
8576
8577 /* Output */
8578 &mode_lib->ms.dcfclk_deepsleep);
8579
8580 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8581 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
8582 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
8583 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
8584 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
8585 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
8586 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
8587 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
8588 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
8589 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
8590 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
8591 } else {
8592 mode_lib->ms.WritebackDelayTime[k] = 0.0;
8593 }
8594 }
8595
8596 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
8597 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8598 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
8599 s->MaximumVStartup[k] = CalculateMaxVStartup(
8600 mode_lib->ip.ptoi_supported,
8601 mode_lib->ip.vblank_nom_default_us,
8602 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
8603 mode_lib->ms.WritebackDelayTime[k]);
8604 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
8605 }
8606
8607 #ifdef __DML_VBA_DEBUG__
8608 dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
8609 #endif
8610
8611 /* Immediate Flip and MALL parameters */
8612 s->ImmediateFlipRequired = false;
8613 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8614 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
8615 }
8616
8617 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
8618 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8619 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
8620 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
8621 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
8622 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
8623 }
8624
8625 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
8626 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8627 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
8628 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
8629 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
8630 }
8631
8632 s->FullFrameMALLPStateMethod = false;
8633 s->SubViewportMALLPStateMethod = false;
8634 s->PhantomPipeMALLPStateMethod = false;
8635 s->SubViewportMALLRefreshGreaterThan120Hz = false;
8636 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8637 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
8638 s->FullFrameMALLPStateMethod = true;
8639 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
8640 s->SubViewportMALLPStateMethod = true;
8641 if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
8642 // For dv, small frame tests will have very high refresh rate
8643 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
8644 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
8645 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
8646 if (refresh_rate > 120)
8647 s->SubViewportMALLRefreshGreaterThan120Hz = true;
8648 }
8649 }
8650 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
8651 s->PhantomPipeMALLPStateMethod = true;
8652 }
8653 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
8654 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
8655
8656 #ifdef __DML_VBA_DEBUG__
8657 dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
8658 dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
8659 dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
8660 dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
8661 dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
8662 dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
8663 dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
8664 dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8665 dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
8666 dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
8667 #endif
8668
8669 mode_lib->ms.support.OutstandingRequestsSupport = true;
8670 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
8671
8672 mode_lib->ms.support.avg_urgent_latency_us
8673 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
8674 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
8675 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
8676 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
8677
8678 mode_lib->ms.support.avg_non_urgent_latency_us
8679 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
8680 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
8681 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
8682 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
8683
8684 mode_lib->ms.support.max_non_urgent_latency_us
8685 = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
8686 / mode_lib->ms.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
8687 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock
8688 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock
8689 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
8690
8691 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8692
8693 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
8694 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
8695 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
8696
8697 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
8698 mode_lib->ms.support.OutstandingRequestsSupport = false;
8699 }
8700
8701 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
8702 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
8703 }
8704
8705 #ifdef __DML_VBA_DEBUG__
8706 dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
8707 dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
8708 dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
8709 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
8710 #endif
8711 }
8712
8713 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
8714 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
8715 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
8716
8717 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
8718 mode_lib->ms.support.OutstandingRequestsSupport = false;
8719 }
8720
8721 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
8722 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
8723 }
8724 #ifdef __DML_VBA_DEBUG__
8725 dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
8726 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
8727 #endif
8728 }
8729 }
8730
8731 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
8732 if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) {
8733 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8734 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
8735 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
8736 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
8737 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
8738 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
8739 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
8740 }
8741 } else {
8742 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8743 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8744 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
8745 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
8746 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
8747 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
8748 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
8749 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
8750
8751 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
8752 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
8753 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8754 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
8755 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
8756
8757 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8758 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8759 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
8760 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8761 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
8762 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
8763 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
8764 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
8765 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
8766 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
8767
8768 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
8769 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8770 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
8771 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8772 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
8773 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
8774 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
8775 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
8776 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
8777 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
8778
8779 // output
8780 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
8781 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
8782 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
8783 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
8784
8785 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
8786 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
8787 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->ms.mcache_row_bytes_per_channel_l[k];
8788 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
8789 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
8790
8791 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
8792 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
8793 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->ms.mcache_row_bytes_per_channel_c[k];
8794 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
8795 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
8796
8797 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
8798 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
8799 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
8800
8801 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
8802 }
8803
8804 calculate_mall_bw_overhead_factor(
8805 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
8806 mode_lib->ms.mall_prefetch_dram_overhead_factor,
8807
8808 // input
8809 display_cfg,
8810 mode_lib->ms.num_active_planes);
8811 }
8812
8813 // Calculate all the bandwidth available
8814 // Need another bw for latency evaluation
8815 calculate_bandwidth_available(
8816 mode_lib->ms.support.avg_bandwidth_available_min, // not used
8817 mode_lib->ms.support.avg_bandwidth_available, // not used
8818 mode_lib->ms.support.urg_bandwidth_available_min_latency,
8819 mode_lib->ms.support.urg_bandwidth_available, // not used
8820 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
8821 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
8822
8823 &mode_lib->soc,
8824 display_cfg->hostvm_enable,
8825 mode_lib->ms.DCFCLK,
8826 mode_lib->ms.FabricClock,
8827 mode_lib->ms.dram_bw_mbps);
8828
8829 calculate_bandwidth_available(
8830 mode_lib->ms.support.avg_bandwidth_available_min,
8831 mode_lib->ms.support.avg_bandwidth_available,
8832 mode_lib->ms.support.urg_bandwidth_available_min,
8833 mode_lib->ms.support.urg_bandwidth_available,
8834 mode_lib->ms.support.urg_bandwidth_available_vm_only,
8835 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
8836
8837 &mode_lib->soc,
8838 display_cfg->hostvm_enable,
8839 mode_lib->ms.MaxDCFCLK,
8840 mode_lib->ms.MaxFabricClock,
8841 #ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW
8842 mode_lib->ms.dram_bw_mbps);
8843 #else
8844 mode_lib->ms.max_dram_bw_mbps);
8845 #endif
8846
8847 // Average BW support check
8848 calculate_avg_bandwidth_required(
8849 mode_lib->ms.support.avg_bandwidth_required,
8850 // input
8851 display_cfg,
8852 mode_lib->ms.num_active_planes,
8853 mode_lib->ms.vactive_sw_bw_l,
8854 mode_lib->ms.vactive_sw_bw_c,
8855 mode_lib->ms.cursor_bw,
8856 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
8857 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
8858 mode_lib->ms.mall_prefetch_dram_overhead_factor,
8859 mode_lib->ms.mall_prefetch_sdp_overhead_factor);
8860
8861 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
8862 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
8863 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
8864 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
8865 }
8866
8867 mode_lib->ms.support.AvgBandwidthSupport = true;
8868 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
8869 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8870 if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
8871 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
8872 dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
8873
8874 }
8875 }
8876 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
8877 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
8878 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
8879 mode_lib->ms.support.AvgBandwidthSupport = false;
8880 #ifdef __DML_VBA_DEBUG__
8881 dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
8882 #endif
8883 }
8884 }
8885 }
8886
8887 /* Prefetch Check */
8888 {
8889 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
8890
8891 calculate_hostvm_inefficiency_factor(
8892 &s->HostVMInefficiencyFactor,
8893 &s->HostVMInefficiencyFactorPrefetch,
8894
8895 display_cfg->gpuvm_enable,
8896 display_cfg->hostvm_enable,
8897 mode_lib->ip.remote_iommu_outstanding_translations,
8898 mode_lib->soc.max_outstanding_reqs,
8899 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
8900 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
8901
8902 mode_lib->ms.Total3dlutActive = 0;
8903 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8904 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
8905 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
8906
8907 // Calculate tdlut schedule related terms
8908 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
8909 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
8910 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
8911 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
8912 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
8913 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
8914 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
8915 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
8916 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
8917
8918 // output
8919 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
8920 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
8921 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
8922 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
8923 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
8924 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
8925 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
8926
8927 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
8928 }
8929
8930 min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
8931
8932 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
8933 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
8934 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
8935 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
8936
8937 CalculateExtraLatency(
8938 display_cfg,
8939 mode_lib->ip.rob_buffer_size_kbytes,
8940 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
8941 s->ReorderingBytes,
8942 mode_lib->ms.DCFCLK,
8943 mode_lib->ms.FabricClock,
8944 mode_lib->ip.pixel_chunk_size_kbytes,
8945 min_return_bw_for_latency,
8946 mode_lib->ms.num_active_planes,
8947 mode_lib->ms.NoOfDPP,
8948 mode_lib->ms.dpte_group_bytes,
8949 s->tdlut_bytes_per_group,
8950 s->HostVMInefficiencyFactor,
8951 s->HostVMInefficiencyFactorPrefetch,
8952 mode_lib->soc.hostvm_min_page_size_kbytes,
8953 mode_lib->soc.qos_parameters.qos_type,
8954 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
8955 mode_lib->soc.max_outstanding_reqs,
8956 mode_lib->ms.support.request_size_bytes_luma,
8957 mode_lib->ms.support.request_size_bytes_chroma,
8958 mode_lib->ip.meta_chunk_size_kbytes,
8959 mode_lib->ip.dchub_arb_to_ret_delay,
8960 mode_lib->ms.TripToMemory,
8961 mode_lib->ip.hostvm_mode,
8962
8963 // output
8964 &mode_lib->ms.ExtraLatency,
8965 &mode_lib->ms.ExtraLatency_sr,
8966 &mode_lib->ms.ExtraLatencyPrefetch);
8967
8968 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8969 s->impacted_dst_y_pre[k] = 0;
8970
8971 s->recalc_prefetch_schedule = 0;
8972 s->recalc_prefetch_done = 0;
8973 do {
8974 mode_lib->ms.support.PrefetchSupported = true;
8975
8976 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8977 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8978 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
8979
8980 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
8981 mode_lib->ms.NoOfDPP[k],
8982 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
8983 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
8984 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
8985 display_cfg->plane_descriptors[k].composition.rotation_angle);
8986
8987 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
8988 mode_lib->ms.NoOfDPP[k],
8989 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
8990 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
8991 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
8992 display_cfg->plane_descriptors[k].composition.rotation_angle);
8993
8994 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
8995
8996 mode_lib->ms.TWait[k] = CalculateTWait(
8997 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
8998 mode_lib->ms.UrgLatency,
8999 mode_lib->ms.TripToMemory,
9000 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
9001 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
9002
9003 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
9004 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
9005 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9006 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
9007 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
9008 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
9009 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9010 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9011 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
9012 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
9013 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
9014 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
9015 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
9016 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
9017 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
9018 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
9019 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
9020 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
9021 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
9022 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
9023 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
9024 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
9025 myPipe->ODMMode = mode_lib->ms.ODMMode[k];
9026 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
9027 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
9028 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
9029 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
9030
9031 #ifdef __DML_VBA_DEBUG__
9032 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
9033 dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
9034 #endif
9035 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
9036 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
9037 CalculatePrefetchSchedule_params->myPipe = myPipe;
9038 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
9039 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
9040 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
9041 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
9042 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
9043 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
9044 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9045 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
9046 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
9047 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
9048 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
9049 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
9050 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
9051 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
9052 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
9053 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
9054 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
9055 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
9056 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
9057 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
9058 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
9059 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
9060 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
9061 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
9062 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
9063 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
9064 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
9065 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
9066 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
9067 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
9068 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
9069 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
9070 CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
9071 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
9072 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
9073 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
9074 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
9075 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
9076 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
9077 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
9078 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
9079 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
9080 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
9081 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
9082 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
9083 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
9084 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k];
9085 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k];
9086
9087 // output
9088 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
9089 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
9090 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
9091 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
9092 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
9093 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
9094 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
9095 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
9096 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
9097 CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &mode_lib->ms.RequiredPrefetchBWOTO[k];
9098 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
9099 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
9100 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
9101 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
9102 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
9103 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
9104 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
9105 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
9106 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
9107 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
9108 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
9109 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
9110 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
9111 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
9112 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
9113 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
9114 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
9115 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
9116 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
9117 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
9118 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->prefetch_swath_time_us[k];
9119
9120 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
9121
9122 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
9123 dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
9124 dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
9125 } // for k num_planes
9126
9127 CalculateDCFCLKDeepSleepTdlut(
9128 display_cfg,
9129 mode_lib->ms.num_active_planes,
9130 mode_lib->ms.BytePerPixelY,
9131 mode_lib->ms.BytePerPixelC,
9132 mode_lib->ms.SwathWidthY,
9133 mode_lib->ms.SwathWidthC,
9134 mode_lib->ms.NoOfDPP,
9135 mode_lib->ms.PSCL_FACTOR,
9136 mode_lib->ms.PSCL_FACTOR_CHROMA,
9137 mode_lib->ms.RequiredDPPCLK,
9138 mode_lib->ms.vactive_sw_bw_l,
9139 mode_lib->ms.vactive_sw_bw_c,
9140 mode_lib->soc.return_bus_width_bytes,
9141 mode_lib->ms.RequiredDISPCLK,
9142 s->tdlut_bytes_to_deliver,
9143 s->prefetch_swath_time_us,
9144
9145 /* Output */
9146 &mode_lib->ms.dcfclk_deepsleep);
9147
9148 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9149 if (mode_lib->ms.dst_y_prefetch[k] < 2.0
9150 || mode_lib->ms.LinesForVM[k] >= 32.0
9151 || mode_lib->ms.LinesForDPTERow[k] >= 16.0
9152 || mode_lib->ms.NoTimeForPrefetch[k] == true
9153 || s->DSTYAfterScaler[k] > 8) {
9154 mode_lib->ms.support.PrefetchSupported = false;
9155 dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
9156 dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
9157 dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
9158 dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
9159 dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
9160 }
9161 }
9162
9163 mode_lib->ms.support.DynamicMetadataSupported = true;
9164 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
9165 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
9166 mode_lib->ms.support.DynamicMetadataSupported = false;
9167 }
9168 }
9169
9170 mode_lib->ms.support.VRatioInPrefetchSupported = true;
9171 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9172 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
9173 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
9174 mode_lib->ms.support.VRatioInPrefetchSupported = false;
9175 dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
9176 dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
9177 dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
9178 }
9179 }
9180
9181 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported;
9182
9183 // By default, do not recalc prefetch schedule
9184 s->recalc_prefetch_schedule = 0;
9185
9186 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
9187 if (mode_lib->ms.support.PrefetchSupported) {
9188 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9189 // Calculate Urgent burst factor for prefetch
9190 #ifdef __DML_VBA_DEBUG__
9191 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
9192 dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
9193 dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
9194 #endif
9195 CalculateUrgentBurstFactor(
9196 &display_cfg->plane_descriptors[k],
9197 mode_lib->ms.swath_width_luma_ub[k],
9198 mode_lib->ms.swath_width_chroma_ub[k],
9199 mode_lib->ms.SwathHeightY[k],
9200 mode_lib->ms.SwathHeightC[k],
9201 s->line_times[k],
9202 mode_lib->ms.UrgLatency,
9203 mode_lib->ms.VRatioPreY[k],
9204 mode_lib->ms.VRatioPreC[k],
9205 mode_lib->ms.BytePerPixelInDETY[k],
9206 mode_lib->ms.BytePerPixelInDETC[k],
9207 mode_lib->ms.DETBufferSizeY[k],
9208 mode_lib->ms.DETBufferSizeC[k],
9209 /* Output */
9210 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
9211 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
9212 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
9213 }
9214
9215 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
9216 // assume flip bw is 0 at this point
9217 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
9218 mode_lib->ms.final_flip_bw[k] = 0;
9219
9220 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
9221 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
9222 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
9223 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
9224 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
9225 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
9226
9227 calculate_peak_bandwidth_params->display_cfg = display_cfg;
9228 calculate_peak_bandwidth_params->inc_flip_bw = 0;
9229 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
9230 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
9231 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
9232 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
9233 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
9234 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
9235 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
9236 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
9237
9238 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
9239 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
9240 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
9241 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
9242 calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO;
9243 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
9244 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
9245 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
9246 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
9247 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
9248 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
9249 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
9250 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
9251 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
9252 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
9253 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
9254 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
9255 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
9256 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
9257
9258 calculate_peak_bandwidth_required(
9259 &mode_lib->scratch,
9260 calculate_peak_bandwidth_params);
9261
9262 // Check urg peak bandwidth against available urg bw
9263 // check at SDP and DRAM, for all soc states (SVP prefetch an Sys Active)
9264 check_urgent_bandwidth_support(
9265 &s->dummy_single[0], // double* frac_urg_bandwidth
9266 &s->dummy_single[1], // double* frac_urg_bandwidth_mall
9267 &mode_lib->ms.support.UrgVactiveBandwidthSupport,
9268 &mode_lib->ms.support.PrefetchBandwidthSupported,
9269
9270 mode_lib->soc.mall_allocated_for_dcn_mbytes,
9271 mode_lib->ms.support.non_urg_bandwidth_required,
9272 mode_lib->ms.support.urg_vactive_bandwidth_required,
9273 mode_lib->ms.support.urg_bandwidth_required,
9274 mode_lib->ms.support.urg_bandwidth_available);
9275
9276 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
9277 dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
9278
9279 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9280 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
9281 mode_lib->ms.support.PrefetchSupported = false;
9282 dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
9283 }
9284 }
9285
9286 #ifdef DML_GLOBAL_PREFETCH_CHECK
9287 if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) {
9288 CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes;
9289 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
9290 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
9291 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
9292 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
9293 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
9294 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY;
9295 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC;
9296 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
9297 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte;
9298 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY;
9299 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC;
9300 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
9301 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
9302 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
9303 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded;
9304 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto;
9305 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
9306 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
9307 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch;
9308 if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024)
9309 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024;
9310
9311 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) /
9312 ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0);
9313
9314 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
9315 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule;
9316 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
9317 mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params);
9318 s->recalc_prefetch_done = 1;
9319 s->recalc_prefetch_schedule = 1;
9320 }
9321 #endif
9322 } // prefetch schedule ok, do urg bw and flip schedule
9323 } while (s->recalc_prefetch_schedule);
9324
9325 // Flip Schedule
9326 // Both prefetch schedule and BW okay
9327 if (mode_lib->ms.support.PrefetchSupported == true) {
9328 mode_lib->ms.BandwidthAvailableForImmediateFlip =
9329 get_bandwidth_available_for_immediate_flip(
9330 dml2_core_internal_soc_state_sys_active,
9331 mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
9332 mode_lib->ms.support.urg_bandwidth_available);
9333
9334 mode_lib->ms.TotImmediateFlipBytes = 0;
9335 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9336 if (display_cfg->plane_descriptors[k].immediate_flip) {
9337 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
9338 s->HostVMInefficiencyFactor,
9339 mode_lib->ms.vm_bytes[k],
9340 mode_lib->ms.DPTEBytesPerRow[k],
9341 mode_lib->ms.meta_row_bytes[k]);
9342 } else {
9343 s->per_pipe_flip_bytes[k] = 0;
9344 }
9345 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
9346
9347 }
9348
9349 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9350 CalculateFlipSchedule(
9351 &mode_lib->scratch,
9352 display_cfg->plane_descriptors[k].immediate_flip,
9353 1, // use_lb_flip_bw
9354 s->HostVMInefficiencyFactor,
9355 s->Tvm_trips_flip[k],
9356 s->Tr0_trips_flip[k],
9357 s->Tvm_trips_flip_rounded[k],
9358 s->Tr0_trips_flip_rounded[k],
9359 display_cfg->gpuvm_enable,
9360 mode_lib->ms.vm_bytes[k],
9361 mode_lib->ms.DPTEBytesPerRow[k],
9362 mode_lib->ms.BandwidthAvailableForImmediateFlip,
9363 mode_lib->ms.TotImmediateFlipBytes,
9364 display_cfg->plane_descriptors[k].pixel_format,
9365 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
9366 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
9367 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
9368 mode_lib->ms.Tno_bw_flip[k],
9369 mode_lib->ms.dpte_row_height[k],
9370 mode_lib->ms.dpte_row_height_chroma[k],
9371 mode_lib->ms.use_one_row_for_frame_flip[k],
9372 mode_lib->ip.max_flip_time_us,
9373 mode_lib->ip.max_flip_time_lines,
9374 s->per_pipe_flip_bytes[k],
9375 mode_lib->ms.meta_row_bytes[k],
9376 s->meta_row_height_luma[k],
9377 s->meta_row_height_chroma[k],
9378 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
9379
9380 /* Output */
9381 &mode_lib->ms.dst_y_per_vm_flip[k],
9382 &mode_lib->ms.dst_y_per_row_flip[k],
9383 &mode_lib->ms.final_flip_bw[k],
9384 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
9385 }
9386
9387 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
9388 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
9389 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
9390 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
9391 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
9392 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
9393
9394 calculate_peak_bandwidth_params->display_cfg = display_cfg;
9395 calculate_peak_bandwidth_params->inc_flip_bw = 1;
9396 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
9397 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
9398 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
9399 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
9400 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
9401 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
9402 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
9403 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
9404
9405 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l;
9406 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c;
9407 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
9408 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
9409 calculate_peak_bandwidth_params->prefetch_bandwidth_oto = mode_lib->ms.RequiredPrefetchBWOTO;
9410 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
9411 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
9412 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
9413 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
9414 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
9415 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
9416 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
9417 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
9418 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
9419 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
9420 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
9421 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
9422 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
9423 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
9424
9425 calculate_peak_bandwidth_required(
9426 &mode_lib->scratch,
9427 calculate_peak_bandwidth_params);
9428
9429 calculate_immediate_flip_bandwidth_support(
9430 &s->dummy_single[0], // double* frac_urg_bandwidth_flip
9431 &mode_lib->ms.support.ImmediateFlipSupport,
9432
9433 dml2_core_internal_soc_state_sys_active,
9434 mode_lib->ms.support.urg_bandwidth_required_flip,
9435 mode_lib->ms.support.non_urg_bandwidth_required_flip,
9436 mode_lib->ms.support.urg_bandwidth_available);
9437
9438 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9439 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
9440 mode_lib->ms.support.ImmediateFlipSupport = false;
9441 }
9442
9443 } else { // if prefetch not support, assume iflip is not supported too
9444 mode_lib->ms.support.ImmediateFlipSupport = false;
9445 }
9446
9447 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
9448 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
9449 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
9450 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
9451 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
9452 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
9453 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
9454 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
9455 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
9456 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
9457 s->mSOCParameters.USRRetrainingLatency = 0;
9458 s->mSOCParameters.SMNLatency = 0;
9459 s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
9460 s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index);
9461 s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
9462 s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
9463
9464 CalculateWatermarks_params->display_cfg = display_cfg;
9465 CalculateWatermarks_params->USRRetrainingRequired = false;
9466 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9467 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
9468 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
9469 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
9470 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
9471 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
9472 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
9473 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
9474 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
9475 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
9476 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
9477 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
9478 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
9479 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
9480 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
9481 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
9482 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
9483 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
9484 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
9485 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
9486 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
9487 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
9488 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
9489 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
9490 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
9491 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
9492 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
9493
9494 // Output
9495 CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
9496 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
9497 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
9498 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
9499 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
9500 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
9501 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
9502 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
9503 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
9504 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
9505 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
9506 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
9507
9508 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
9509
9510 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
9511 }
9512 dml2_printf("DML::%s: Done prefetch calculation\n", __func__);
9513 // End of Prefetch Check
9514
9515 mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
9516
9517 //Re-ordering Buffer Support Check
9518 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9519 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
9520 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
9521 mode_lib->ms.support.ROBSupport = true;
9522 } else {
9523 mode_lib->ms.support.ROBSupport = false;
9524 }
9525 } else {
9526 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
9527 mode_lib->ms.support.ROBSupport = true;
9528 } else {
9529 mode_lib->ms.support.ROBSupport = false;
9530 }
9531 }
9532
9533 /* VActive fill time calculations (informative) */
9534 calculate_vactive_det_fill_latency(
9535 display_cfg,
9536 mode_lib->ms.num_active_planes,
9537 s->pstate_bytes_required_l,
9538 s->pstate_bytes_required_c,
9539 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9540 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9541 mode_lib->ms.vactive_sw_bw_l,
9542 mode_lib->ms.vactive_sw_bw_c,
9543 mode_lib->ms.surface_avg_vactive_required_bw,
9544 mode_lib->ms.surface_peak_required_bw,
9545 /* outputs */
9546 mode_lib->ms.dram_change_vactive_det_fill_delay_us);
9547
9548 #ifdef __DML_VBA_DEBUG__
9549 dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
9550 dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
9551 #endif
9552
9553 /*Mode Support, Voltage State and SOC Configuration*/
9554 {
9555 if (mode_lib->ms.support.ScaleRatioAndTapsSupport
9556 && mode_lib->ms.support.SourceFormatPixelAndScanSupport
9557 && mode_lib->ms.support.ViewportSizeSupport
9558 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
9559 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
9560 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
9561 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
9562 && !mode_lib->ms.support.ExceededMultistreamSlots
9563 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
9564 && !mode_lib->ms.support.NotEnoughLanesForMSO
9565 && !mode_lib->ms.support.P2IWith420
9566 && !mode_lib->ms.support.DSC422NativeNotSupported
9567 && mode_lib->ms.support.DSCSlicesODMModeSupported
9568 && !mode_lib->ms.support.NotEnoughDSCUnits
9569 && !mode_lib->ms.support.NotEnoughDSCSlices
9570 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
9571 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
9572 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
9573 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
9574 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
9575 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
9576 && mode_lib->ms.support.ROBSupport
9577 && mode_lib->ms.support.OutstandingRequestsSupport
9578 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
9579 && mode_lib->ms.support.DISPCLK_DPPCLK_Support
9580 && mode_lib->ms.support.TotalAvailablePipesSupport
9581 && mode_lib->ms.support.NumberOfOTGSupport
9582 && mode_lib->ms.support.NumberOfHDMIFRLSupport
9583 && mode_lib->ms.support.NumberOfDP2p0Support
9584 && mode_lib->ms.support.EnoughWritebackUnits
9585 && mode_lib->ms.support.WritebackLatencySupport
9586 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
9587 && mode_lib->ms.support.CursorSupport
9588 && mode_lib->ms.support.PitchSupport
9589 && !mode_lib->ms.support.ViewportExceedsSurface
9590 && mode_lib->ms.support.PrefetchSupported
9591 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
9592 && mode_lib->ms.support.AvgBandwidthSupport
9593 && mode_lib->ms.support.DynamicMetadataSupported
9594 && mode_lib->ms.support.VRatioInPrefetchSupported
9595 && mode_lib->ms.support.PTEBufferSizeNotExceeded
9596 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
9597 && !mode_lib->ms.support.ExceededMALLSize
9598 && mode_lib->ms.support.g6_temp_read_support
9599 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
9600 dml2_printf("DML::%s: mode is supported\n", __func__);
9601 mode_lib->ms.support.ModeSupport = true;
9602 } else {
9603 dml2_printf("DML::%s: mode is NOT supported\n", __func__);
9604 mode_lib->ms.support.ModeSupport = false;
9605 }
9606 }
9607
9608 // Since now the mode_support work on 1 particular power state, so there is only 1 state idx (index 0).
9609 dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
9610 dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9611
9612 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9613 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
9614 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
9615 }
9616
9617 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
9618 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
9619 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
9620 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
9621 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
9622 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
9623 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
9624
9625 #if defined(__DML_VBA_DEBUG__)
9626 dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
9627 dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
9628 #endif
9629 }
9630
9631 #if defined(__DML_VBA_DEBUG__)
9632 if (!mode_lib->ms.support.ModeSupport)
9633 dml2_print_mode_support_info(&mode_lib->ms.support, true);
9634
9635 dml2_printf("DML::%s: --- DONE --- \n", __func__);
9636 #endif
9637
9638 return mode_lib->ms.support.ModeSupport;
9639 }
9640
dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex * in_out_params)9641 unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
9642 {
9643 unsigned int result;
9644
9645 dml2_printf("DML::%s: ------------- START ----------\n", __func__);
9646 result = dml_core_mode_support(in_out_params);
9647
9648 if (result)
9649 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
9650
9651 dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
9652
9653 for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
9654 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
9655
9656 dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);
9657
9658 return result;
9659 }
9660
CalculatePixelDeliveryTimes(const struct dml2_display_cfg * display_cfg,const struct core_display_cfg_support_info * cfg_support_info,unsigned int NumberOfActiveSurfaces,double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])9661 static void CalculatePixelDeliveryTimes(
9662 const struct dml2_display_cfg *display_cfg,
9663 const struct core_display_cfg_support_info *cfg_support_info,
9664 unsigned int NumberOfActiveSurfaces,
9665 double VRatioPrefetchY[],
9666 double VRatioPrefetchC[],
9667 unsigned int swath_width_luma_ub[],
9668 unsigned int swath_width_chroma_ub[],
9669 double PSCL_THROUGHPUT[],
9670 double PSCL_THROUGHPUT_CHROMA[],
9671 double Dppclk[],
9672 unsigned int BytePerPixelC[],
9673 unsigned int req_per_swath_ub_l[],
9674 unsigned int req_per_swath_ub_c[],
9675
9676 // Output
9677 double DisplayPipeLineDeliveryTimeLuma[],
9678 double DisplayPipeLineDeliveryTimeChroma[],
9679 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
9680 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
9681 double DisplayPipeRequestDeliveryTimeLuma[],
9682 double DisplayPipeRequestDeliveryTimeChroma[],
9683 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
9684 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
9685 {
9686 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9687 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9688
9689 #ifdef __DML_VBA_DEBUG__
9690 dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9691 dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9692 dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
9693 dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9694 dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
9695 dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
9696 dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
9697 dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
9698 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
9699 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
9700 dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
9701 dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
9702 dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
9703 #endif
9704 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
9705 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9706 } else {
9707 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9708 }
9709
9710 if (BytePerPixelC[k] == 0) {
9711 DisplayPipeLineDeliveryTimeChroma[k] = 0;
9712 } else {
9713 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
9714 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9715 } else {
9716 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9717 }
9718 }
9719
9720 if (VRatioPrefetchY[k] <= 1) {
9721 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9722 } else {
9723 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9724 }
9725
9726 if (BytePerPixelC[k] == 0) {
9727 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
9728 } else {
9729 if (VRatioPrefetchC[k] <= 1) {
9730 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9731 } else {
9732 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9733 }
9734 }
9735 #ifdef __DML_VBA_DEBUG__
9736 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
9737 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
9738 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
9739 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
9740 #endif
9741 }
9742
9743 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9744
9745 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
9746 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
9747 if (BytePerPixelC[k] == 0) {
9748 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
9749 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
9750 } else {
9751 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
9752 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
9753 }
9754 #ifdef __DML_VBA_DEBUG__
9755 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
9756 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
9757 dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
9758 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
9759 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
9760 dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
9761 #endif
9762 }
9763 }
9764
CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params * p)9765 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
9766 {
9767 unsigned int meta_chunk_width;
9768 unsigned int min_meta_chunk_width;
9769 unsigned int meta_chunk_per_row_int;
9770 unsigned int meta_row_remainder;
9771 unsigned int meta_chunk_threshold;
9772 unsigned int meta_chunks_per_row_ub;
9773 unsigned int meta_chunk_width_chroma;
9774 unsigned int min_meta_chunk_width_chroma;
9775 unsigned int meta_chunk_per_row_int_chroma;
9776 unsigned int meta_row_remainder_chroma;
9777 unsigned int meta_chunk_threshold_chroma;
9778 unsigned int meta_chunks_per_row_ub_chroma;
9779 unsigned int dpte_group_width_luma;
9780 unsigned int dpte_groups_per_row_luma_ub;
9781 unsigned int dpte_group_width_chroma;
9782 unsigned int dpte_groups_per_row_chroma_ub;
9783 double pixel_clock_mhz;
9784
9785 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9786 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9787 if (p->BytePerPixelC[k] == 0) {
9788 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9789 } else {
9790 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9791 }
9792 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9793 if (p->BytePerPixelC[k] == 0) {
9794 p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
9795 } else {
9796 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9797 }
9798 }
9799
9800 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9801 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
9802 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9803 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9804 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
9805 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
9806 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9807 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
9808 } else {
9809 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
9810 }
9811 if (meta_row_remainder <= meta_chunk_threshold) {
9812 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
9813 } else {
9814 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
9815 }
9816 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
9817 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9818 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9819 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9820 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9821 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9822 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9823 if (p->BytePerPixelC[k] == 0) {
9824 p->TimePerChromaMetaChunkNominal[k] = 0;
9825 p->TimePerChromaMetaChunkVBlank[k] = 0;
9826 p->TimePerChromaMetaChunkFlip[k] = 0;
9827 } else {
9828 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9829 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9830 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
9831 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
9832 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9833 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
9834 } else {
9835 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
9836 }
9837 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
9838 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
9839 } else {
9840 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
9841 }
9842 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9843 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9844 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9845 }
9846 } else {
9847 p->TimePerMetaChunkNominal[k] = 0;
9848 p->TimePerMetaChunkVBlank[k] = 0;
9849 p->TimePerMetaChunkFlip[k] = 0;
9850 p->TimePerChromaMetaChunkNominal[k] = 0;
9851 p->TimePerChromaMetaChunkVBlank[k] = 0;
9852 p->TimePerChromaMetaChunkFlip[k] = 0;
9853 }
9854
9855 #ifdef __DML_VBA_DEBUG__
9856 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
9857 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
9858 dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
9859 dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
9860 dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
9861 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
9862 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
9863 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
9864 #endif
9865 }
9866
9867 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9868 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9869 if (p->BytePerPixelC[k] == 0) {
9870 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9871 } else {
9872 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9873 }
9874 }
9875
9876 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9877 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9878
9879 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
9880 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
9881 else
9882 p->time_per_tdlut_group[k] = 0;
9883
9884 dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
9885
9886 if (p->display_cfg->gpuvm_enable == true) {
9887 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9888 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
9889 } else {
9890 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
9891 }
9892 if (p->use_one_row_for_frame[k]) {
9893 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
9894 } else {
9895 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
9896 }
9897 if (dpte_groups_per_row_luma_ub <= 2) {
9898 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
9899 }
9900 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
9901 dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
9902 dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
9903 dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
9904 dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
9905 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
9906 dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
9907 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
9908
9909 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9910 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9911 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9912 if (p->BytePerPixelC[k] == 0) {
9913 p->time_per_pte_group_nom_chroma[k] = 0;
9914 p->time_per_pte_group_vblank_chroma[k] = 0;
9915 p->time_per_pte_group_flip_chroma[k] = 0;
9916 } else {
9917 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9918 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
9919 } else {
9920 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
9921 }
9922
9923 if (p->use_one_row_for_frame[k]) {
9924 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
9925 } else {
9926 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
9927 }
9928 if (dpte_groups_per_row_chroma_ub <= 2) {
9929 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
9930 }
9931 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
9932 dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
9933 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
9934
9935 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9936 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9937 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9938 }
9939 } else {
9940 p->time_per_pte_group_nom_luma[k] = 0;
9941 p->time_per_pte_group_vblank_luma[k] = 0;
9942 p->time_per_pte_group_flip_luma[k] = 0;
9943 p->time_per_pte_group_nom_chroma[k] = 0;
9944 p->time_per_pte_group_vblank_chroma[k] = 0;
9945 p->time_per_pte_group_flip_chroma[k] = 0;
9946 }
9947 #ifdef __DML_VBA_DEBUG__
9948 dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
9949 dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
9950
9951 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
9952 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
9953 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
9954 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
9955 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
9956 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
9957 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
9958 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
9959 #endif
9960 }
9961 } // CalculateMetaAndPTETimes
9962
CalculateVMGroupAndRequestTimes(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelC[],double dst_y_per_vm_vblank[],double dst_y_per_vm_flip[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int tdlut_pte_bytes_per_frame[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],bool mrq_present,double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])9963 static void CalculateVMGroupAndRequestTimes(
9964 const struct dml2_display_cfg *display_cfg,
9965 unsigned int NumberOfActiveSurfaces,
9966 unsigned int BytePerPixelC[],
9967 double dst_y_per_vm_vblank[],
9968 double dst_y_per_vm_flip[],
9969 unsigned int dpte_row_width_luma_ub[],
9970 unsigned int dpte_row_width_chroma_ub[],
9971 unsigned int vm_group_bytes[],
9972 unsigned int dpde0_bytes_per_frame_ub_l[],
9973 unsigned int dpde0_bytes_per_frame_ub_c[],
9974 unsigned int tdlut_pte_bytes_per_frame[],
9975 unsigned int meta_pte_bytes_per_frame_ub_l[],
9976 unsigned int meta_pte_bytes_per_frame_ub_c[],
9977 bool mrq_present,
9978
9979 // Output
9980 double TimePerVMGroupVBlank[],
9981 double TimePerVMGroupFlip[],
9982 double TimePerVMRequestVBlank[],
9983 double TimePerVMRequestFlip[])
9984 {
9985 unsigned int num_group_per_lower_vm_stage = 0;
9986 unsigned int num_req_per_lower_vm_stage = 0;
9987 unsigned int num_group_per_lower_vm_stage_flip;
9988 unsigned int num_group_per_lower_vm_stage_pref;
9989 unsigned int num_req_per_lower_vm_stage_flip;
9990 unsigned int num_req_per_lower_vm_stage_pref;
9991 double line_time;
9992
9993 #ifdef __DML_VBA_DEBUG__
9994 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
9995 #endif
9996 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9997 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9998 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
9999 #ifdef __DML_VBA_DEBUG__
10000 dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
10001 dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
10002 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
10003 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
10004 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
10005 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
10006 #endif
10007
10008 if (display_cfg->gpuvm_enable) {
10009 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
10010 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
10011
10012 if (BytePerPixelC[k] > 0)
10013 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
10014 }
10015
10016 if (dcc_mrq_enable) {
10017 if (BytePerPixelC[k] > 0) {
10018 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
10019 math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
10020 } else {
10021 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
10022 }
10023 }
10024
10025 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
10026 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
10027
10028 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10029 num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
10030 if (display_cfg->gpuvm_max_page_table_levels >= 2)
10031 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
10032 }
10033
10034 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
10035 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
10036 if (BytePerPixelC[k] > 0)
10037 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k];
10038 }
10039
10040 if (dcc_mrq_enable) {
10041 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
10042 if (BytePerPixelC[k] > 0)
10043 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
10044 }
10045
10046 num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
10047 num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
10048
10049 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
10050 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
10051 }
10052
10053 line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
10054
10055 if (num_group_per_lower_vm_stage_pref > 0)
10056 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
10057 else
10058 TimePerVMGroupVBlank[k] = 0;
10059
10060 if (num_group_per_lower_vm_stage_flip > 0)
10061 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
10062 else
10063 TimePerVMGroupFlip[k] = 0;
10064
10065 if (num_req_per_lower_vm_stage_pref > 0)
10066 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
10067 else
10068 TimePerVMRequestVBlank[k] = 0.0;
10069 if (num_req_per_lower_vm_stage_flip > 0)
10070 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
10071 else
10072 TimePerVMRequestFlip[k] = 0.0;
10073
10074 dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
10075 dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
10076 dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
10077 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref);
10078 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip);
10079 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref);
10080 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip);
10081
10082 if (display_cfg->gpuvm_max_page_table_levels > 2) {
10083 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
10084 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
10085 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
10086 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
10087 }
10088
10089 } else {
10090 TimePerVMGroupVBlank[k] = 0;
10091 TimePerVMGroupFlip[k] = 0;
10092 TimePerVMRequestVBlank[k] = 0;
10093 TimePerVMRequestFlip[k] = 0;
10094 }
10095
10096 #ifdef __DML_VBA_DEBUG__
10097 dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
10098 dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
10099 dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
10100 dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
10101 #endif
10102 }
10103 }
10104
CalculateStutterEfficiency(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateStutterEfficiency_params * p)10105 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
10106 struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
10107 {
10108 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
10109
10110 unsigned int TotalNumberOfActiveOTG = 0;
10111 double SinglePixelClock = 0;
10112 unsigned int SingleHTotal = 0;
10113 unsigned int SingleVTotal = 0;
10114 bool SameTiming = true;
10115 bool FoundCriticalSurface = false;
10116 double LastZ8StutterPeriod = 0;
10117
10118 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
10119
10120 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10121 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10122 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
10123 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
10124 l->MaximumEffectiveCompressionLuma = 2;
10125 } else {
10126 l->MaximumEffectiveCompressionLuma = 4;
10127 }
10128 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
10129 #ifdef __DML_VBA_DEBUG__
10130 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10131 dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
10132 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
10133 #endif
10134 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
10135 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
10136
10137 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
10138 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
10139 l->MaximumEffectiveCompressionChroma = 2;
10140 } else {
10141 l->MaximumEffectiveCompressionChroma = 4;
10142 }
10143 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
10144 #ifdef __DML_VBA_DEBUG__
10145 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
10146 dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
10147 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
10148 #endif
10149 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
10150 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
10151 }
10152 } else {
10153 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
10154 }
10155 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
10156 }
10157 }
10158
10159 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
10160 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
10161
10162 #ifdef __DML_VBA_DEBUG__
10163 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
10164 dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
10165 dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
10166 dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
10167 dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
10168 dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
10169 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10170 dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
10171
10172 dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
10173 dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
10174 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
10175 dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
10176 #endif
10177 if (l->AverageDCCZeroSizeFraction == 1) {
10178 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10179 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
10180
10181
10182 } else if (l->AverageDCCZeroSizeFraction > 0) {
10183 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
10184 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10185 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
10186 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
10187 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
10188 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10189
10190
10191 #ifdef __DML_VBA_DEBUG__
10192 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10193 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
10194 dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
10195 dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
10196 #endif
10197 } else {
10198 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
10199 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
10200 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
10201
10202 #ifdef __DML_VBA_DEBUG__
10203 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
10204 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
10205 #endif
10206 }
10207
10208 #ifdef __DML_VBA_DEBUG__
10209 dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
10210 dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
10211 dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
10212 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10213 #endif
10214
10215 *p->StutterPeriod = 0;
10216
10217 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10218 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10219 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
10220 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
10221 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10222 #ifdef __DML_VBA_DEBUG__
10223 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
10224 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
10225 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
10226 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
10227 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
10228 dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
10229 dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
10230 dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
10231 dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
10232 #endif
10233
10234 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
10235 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
10236
10237 FoundCriticalSurface = true;
10238 *p->StutterPeriod = l->DETBufferingTimeY;
10239 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10240 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10241 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
10242 l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
10243 l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
10244 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
10245 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
10246 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
10247 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
10248 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
10249
10250 #ifdef __DML_VBA_DEBUG__
10251 dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
10252 dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
10253 dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
10254 dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
10255 dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
10256 dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
10257 dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
10258 dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
10259 dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
10260 dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
10261 dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
10262 #endif
10263 }
10264 }
10265 }
10266
10267 // For bounded requests, the stutter period is calculated based only on the DET size, but during the burst there can be some return inside the ROB/compressed buffer.
10268 // The stutter period is calculated only from the DET sizing:
10269 // if (cdb + rob >= det), the stutter burst will be absorbed by the cdb + rob, which is before decompression;
10270 // else
10271 // the cdb + rob part will be returned at the compressed rate with urg bw (ideal bw),
10272 // the det part will be returned at the uncompressed rate with 64B/dcfclk.
10273 //
10274 // For unbounded requests, the stutter period should be calculated as the total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
10275 // should be == EffectiveCompressedBufferSize, which will be returned at the compressed rate; the rest of the stutter period comes from the DET and will be returned at the uncompressed rate with 64B/dcfclk.
10276
10277 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
10278 #ifdef __DML_VBA_DEBUG__
10279 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
10280 dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
10281 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
10282 dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
10283 dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
10284 dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
10285 dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
10286 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
10287 #endif
10288
10289 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
10290 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10291 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
10292 / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
10293 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
10294 #ifdef __DML_VBA_DEBUG__
10295 dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
10296 dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
10297 dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
10298 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10299 #endif
10300 l->TotalActiveWriteback = 0;
10301 memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
10302
10303 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
10304 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
10305 if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
10306
10307 if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0)
10308 l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
10309
10310 if (TotalNumberOfActiveOTG == 0) { // first otg
10311 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10312 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10313 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
10314 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) ||
10315 SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total ||
10316 SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
10317 SameTiming = false;
10318 }
10319 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
10320 l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1;
10321 }
10322 }
10323 }
10324
10325 if (l->TotalActiveWriteback == 0) {
10326 #ifdef __DML_VBA_DEBUG__
10327 dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
10328 dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
10329 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10330 #endif
10331 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10332 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
10333 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10334 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
10335 } else {
10336 *p->StutterEfficiencyNotIncludingVBlank = 0.;
10337 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
10338 *p->NumberOfStutterBurstsPerFrame = 0;
10339 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10340 }
10341 #ifdef __DML_VBA_DEBUG__
10342 dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
10343 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10344 dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
10345 dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
10346 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10347 #endif
10348
10349 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
10350 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10351 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
10352 } else {
10353 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10354 }
10355 } else {
10356 *p->StutterEfficiency = 0;
10357 *p->NumberOfStutterBurstsPerFrame = 0;
10358 }
10359
10360 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
10361 LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
10362 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
10363 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
10364 } else {
10365 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
10366 }
10367 } else {
10368 *p->Z8StutterEfficiency = 0.;
10369 *p->Z8NumberOfStutterBurstsPerFrame = 0;
10370 }
10371
10372 #ifdef __DML_VBA_DEBUG__
10373 dml2_printf("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
10374 dml2_printf("DML::%s: SameTiming = %u\n", __func__, SameTiming);
10375 dml2_printf("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
10376 dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
10377 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
10378 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
10379 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
10380 dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
10381 dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
10382 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
10383 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
10384 #endif
10385
10386 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
10387
10388 #ifdef __DML_VBA_DEBUG__
10389 dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
10390 dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
10391 dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
10392 #endif
10393 }
10394
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex * in_out_params)10395 static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
10396 {
10397 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
10398 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
10399 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
10400 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
10401 struct dml2_display_cfg_programming *programming = in_out_params->programming;
10402
10403 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
10404 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
10405 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
10406 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
10407 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
10408 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
10409 struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params;
10410 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
10411 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
10412 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
10413 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
10414 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
10415
10416 unsigned int k;
10417 bool must_support_iflip;
10418 const long min_return_uclk_cycles = 83;
10419 const long min_return_fclk_cycles = 75;
10420 const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
10421 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
10422 double max_uclk_mhz = 0;
10423 double min_return_latency_in_DCFCLK_cycles = 0;
10424
10425 dml2_printf("DML::%s: --- START --- \n", __func__);
10426
10427 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
10428 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
10429
10430 s->num_active_planes = display_cfg->num_planes;
10431 get_stream_output_bpp(s->OutputBpp, display_cfg);
10432
10433 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
10434 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
10435
10436 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
10437 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
10438 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
10439 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
10440 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
10441 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
10442 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
10443 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
10444
10445 for (k = 0; k < s->num_active_planes; ++k) {
10446 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10447 DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
10448 DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
10449 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
10450 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10451
10452 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
10453 DML2_ASSERT(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10454
10455 switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
10456 case (4):
10457 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
10458 break;
10459 case (3):
10460 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
10461 break;
10462 case (2):
10463 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
10464 break;
10465 default:
10466 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
10467 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
10468 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
10469 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
10470 else
10471 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
10472 break;
10473 }
10474 }
10475
10476 for (k = 0; k < s->num_active_planes; ++k) {
10477 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
10478 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
10479 DML2_ASSERT(mode_lib->mp.Dppclk[k] > 0);
10480 }
10481
10482 for (k = 0; k < s->num_active_planes; ++k) {
10483 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10484 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
10485 dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
10486 }
10487
10488 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
10489 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
10490
10491 DML2_ASSERT(mode_lib->mp.Dcfclk > 0);
10492 DML2_ASSERT(mode_lib->mp.FabricClock > 0);
10493 DML2_ASSERT(mode_lib->mp.dram_bw_mbps > 0);
10494 DML2_ASSERT(mode_lib->mp.uclk_freq_mhz > 0);
10495 DML2_ASSERT(mode_lib->mp.GlobalDPPCLK > 0);
10496 DML2_ASSERT(mode_lib->mp.Dispclk > 0);
10497 DML2_ASSERT(mode_lib->mp.DCFCLKDeepSleep > 0);
10498 DML2_ASSERT(s->SOCCLK > 0);
10499
10500 #ifdef __DML_VBA_DEBUG__
10501 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
10502 dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
10503 dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
10504 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
10505 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
10506 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
10507 dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
10508 for (k = 0; k < s->num_active_planes; ++k) {
10509 dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
10510 }
10511 dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
10512 dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
10513 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
10514 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
10515 dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
10516 dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
10517 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
10518 dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
10519 dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10520 }
10521
10522 for (k = 0; k < s->num_active_planes; k++)
10523 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
10524 #endif
10525
10526 CalculateMaxDETAndMinCompressedBufferSize(
10527 mode_lib->ip.config_return_buffer_size_in_kbytes,
10528 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
10529 mode_lib->ip.rob_buffer_size_kbytes,
10530 mode_lib->ip.max_num_dpp,
10531 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
10532 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
10533 mode_lib->ip.dcn_mrq_present,
10534
10535 /* Output */
10536 &s->MaxTotalDETInKByte,
10537 &s->NomDETInKByte,
10538 &s->MinCompressedBufferSizeInKByte);
10539
10540
10541 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
10542
10543 for (k = 0; k < s->num_active_planes; ++k) {
10544 CalculateSinglePipeDPPCLKAndSCLThroughput(
10545 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
10546 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
10547 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10548 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10549 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
10550 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
10551 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10552 display_cfg->plane_descriptors[k].pixel_format,
10553 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
10554 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
10555 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
10556 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
10557
10558 /* Output */
10559 &mode_lib->mp.PSCL_THROUGHPUT[k],
10560 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
10561 &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
10562 }
10563
10564 for (k = 0; k < s->num_active_planes; ++k) {
10565 CalculateBytePerPixelAndBlockSizes(
10566 display_cfg->plane_descriptors[k].pixel_format,
10567 display_cfg->plane_descriptors[k].surface.tiling,
10568 display_cfg->plane_descriptors[k].surface.plane0.pitch,
10569 display_cfg->plane_descriptors[k].surface.plane1.pitch,
10570
10571 // Output
10572 &mode_lib->mp.BytePerPixelY[k],
10573 &mode_lib->mp.BytePerPixelC[k],
10574 &mode_lib->mp.BytePerPixelInDETY[k],
10575 &mode_lib->mp.BytePerPixelInDETC[k],
10576 &mode_lib->mp.Read256BlockHeightY[k],
10577 &mode_lib->mp.Read256BlockHeightC[k],
10578 &mode_lib->mp.Read256BlockWidthY[k],
10579 &mode_lib->mp.Read256BlockWidthC[k],
10580 &mode_lib->mp.MacroTileHeightY[k],
10581 &mode_lib->mp.MacroTileHeightC[k],
10582 &mode_lib->mp.MacroTileWidthY[k],
10583 &mode_lib->mp.MacroTileWidthC[k],
10584 &mode_lib->mp.surf_linear128_l[k],
10585 &mode_lib->mp.surf_linear128_c[k]);
10586 }
10587
10588 CalculateSwathWidth(
10589 display_cfg,
10590 false, // ForceSingleDPP
10591 s->num_active_planes,
10592 mode_lib->mp.ODMMode,
10593 mode_lib->mp.BytePerPixelY,
10594 mode_lib->mp.BytePerPixelC,
10595 mode_lib->mp.Read256BlockHeightY,
10596 mode_lib->mp.Read256BlockHeightC,
10597 mode_lib->mp.Read256BlockWidthY,
10598 mode_lib->mp.Read256BlockWidthC,
10599 mode_lib->mp.surf_linear128_l,
10600 mode_lib->mp.surf_linear128_c,
10601 mode_lib->mp.NoOfDPP,
10602
10603 /* Output */
10604 mode_lib->mp.req_per_swath_ub_l,
10605 mode_lib->mp.req_per_swath_ub_c,
10606 mode_lib->mp.SwathWidthSingleDPPY,
10607 mode_lib->mp.SwathWidthSingleDPPC,
10608 mode_lib->mp.SwathWidthY,
10609 mode_lib->mp.SwathWidthC,
10610 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
10611 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
10612 mode_lib->mp.swath_width_luma_ub,
10613 mode_lib->mp.swath_width_chroma_ub);
10614
10615 for (k = 0; k < s->num_active_planes; ++k) {
10616 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
10617 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
10618 mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10619 mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10620 dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
10621 dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
10622 }
10623
10624 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
10625 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
10626 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
10627 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
10628 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10629 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10630 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10631 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10632 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
10633 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
10634 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
10635 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
10636 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
10637 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l;
10638 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c;
10639 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
10640 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
10641 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
10642 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
10643 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
10644 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
10645 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
10646 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
10647 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
10648 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
10649 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
10650 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
10651 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
10652 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
10653 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10654
10655 // output
10656 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
10657 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
10658 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
10659 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
10660 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
10661 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
10662 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
10663 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
10664 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
10665 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
10666 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
10667 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
10668 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
10669 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
10670 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
10671 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
10672 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
10673 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
10674 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
10675 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
10676 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
10677
10678 // Calculate DET size, swath height here.
10679 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
10680
10681 // DSC Delay
10682 for (k = 0; k < s->num_active_planes; ++k) {
10683 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
10684 mode_lib->mp.ODMMode[k],
10685 mode_lib->ip.maximum_dsc_bits_per_component,
10686 s->OutputBpp[k],
10687 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
10688 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
10689 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
10690 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
10691 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
10692 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10693 s->PixelClockBackEnd[k]);
10694 }
10695
10696 // Prefetch
10697 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
10698 for (k = 0; k < s->num_active_planes; ++k)
10699 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
10700 } else {
10701 CalculateSurfaceSizeInMall(
10702 display_cfg,
10703 s->num_active_planes,
10704 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10705 mode_lib->mp.BytePerPixelY,
10706 mode_lib->mp.BytePerPixelC,
10707 mode_lib->mp.Read256BlockWidthY,
10708 mode_lib->mp.Read256BlockWidthC,
10709 mode_lib->mp.Read256BlockHeightY,
10710 mode_lib->mp.Read256BlockHeightC,
10711 mode_lib->mp.MacroTileWidthY,
10712 mode_lib->mp.MacroTileWidthC,
10713 mode_lib->mp.MacroTileHeightY,
10714 mode_lib->mp.MacroTileHeightC,
10715
10716 /* Output */
10717 mode_lib->mp.SurfaceSizeInTheMALL,
10718 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
10719 }
10720
10721 for (k = 0; k < s->num_active_planes; ++k) {
10722 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10723 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10724 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10725 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10726 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10727 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10728 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10729 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10730 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10731 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
10732 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
10733 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
10734 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
10735 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10736 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10737 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10738 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10739 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
10740 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10741 s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10742 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10743 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10744 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10745 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10746 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10747 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
10748 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
10749 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10750 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10751 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10752 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10753 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10754 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
10755 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
10756 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
10757 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
10758 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
10759 }
10760
10761 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
10762 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
10763 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
10764 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
10765 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
10766 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
10767 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
10768 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
10769 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
10770 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10771 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
10772 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10773
10774 // output
10775 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
10776 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
10777 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
10778 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
10779 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
10780 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
10781 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
10782 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
10783 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
10784 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
10785 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
10786 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
10787 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
10788 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
10789 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
10790 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
10791 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
10792 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
10793 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
10794 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
10795 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
10796 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
10797 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
10798 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
10799 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
10800 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
10801 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
10802 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10803 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
10804 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10805 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10806 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
10807 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
10808 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
10809 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
10810 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
10811 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
10812 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
10813 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10814 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
10815 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10816 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10817 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
10818 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
10819 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
10820 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
10821 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
10822 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
10823 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
10824 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
10825 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
10826 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
10827
10828 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
10829
10830 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
10831 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
10832 for (k = 0; k < s->num_active_planes; k++) {
10833 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
10834 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
10835 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
10836 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
10837 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
10838 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
10839 }
10840 } else {
10841 for (k = 0; k < s->num_active_planes; k++) {
10842 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10843 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
10844 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
10845 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
10846 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
10847 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10848 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10849
10850 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
10851 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
10852 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10853 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
10854 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
10855
10856 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10857 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10858 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
10859 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10860 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
10861 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
10862 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
10863 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
10864 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
10865 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
10866
10867 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10868 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10869 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
10870 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10871 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
10872 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
10873 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
10874 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
10875 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
10876 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
10877
10878 // output
10879 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
10880 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
10881 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
10882 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
10883
10884 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
10885 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
10886 calculate_mcache_setting_params->mcache_row_bytes_per_channel_l = &mode_lib->mp.mcache_row_bytes_per_channel_l[k];
10887 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
10888 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
10889
10890 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
10891 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
10892 calculate_mcache_setting_params->mcache_row_bytes_per_channel_c = &mode_lib->mp.mcache_row_bytes_per_channel_c[k];
10893 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
10894 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
10895
10896 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
10897 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
10898 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
10899 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
10900 }
10901
10902 calculate_mall_bw_overhead_factor(
10903 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10904 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10905
10906 // input
10907 display_cfg,
10908 s->num_active_planes);
10909 }
10910
10911 // Calculate all the bandwidth available
10912 calculate_bandwidth_available(
10913 mode_lib->mp.avg_bandwidth_available_min,
10914 mode_lib->mp.avg_bandwidth_available,
10915 mode_lib->mp.urg_bandwidth_available_min,
10916 mode_lib->mp.urg_bandwidth_available,
10917 mode_lib->mp.urg_bandwidth_available_vm_only,
10918 mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
10919
10920 &mode_lib->soc,
10921 display_cfg->hostvm_enable,
10922 mode_lib->mp.Dcfclk,
10923 mode_lib->mp.FabricClock,
10924 mode_lib->mp.dram_bw_mbps);
10925
10926
10927 calculate_hostvm_inefficiency_factor(
10928 &s->HostVMInefficiencyFactor,
10929 &s->HostVMInefficiencyFactorPrefetch,
10930
10931 display_cfg->gpuvm_enable,
10932 display_cfg->hostvm_enable,
10933 mode_lib->ip.remote_iommu_outstanding_translations,
10934 mode_lib->soc.max_outstanding_reqs,
10935 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
10936 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
10937
10938 s->TotalDCCActiveDPP = 0;
10939 s->TotalActiveDPP = 0;
10940 for (k = 0; k < s->num_active_planes; ++k) {
10941 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
10942 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
10943 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
10944 }
10945 // Calculate tdlut schedule related terms
10946 for (k = 0; k <= s->num_active_planes - 1; k++) {
10947 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
10948 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10949 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
10950 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
10951 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
10952 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10953 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10954
10955 // output
10956 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
10957 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
10958 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
10959 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
10960 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
10961 calculate_tdlut_setting_params->tdlut_bytes_to_deliver = &s->tdlut_bytes_to_deliver[k];
10962 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
10963 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
10964 }
10965
10966 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
10967 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
10968 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
10969 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
10970
10971 CalculateExtraLatency(
10972 display_cfg,
10973 mode_lib->ip.rob_buffer_size_kbytes,
10974 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
10975 s->ReorderingBytes,
10976 mode_lib->mp.Dcfclk,
10977 mode_lib->mp.FabricClock,
10978 mode_lib->ip.pixel_chunk_size_kbytes,
10979 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
10980 s->num_active_planes,
10981 mode_lib->mp.NoOfDPP,
10982 mode_lib->mp.dpte_group_bytes,
10983 s->tdlut_bytes_per_group,
10984 s->HostVMInefficiencyFactor,
10985 s->HostVMInefficiencyFactorPrefetch,
10986 mode_lib->soc.hostvm_min_page_size_kbytes,
10987 mode_lib->soc.qos_parameters.qos_type,
10988 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
10989 mode_lib->soc.max_outstanding_reqs,
10990 mode_lib->mp.request_size_bytes_luma,
10991 mode_lib->mp.request_size_bytes_chroma,
10992 mode_lib->ip.meta_chunk_size_kbytes,
10993 mode_lib->ip.dchub_arb_to_ret_delay,
10994 mode_lib->mp.TripToMemory,
10995 mode_lib->ip.hostvm_mode,
10996
10997 // output
10998 &mode_lib->mp.ExtraLatency,
10999 &mode_lib->mp.ExtraLatency_sr,
11000 &mode_lib->mp.ExtraLatencyPrefetch);
11001
11002 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
11003
11004 for (k = 0; k < s->num_active_planes; ++k) {
11005 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
11006 mode_lib->mp.WritebackDelay[k] =
11007 mode_lib->soc.qos_parameters.writeback.base_latency_us
11008 + CalculateWriteBackDelay(
11009 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format,
11010 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio,
11011 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio,
11012 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps,
11013 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width,
11014 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height,
11015 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height,
11016 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
11017 } else
11018 mode_lib->mp.WritebackDelay[k] = 0;
11019 }
11020
11021 /* VActive bytes to fetch for UCLK P-State */
11022 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
11023 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11024
11025 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
11026 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11027 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11028 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11029 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
11030 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
11031 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
11032 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
11033 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
11034 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
11035 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
11036 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
11037 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
11038 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
11039 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
11040 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
11041 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11042
11043 /* outputs */
11044 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
11045 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
11046
11047 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
11048
11049 /* Excess VActive bandwidth required to fill DET */
11050 calculate_excess_vactive_bandwidth_required(
11051 display_cfg,
11052 s->num_active_planes,
11053 s->pstate_bytes_required_l,
11054 s->pstate_bytes_required_c,
11055 /* outputs */
11056 mode_lib->mp.excess_vactive_fill_bw_l,
11057 mode_lib->mp.excess_vactive_fill_bw_c);
11058
11059 mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
11060 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
11061 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
11062 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
11063 mode_lib->soc.do_urgent_latency_adjustment,
11064 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
11065 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
11066 mode_lib->mp.FabricClock,
11067 mode_lib->mp.uclk_freq_mhz,
11068 mode_lib->soc.qos_parameters.qos_type,
11069 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
11070 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
11071 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11072 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11073 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
11074 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11075
11076 mode_lib->mp.TripToMemory = CalculateTripToMemory(
11077 mode_lib->mp.UrgentLatency,
11078 mode_lib->mp.FabricClock,
11079 mode_lib->mp.uclk_freq_mhz,
11080 mode_lib->soc.qos_parameters.qos_type,
11081 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
11082 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
11083 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
11084 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11085 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11086
11087 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
11088
11089 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
11090 mode_lib->mp.UrgentLatency,
11091 mode_lib->mp.FabricClock,
11092 mode_lib->mp.uclk_freq_mhz,
11093 mode_lib->soc.qos_parameters.qos_type,
11094 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
11095 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
11096 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
11097 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
11098
11099 for (k = 0; k < s->num_active_planes; ++k) {
11100 bool cursor_not_enough_urgent_latency_hiding = 0;
11101 s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11102 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11103
11104 s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format;
11105
11106 s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11107 mode_lib->mp.NoOfDPP[k],
11108 display_cfg->plane_descriptors[k].composition.viewport.plane0.width,
11109 display_cfg->plane_descriptors[k].composition.viewport.plane0.height,
11110 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
11111 display_cfg->plane_descriptors[k].composition.rotation_angle);
11112
11113 s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits,
11114 mode_lib->mp.NoOfDPP[k],
11115 display_cfg->plane_descriptors[k].composition.viewport.plane1.width,
11116 display_cfg->plane_descriptors[k].composition.viewport.plane1.height,
11117 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
11118 display_cfg->plane_descriptors[k].composition.rotation_angle);
11119
11120 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
11121 calculate_cursor_req_attributes(
11122 display_cfg->plane_descriptors[k].cursor.cursor_width,
11123 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
11124
11125 // output
11126 &s->cursor_lines_per_chunk[k],
11127 &s->cursor_bytes_per_line[k],
11128 &s->cursor_bytes_per_chunk[k],
11129 &s->cursor_bytes[k]);
11130
11131 calculate_cursor_urgent_burst_factor(
11132 mode_lib->ip.cursor_buffer_size,
11133 display_cfg->plane_descriptors[k].cursor.cursor_width,
11134 s->cursor_bytes_per_chunk[k],
11135 s->cursor_lines_per_chunk[k],
11136 s->line_times[k],
11137 mode_lib->mp.UrgentLatency,
11138
11139 // output
11140 &mode_lib->mp.UrgentBurstFactorCursor[k],
11141 &cursor_not_enough_urgent_latency_hiding);
11142 }
11143 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
11144
11145 CalculateUrgentBurstFactor(
11146 &display_cfg->plane_descriptors[k],
11147 mode_lib->mp.swath_width_luma_ub[k],
11148 mode_lib->mp.swath_width_chroma_ub[k],
11149 mode_lib->mp.SwathHeightY[k],
11150 mode_lib->mp.SwathHeightC[k],
11151 s->line_times[k],
11152 mode_lib->mp.UrgentLatency,
11153 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11154 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11155 mode_lib->mp.BytePerPixelInDETY[k],
11156 mode_lib->mp.BytePerPixelInDETC[k],
11157 mode_lib->mp.DETBufferSizeY[k],
11158 mode_lib->mp.DETBufferSizeC[k],
11159
11160 /* output */
11161 &mode_lib->mp.UrgentBurstFactorLuma[k],
11162 &mode_lib->mp.UrgentBurstFactorChroma[k],
11163 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11164
11165 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
11166 }
11167
11168 for (k = 0; k < s->num_active_planes; ++k) {
11169 s->MaxVStartupLines[k] = CalculateMaxVStartup(
11170 mode_lib->ip.ptoi_supported,
11171 mode_lib->ip.vblank_nom_default_us,
11172 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
11173 mode_lib->mp.WritebackDelay[k]);
11174
11175 #ifdef __DML_VBA_DEBUG__
11176 dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11177 dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
11178 #endif
11179 }
11180
11181 s->immediate_flip_required = false;
11182 for (k = 0; k < s->num_active_planes; ++k) {
11183 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
11184 }
11185 #ifdef __DML_VBA_DEBUG__
11186 dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
11187 #endif
11188
11189 if (s->num_active_planes > 1) {
11190 CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes;
11191 CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format;
11192 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11193 CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024;
11194 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l;
11195 CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c;
11196 CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY;
11197 CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC;
11198 CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
11199 CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte;
11200 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY;
11201 CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC;
11202 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l;
11203 CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c;
11204 CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes;
11205 CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care
11206 CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care
11207 CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
11208 CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk;
11209 CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times;
11210 CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch;
11211
11212 // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible
11213 CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0];
11214 CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre;
11215 CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming
11216 }
11217
11218 {
11219 s->DestinationLineTimesForPrefetchLessThan2 = false;
11220 s->VRatioPrefetchMoreThanMax = false;
11221
11222 dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
11223
11224 for (k = 0; k < s->num_active_planes; ++k) {
11225 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
11226
11227 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11228 mode_lib->mp.TWait[k] = CalculateTWait(
11229 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
11230 mode_lib->mp.UrgentLatency,
11231 mode_lib->mp.TripToMemory,
11232 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
11233 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
11234
11235 myPipe->Dppclk = mode_lib->mp.Dppclk[k];
11236 myPipe->Dispclk = mode_lib->mp.Dispclk;
11237 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11238 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11239 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
11240 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
11241 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
11242 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
11243 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
11244 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
11245 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
11246 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
11247 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
11248 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
11249 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
11250 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
11251 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
11252 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
11253 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
11254 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
11255 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
11256 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11257 myPipe->ODMMode = mode_lib->mp.ODMMode[k];
11258 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
11259 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
11260 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
11261 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11262
11263 #ifdef __DML_VBA_DEBUG__
11264 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
11265 #endif
11266 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
11267 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
11268 CalculatePrefetchSchedule_params->myPipe = myPipe;
11269 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
11270 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
11271 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
11272 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
11273 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
11274 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
11275 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
11276 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
11277 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
11278 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
11279 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
11280 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
11281 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
11282 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
11283 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
11284 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
11285 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
11286 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
11287 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
11288 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
11289 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
11290 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
11291 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
11292 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
11293 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
11294 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
11295 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
11296 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
11297 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
11298 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
11299 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
11300 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
11301 CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
11302 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
11303 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
11304 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
11305 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
11306 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
11307 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
11308 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
11309 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
11310 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
11311 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11312 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
11313 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
11314 CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k];
11315 CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k];
11316 CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k];
11317
11318 // output
11319 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
11320 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
11321 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
11322 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
11323 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
11324 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
11325 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
11326 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
11327 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
11328 CalculatePrefetchSchedule_params->RequiredPrefetchBWOTO = &s->dummy_single_array[0][k];
11329 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
11330 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
11331 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
11332 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
11333 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
11334 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
11335 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
11336 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
11337 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
11338 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
11339 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
11340 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
11341 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
11342 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
11343 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
11344 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
11345 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
11346 CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k];
11347 CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k];
11348 CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k];
11349 CalculatePrefetchSchedule_params->prefetch_swath_time_us = &s->dummy_single[0];
11350
11351 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
11352
11353 if (s->impacted_dst_y_pre[k] > 0)
11354 mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k];
11355 else
11356 mode_lib->mp.impacted_prefetch_margin_us[k] = 0;
11357
11358 #ifdef __DML_VBA_DEBUG__
11359 dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11360 #endif
11361 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
11362 } // for k
11363
11364 mode_lib->mp.PrefetchModeSupported = true;
11365 for (k = 0; k < s->num_active_planes; ++k) {
11366 if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
11367 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
11368 mode_lib->mp.DSTYAfterScaler[k] > 8) {
11369 dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
11370 dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
11371 dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
11372 mode_lib->mp.PrefetchModeSupported = false;
11373 }
11374 if (mode_lib->mp.dst_y_prefetch[k] < 2)
11375 s->DestinationLineTimesForPrefetchLessThan2 = true;
11376
11377 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
11378 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
11379 s->VRatioPrefetchMoreThanMax = true;
11380 dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11381 dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
11382 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11383 }
11384
11385 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
11386 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
11387 mode_lib->mp.PrefetchModeSupported = false;
11388 }
11389 }
11390
11391 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
11392 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
11393 dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
11394 mode_lib->mp.PrefetchModeSupported = false;
11395 }
11396
11397 dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
11398 mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
11399
11400 // Prefetch schedule OK, now check prefetch bw
11401 if (mode_lib->mp.PrefetchModeSupported == true) {
11402 for (k = 0; k < s->num_active_planes; ++k) {
11403 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11404 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11405 CalculateUrgentBurstFactor(
11406 &display_cfg->plane_descriptors[k],
11407 mode_lib->mp.swath_width_luma_ub[k],
11408 mode_lib->mp.swath_width_chroma_ub[k],
11409 mode_lib->mp.SwathHeightY[k],
11410 mode_lib->mp.SwathHeightC[k],
11411 line_time_us,
11412 mode_lib->mp.UrgentLatency,
11413 mode_lib->mp.VRatioPrefetchY[k],
11414 mode_lib->mp.VRatioPrefetchC[k],
11415 mode_lib->mp.BytePerPixelInDETY[k],
11416 mode_lib->mp.BytePerPixelInDETC[k],
11417 mode_lib->mp.DETBufferSizeY[k],
11418 mode_lib->mp.DETBufferSizeC[k],
11419 /* Output */
11420 &mode_lib->mp.UrgentBurstFactorLumaPre[k],
11421 &mode_lib->mp.UrgentBurstFactorChromaPre[k],
11422 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11423
11424 #ifdef __DML_VBA_DEBUG__
11425 dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
11426 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
11427 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
11428 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
11429 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
11430
11431 dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
11432 dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
11433
11434 dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
11435 dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11436 dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11437 dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
11438 dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
11439 dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
11440 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
11441 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
11442 dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
11443 #endif
11444 }
11445
11446 for (k = 0; k <= s->num_active_planes - 1; k++)
11447 mode_lib->mp.final_flip_bw[k] = 0;
11448
11449 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
11450 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
11451 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
11452 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
11453 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11454 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11455
11456 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11457 calculate_peak_bandwidth_params->inc_flip_bw = 0;
11458 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11459 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11460 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11461 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11462 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11463 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11464 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11465 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11466
11467 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11468 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11469 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11470 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11471 calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0];
11472 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11473 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11474 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11475 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11476 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11477 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11478 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11479 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11480 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11481 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11482 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11483 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11484 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11485 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11486
11487 calculate_peak_bandwidth_required(
11488 &mode_lib->scratch,
11489 calculate_peak_bandwidth_params);
11490
11491 // Check urg peak bandwidth against available urg bw
11492 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
11493 check_urgent_bandwidth_support(
11494 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
11495 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
11496 &s->dummy_boolean[1], // vactive bw ok
11497 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
11498
11499 mode_lib->soc.mall_allocated_for_dcn_mbytes,
11500 mode_lib->mp.non_urg_bandwidth_required,
11501 mode_lib->mp.urg_vactive_bandwidth_required,
11502 mode_lib->mp.urg_bandwidth_required,
11503 mode_lib->mp.urg_bandwidth_available);
11504
11505 if (!mode_lib->mp.PrefetchModeSupported)
11506 dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
11507
11508 for (k = 0; k < s->num_active_planes; ++k) {
11509 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
11510 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11511 mode_lib->mp.PrefetchModeSupported = false;
11512 }
11513 }
11514 } // prefetch schedule ok
11515
11516 // Prefetch schedule and prefetch bw ok, now check flip bw
11517 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
11518
11519 mode_lib->mp.BandwidthAvailableForImmediateFlip =
11520 get_bandwidth_available_for_immediate_flip(
11521 dml2_core_internal_soc_state_sys_active,
11522 mode_lib->mp.urg_bandwidth_required_qual, // no flip
11523 mode_lib->mp.urg_bandwidth_available);
11524 mode_lib->mp.TotImmediateFlipBytes = 0;
11525 for (k = 0; k < s->num_active_planes; ++k) {
11526 if (display_cfg->plane_descriptors[k].immediate_flip) {
11527 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
11528 mode_lib->mp.vm_bytes[k],
11529 mode_lib->mp.PixelPTEBytesPerRow[k],
11530 mode_lib->mp.meta_row_bytes[k]);
11531 } else {
11532 s->per_pipe_flip_bytes[k] = 0;
11533 }
11534 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
11535 #ifdef __DML_VBA_DEBUG__
11536 dml2_printf("DML::%s: k = %u\n", __func__, k);
11537 dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
11538 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
11539 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
11540 dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
11541 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
11542 #endif
11543 }
11544 for (k = 0; k < s->num_active_planes; ++k) {
11545 CalculateFlipSchedule(
11546 &mode_lib->scratch,
11547 display_cfg->plane_descriptors[k].immediate_flip,
11548 0, // use_lb_flip_bw
11549 s->HostVMInefficiencyFactor,
11550 s->Tvm_trips_flip[k],
11551 s->Tr0_trips_flip[k],
11552 s->Tvm_trips_flip_rounded[k],
11553 s->Tr0_trips_flip_rounded[k],
11554 display_cfg->gpuvm_enable,
11555 mode_lib->mp.vm_bytes[k],
11556 mode_lib->mp.PixelPTEBytesPerRow[k],
11557 mode_lib->mp.BandwidthAvailableForImmediateFlip,
11558 mode_lib->mp.TotImmediateFlipBytes,
11559 display_cfg->plane_descriptors[k].pixel_format,
11560 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
11561 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11562 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11563 mode_lib->mp.Tno_bw[k],
11564 mode_lib->mp.dpte_row_height[k],
11565 mode_lib->mp.dpte_row_height_chroma[k],
11566 mode_lib->mp.use_one_row_for_frame_flip[k],
11567 mode_lib->ip.max_flip_time_us,
11568 mode_lib->ip.max_flip_time_lines,
11569 s->per_pipe_flip_bytes[k],
11570 mode_lib->mp.meta_row_bytes[k],
11571 mode_lib->mp.meta_row_height[k],
11572 mode_lib->mp.meta_row_height_chroma[k],
11573 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
11574
11575 // Output
11576 &mode_lib->mp.dst_y_per_vm_flip[k],
11577 &mode_lib->mp.dst_y_per_row_flip[k],
11578 &mode_lib->mp.final_flip_bw[k],
11579 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
11580 }
11581
11582 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
11583 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip;
11584 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
11585 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip;
11586 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11587 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11588
11589 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11590 calculate_peak_bandwidth_params->inc_flip_bw = 1;
11591 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11592 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11593 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11594 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11595 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11596 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11597 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11598 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11599
11600 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l;
11601 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c;
11602 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11603 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11604 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11605 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11606 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11607 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11608 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11609 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11610 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11611 calculate_peak_bandwidth_params->prefetch_bandwidth_oto = s->dummy_single_array[0];
11612 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11613 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11614 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11615 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11616 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11617 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11618 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11619
11620 calculate_peak_bandwidth_required(
11621 &mode_lib->scratch,
11622 calculate_peak_bandwidth_params);
11623
11624 calculate_immediate_flip_bandwidth_support(
11625 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
11626 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
11627
11628 dml2_core_internal_soc_state_sys_active,
11629 mode_lib->mp.urg_bandwidth_required_flip,
11630 mode_lib->mp.non_urg_bandwidth_required_flip,
11631 mode_lib->mp.urg_bandwidth_available);
11632
11633 if (!mode_lib->mp.ImmediateFlipSupported)
11634 dml2_printf("DML::%s: Bandwidth not sufficient for flip!", __func__);
11635
11636 for (k = 0; k < s->num_active_planes; ++k) {
11637 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
11638 mode_lib->mp.ImmediateFlipSupported = false;
11639 #ifdef __DML_VBA_DEBUG__
11640 dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
11641 #endif
11642 }
11643 }
11644 } else { // flip or prefetch not support
11645 mode_lib->mp.ImmediateFlipSupported = false;
11646 }
11647
11648 // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
11649 must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
11650 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
11651
11652 #ifdef __DML_VBA_DEBUG__
11653 dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
11654 for (k = 0; k < s->num_active_planes; ++k)
11655 dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
11656 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
11657 dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
11658 dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
11659 #endif
11660 dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
11661 }
11662
11663 for (k = 0; k < s->num_active_planes; ++k)
11664 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11665
11666 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
11667 dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
11668 } else {
11669 dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
11670
11671 // DCC Configuration
11672 for (k = 0; k < s->num_active_planes; ++k) {
11673 #ifdef __DML_VBA_DEBUG__
11674 dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
11675 #endif
11676 CalculateDCCConfiguration(
11677 display_cfg->plane_descriptors[k].surface.dcc.enable,
11678 display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
11679 display_cfg->plane_descriptors[k].pixel_format,
11680 display_cfg->plane_descriptors[k].surface.plane0.width,
11681 display_cfg->plane_descriptors[k].surface.plane1.width,
11682 display_cfg->plane_descriptors[k].surface.plane0.height,
11683 display_cfg->plane_descriptors[k].surface.plane1.height,
11684 s->NomDETInKByte,
11685 mode_lib->mp.Read256BlockHeightY[k],
11686 mode_lib->mp.Read256BlockHeightC[k],
11687 display_cfg->plane_descriptors[k].surface.tiling,
11688 mode_lib->mp.BytePerPixelY[k],
11689 mode_lib->mp.BytePerPixelC[k],
11690 mode_lib->mp.BytePerPixelInDETY[k],
11691 mode_lib->mp.BytePerPixelInDETC[k],
11692 display_cfg->plane_descriptors[k].composition.rotation_angle,
11693
11694 /* Output */
11695 &mode_lib->mp.RequestLuma[k],
11696 &mode_lib->mp.RequestChroma[k],
11697 &mode_lib->mp.DCCYMaxUncompressedBlock[k],
11698 &mode_lib->mp.DCCCMaxUncompressedBlock[k],
11699 &mode_lib->mp.DCCYMaxCompressedBlock[k],
11700 &mode_lib->mp.DCCCMaxCompressedBlock[k],
11701 &mode_lib->mp.DCCYIndependentBlock[k],
11702 &mode_lib->mp.DCCCIndependentBlock[k]);
11703 }
11704
11705 //Watermarks and NB P-State/DRAM Clock Change Support
11706 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
11707 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
11708 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
11709 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
11710 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11711 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
11712 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11713 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
11714 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11715 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
11716 s->mmSOCParameters.USRRetrainingLatency = 0;
11717 s->mmSOCParameters.SMNLatency = 0;
11718 s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
11719 s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index);
11720 s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock;
11721 s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
11722
11723 CalculateWatermarks_params->display_cfg = display_cfg;
11724 CalculateWatermarks_params->USRRetrainingRequired = false;
11725 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
11726 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
11727 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
11728 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
11729 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
11730 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11731 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
11732 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11733 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
11734 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
11735 CalculateWatermarks_params->SOCCLK = s->SOCCLK;
11736 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11737 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11738 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
11739 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11740 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11741 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11742 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
11743 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11744 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
11745 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
11746 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
11747 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11748 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11749 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11750 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11751 CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11752
11753 // Output
11754 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
11755 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
11756 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
11757 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
11758 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
11759 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
11760 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
11761 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
11762 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
11763 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support;
11764 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
11765 CalculateWatermarks_params->VActiveLatencyHidingUs = 0;
11766
11767 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
11768
11769 for (k = 0; k < s->num_active_planes; ++k) {
11770 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) {
11771 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11772 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
11773 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11774 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
11775 } else {
11776 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
11777 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
11778 }
11779 }
11780
11781 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines);
11782
11783 dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
11784 dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
11785
11786 //Display Pipeline Delivery Time in Prefetch, Groups
11787 CalculatePixelDeliveryTimes(
11788 display_cfg,
11789 cfg_support_info,
11790 s->num_active_planes,
11791 mode_lib->mp.VRatioPrefetchY,
11792 mode_lib->mp.VRatioPrefetchC,
11793 mode_lib->mp.swath_width_luma_ub,
11794 mode_lib->mp.swath_width_chroma_ub,
11795 mode_lib->mp.PSCL_THROUGHPUT,
11796 mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
11797 mode_lib->mp.Dppclk,
11798 mode_lib->mp.BytePerPixelC,
11799 mode_lib->mp.req_per_swath_ub_l,
11800 mode_lib->mp.req_per_swath_ub_c,
11801
11802 /* Output */
11803 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
11804 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
11805 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
11806 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
11807 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
11808 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
11809 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
11810 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
11811
11812 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
11813 CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
11814 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
11815 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
11816 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
11817 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
11818 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11819 CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
11820 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
11821 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
11822 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11823 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
11824 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
11825 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
11826 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
11827 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
11828 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
11829 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
11830 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
11831 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
11832 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11833
11834 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
11835 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
11836 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
11837 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
11838 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
11839 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
11840 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
11841 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
11842 CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
11843 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
11844
11845 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
11846 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
11847 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
11848 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
11849 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
11850 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
11851 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
11852 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
11853 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
11854 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
11855 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
11856 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
11857 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
11858 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
11859 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
11860 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
11861 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
11862
11863 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
11864
11865 CalculateVMGroupAndRequestTimes(
11866 display_cfg,
11867 s->num_active_planes,
11868 mode_lib->mp.BytePerPixelC,
11869 mode_lib->mp.dst_y_per_vm_vblank,
11870 mode_lib->mp.dst_y_per_vm_flip,
11871 mode_lib->mp.dpte_row_width_luma_ub,
11872 mode_lib->mp.dpte_row_width_chroma_ub,
11873 mode_lib->mp.vm_group_bytes,
11874 mode_lib->mp.dpde0_bytes_per_frame_ub_l,
11875 mode_lib->mp.dpde0_bytes_per_frame_ub_c,
11876 s->tdlut_pte_bytes_per_frame,
11877 mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
11878 mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
11879 mode_lib->ip.dcn_mrq_present,
11880
11881 /* Output */
11882 mode_lib->mp.TimePerVMGroupVBlank,
11883 mode_lib->mp.TimePerVMGroupFlip,
11884 mode_lib->mp.TimePerVMRequestVBlank,
11885 mode_lib->mp.TimePerVMRequestFlip);
11886
11887 // VStartup Adjustment
11888 for (k = 0; k < s->num_active_planes; ++k) {
11889 bool isInterlaceTiming;
11890
11891 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
11892 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
11893 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
11894
11895 #ifdef __DML_VBA_DEBUG__
11896 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11897 #endif
11898 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11899 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
11900
11901 #ifdef __DML_VBA_DEBUG__
11902 dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
11903 dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11904 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11905 #endif
11906
11907 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
11908 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
11909 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
11910 }
11911
11912 isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
11913
11914 // The actual positioning of the vstartup
11915 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
11916
11917 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
11918 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11919 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
11920 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
11921
11922 if (s->blank_lines_remaining < 0) {
11923 dml2_printf("ERROR: Vstartup is larger than vblank!?\n");
11924 s->blank_lines_remaining = 0;
11925 DML2_ASSERT(0);
11926 }
11927 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
11928
11929 // debug only
11930 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
11931 (isInterlaceTiming ?
11932 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
11933 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
11934 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
11935 } else {
11936 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
11937 }
11938 #ifdef __DML_VBA_DEBUG__
11939 dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
11940 dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
11941 dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
11942 dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
11943 dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
11944 dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
11945 dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
11946 dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
11947 dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11948 dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
11949 dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
11950 dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
11951 #endif
11952 }
11953
11954 //Maximum Bandwidth Used
11955 s->TotalWRBandwidth = 0;
11956 for (k = 0; k < display_cfg->num_streams; ++k) {
11957 s->WRBandwidth = 0;
11958 if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) {
11959 s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height
11960 * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width /
11961 (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height
11962 / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000))
11963 * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0);
11964 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
11965 }
11966 }
11967
11968 mode_lib->mp.TotalDataReadBandwidth = 0;
11969 for (k = 0; k < s->num_active_planes; ++k) {
11970 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k];
11971 #ifdef __DML_VBA_DEBUG__
11972 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
11973 dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]);
11974 dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]);
11975 #endif
11976 }
11977
11978 CalculateStutterEfficiency_params->display_cfg = display_cfg;
11979 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11980 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11981 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
11982 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
11983 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
11984 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
11985 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
11986 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
11987 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
11988 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
11989 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
11990 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
11991 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11992 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11993 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11994 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
11995 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
11996 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11997 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
11998 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11999 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
12000 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
12001 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
12002 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
12003 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
12004 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
12005 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
12006 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
12007 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
12008 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
12009 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
12010 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
12011 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l;
12012 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c;
12013 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
12014 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
12015 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
12016
12017 // output
12018 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
12019 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
12020 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
12021 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12022 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
12023 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12024 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
12025 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
12026
12027 // Stutter Efficiency
12028 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12029
12030 #ifdef __DML_VBA_ALLOW_DELTA__
12031 // Calculate z8 stutter eff assuming 0 reserved space
12032 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
12033 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
12034
12035 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
12036 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
12037 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
12038 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
12039
12040 // Stutter Efficiency
12041 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
12042 #else
12043 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
12044 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
12045 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
12046 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
12047 #endif
12048 } // PrefetchAndImmediateFlipSupported
12049
12050 max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
12051 min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
12052 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
12053 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
12054 DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
12055
12056 #ifdef __DML_VBA_DEBUG__
12057 dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
12058 dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
12059 dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
12060 dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles);
12061 dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles);
12062 dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
12063 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
12064 dml2_printf("DML::%s: --- END --- \n", __func__);
12065 #endif
12066 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
12067 }
12068
/*
 * Logging wrapper around dml_core_mode_programming().
 *
 * Prints a START banner, runs the mode programming calculation on
 * @in_out_params, prints the result and a DONE banner, and returns the
 * value dml_core_mode_programming() returned.
 */
bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params)
{
	bool result;

	dml2_printf("DML::%s: ------------- START ----------\n", __func__);

	result = dml_core_mode_programming(in_out_params);

	dml2_printf("DML::%s: result = %0d\n", __func__, result);
	dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);

	return result;
}
12078
dml2_core_calcs_get_dpte_row_height(unsigned int * dpte_row_height,struct dml2_core_internal_display_mode_lib * mode_lib,bool is_plane1,enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,enum dml2_rotation_angle ScanDirection,unsigned int pitch,unsigned int GPUVMMinPageSizeKBytes)12079 void dml2_core_calcs_get_dpte_row_height(
12080 unsigned int *dpte_row_height,
12081 struct dml2_core_internal_display_mode_lib *mode_lib,
12082 bool is_plane1,
12083 enum dml2_source_format_class SourcePixelFormat,
12084 enum dml2_swizzle_mode SurfaceTiling,
12085 enum dml2_rotation_angle ScanDirection,
12086 unsigned int pitch,
12087 unsigned int GPUVMMinPageSizeKBytes)
12088 {
12089 unsigned int BytePerPixelY;
12090 unsigned int BytePerPixelC;
12091 double BytePerPixelInDETY;
12092 double BytePerPixelInDETC;
12093 unsigned int BlockHeight256BytesY;
12094 unsigned int BlockHeight256BytesC;
12095 unsigned int BlockWidth256BytesY;
12096 unsigned int BlockWidth256BytesC;
12097 unsigned int MacroTileWidthY;
12098 unsigned int MacroTileWidthC;
12099 unsigned int MacroTileHeightY;
12100 unsigned int MacroTileHeightC;
12101 bool surf_linear_128_l = false;
12102 bool surf_linear_128_c = false;
12103
12104 CalculateBytePerPixelAndBlockSizes(
12105 SourcePixelFormat,
12106 SurfaceTiling,
12107 pitch,
12108 pitch,
12109
12110 /* Output */
12111 &BytePerPixelY,
12112 &BytePerPixelC,
12113 &BytePerPixelInDETY,
12114 &BytePerPixelInDETC,
12115 &BlockHeight256BytesY,
12116 &BlockHeight256BytesC,
12117 &BlockWidth256BytesY,
12118 &BlockWidth256BytesC,
12119 &MacroTileHeightY,
12120 &MacroTileHeightC,
12121 &MacroTileWidthY,
12122 &MacroTileWidthC,
12123 &surf_linear_128_l,
12124 &surf_linear_128_c);
12125
12126 unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
12127 unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
12128 unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
12129 unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
12130 unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
12131 unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
12132 #ifdef __DML_VBA_DEBUG__
12133 dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1);
12134 dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
12135 dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
12136 dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
12137 dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
12138 dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
12139 dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
12140 dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
12141 dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
12142 dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
12143 #endif
12144 unsigned int dummy_integer[21];
12145
12146 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0;
12147 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0;
12148 mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1;
12149 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes;
12150 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes;
12151 mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat;
12152 mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling;
12153 mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel;
12154 mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection;
12155 mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0;
12156 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0;
12157 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0;
12158 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0;
12159 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1;
12160 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4;
12161 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes;
12162 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests;
12163 mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch;
12164 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth;
12165 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight;
12166 mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0;
12167 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0;
12168 mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0;
12169
12170 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1];
12171 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2];
12172 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3];
12173 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height;
12174 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4];
12175 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5];
12176 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6];
12177 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7];
12178 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8];
12179 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9];
12180 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11];
12181 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12];
12182 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13];
12183 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14];
12184
12185 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15];
12186 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16];
12187 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17];
12188 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18];
12189 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19];
12190 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20];
12191
12192 // just supply with enough parameters to calculate dpte
12193 CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params);
12194
12195 #ifdef __DML_VBA_DEBUG__
12196 dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
12197 #endif
12198 }
12199
is_dual_plane(enum dml2_source_format_class source_format)12200 static bool is_dual_plane(enum dml2_source_format_class source_format)
12201 {
12202 bool ret_val = 0;
12203
12204 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
12205 ret_val = 1;
12206
12207 return ret_val;
12208 }
12209
dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12210 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
12211 {
12212 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
12213 return plane_idx;
12214 }
12215
rq_dlg_get_wm_regs(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_watermark_regs * wm_regs)12216 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
12217 {
12218 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12219
12220 wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
12221 wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
12222 wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
12223 wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz);
12224 wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
12225 wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
12226 wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
12227 wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz);
12228 wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz);
12229 wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
12230 wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
12231 wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
12232 }
12233
/*
 * Return math_log2_approx(a) - subtrahend, or 0 when @a is 0.
 *
 * NOTE(review): the subtraction is not clamped; if the log value is smaller
 * than @subtrahend the unsigned result presumably wraps - confirm callers
 * never hit that case.
 */
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	unsigned int result = 0;

	if (a != 0)
		result = math_log2_approx(a) - subtrahend;

	return result;
}
12241
dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs * cursor_dlg_regs,const struct dml2_get_cursor_dlg_reg * p)12242 void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
12243 {
12244 int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) -
12245 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
12246 cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0);
12247
12248 #ifdef __DML_VBA_DEBUG__
12249 dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
12250 dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
12251 dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
12252 dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset);
12253 dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
12254 #endif
12255
12256 cursor_dlg_regs->chunk_hdl_adjust = 3;
12257 cursor_dlg_regs->dst_y_offset = 0;
12258
12259 cursor_dlg_regs->qos_level_fixed = 8;
12260 cursor_dlg_regs->qos_ramp_disable = 0;
12261 }
12262
rq_dlg_get_rq_reg(struct dml2_display_rq_regs * rq_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)12263 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
12264 const struct dml2_display_cfg *display_cfg,
12265 const struct dml2_core_internal_display_mode_lib *mode_lib,
12266 unsigned int pipe_idx)
12267 {
12268 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12269 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
12270 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
12271 bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format));
12272
12273 unsigned int pixel_chunk_bytes = 0;
12274 unsigned int min_pixel_chunk_bytes = 0;
12275 unsigned int dpte_group_bytes = 0;
12276 unsigned int mpte_group_bytes = 0;
12277
12278 unsigned int p1_pixel_chunk_bytes = 0;
12279 unsigned int p1_min_pixel_chunk_bytes = 0;
12280 unsigned int p1_dpte_group_bytes = 0;
12281 unsigned int p1_mpte_group_bytes = 0;
12282
12283 unsigned int detile_buf_plane1_addr = 0;
12284 unsigned int detile_buf_size_in_bytes;
12285 double stored_swath_l_bytes;
12286 double stored_swath_c_bytes;
12287 bool is_phantom_pipe;
12288
12289 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
12290
12291 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
12292 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
12293
12294 if (pixel_chunk_bytes == 64 * 1024)
12295 min_pixel_chunk_bytes = 0;
12296
12297 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
12298 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
12299
12300 p1_pixel_chunk_bytes = pixel_chunk_bytes;
12301 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
12302 p1_dpte_group_bytes = dpte_group_bytes;
12303 p1_mpte_group_bytes = mpte_group_bytes;
12304
12305 if (source_format == dml2_rgbe_alpha)
12306 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
12307
12308 rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib);
12309 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
12310 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
12311
12312 if (min_pixel_chunk_bytes == 0)
12313 rq_regs->rq_regs_l.min_chunk_size = 0;
12314 else
12315 rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
12316
12317 if (p1_min_pixel_chunk_bytes == 0)
12318 rq_regs->rq_regs_c.min_chunk_size = 0;
12319 else
12320 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
12321
12322 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
12323 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
12324 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
12325 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
12326
12327 detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
12328
12329 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
12330 unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
12331 #ifdef __DML_VBA_DEBUG__
12332 dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
12333 #endif
12334 DML2_ASSERT(p0_pte_row_height_linear >= 8);
12335
12336 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
12337 if (dual_plane) {
12338 unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
12339
12340 #ifdef __DML_VBA_DEBUG__
12341 dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
12342 #endif
12343 if (sw_mode == dml2_sw_linear) {
12344 DML2_ASSERT(p1_pte_row_height_linear >= 8);
12345 }
12346 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
12347 }
12348 } else {
12349 rq_regs->rq_regs_l.pte_row_height_linear = 0;
12350 rq_regs->rq_regs_c.pte_row_height_linear = 0;
12351 }
12352
12353 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0);
12354 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0);
12355
12356 // FIXME_DCN4, programming guide has dGPU condition
12357 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
12358 rq_regs->drq_expansion_mode = 0;
12359 } else {
12360 rq_regs->drq_expansion_mode = 2;
12361 }
12362 rq_regs->prq_expansion_mode = 1;
12363 rq_regs->crq_expansion_mode = 1;
12364 rq_regs->mrq_expansion_mode = 1;
12365
12366 stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
12367 stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
12368 is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
12369
12370 // Note: detile_buf_plane1_addr is in unit of 1KB
12371 if (dual_plane) {
12372 if (is_phantom_pipe) {
12373 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
12374 } else {
12375 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
12376 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
12377 #ifdef __DML_VBA_DEBUG__
12378 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
12379 #endif
12380 } else {
12381 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
12382 #ifdef __DML_VBA_DEBUG__
12383 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
12384 #endif
12385 }
12386 }
12387 }
12388 rq_regs->plane1_base_address = detile_buf_plane1_addr;
12389
12390 #ifdef __DML_VBA_DEBUG__
12391 dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
12392 dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
12393 dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
12394 dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
12395 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
12396 dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
12397 #endif
12398 //dml2_printf_rq_regs_st(rq_regs);
12399 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12400 }
12401
rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch * s,struct dml2_display_dlg_regs * disp_dlg_regs,struct dml2_display_ttu_regs * disp_ttu_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,const unsigned int pipe_idx)12402 static void rq_dlg_get_dlg_reg(
12403 struct dml2_core_internal_scratch *s,
12404 struct dml2_display_dlg_regs *disp_dlg_regs,
12405 struct dml2_display_ttu_regs *disp_ttu_regs,
12406 const struct dml2_display_cfg *display_cfg,
12407 const struct dml2_core_internal_display_mode_lib *mode_lib,
12408 const unsigned int pipe_idx)
12409 {
12410 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
12411
12412 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
12413
12414 dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
12415
12416 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
12417 DML2_ASSERT(l->plane_idx < DML2_MAX_PLANES);
12418
12419 l->source_format = dml2_444_8;
12420 l->odm_mode = dml2_odm_mode_bypass;
12421 l->dual_plane = false;
12422 l->htotal = 0;
12423 l->hactive = 0;
12424 l->hblank_end = 0;
12425 l->vblank_end = 0;
12426 l->interlaced = false;
12427 l->pclk_freq_in_mhz = 0.0;
12428 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12429 l->ref_freq_to_pix_freq = 0.0;
12430
12431 if (l->plane_idx < DML2_MAX_PLANES) {
12432
12433 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
12434 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
12435 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
12436
12437 l->dual_plane = is_dual_plane(l->source_format);
12438
12439 l->htotal = l->timing->h_total;
12440 l->hactive = l->timing->h_active;
12441 l->hblank_end = l->timing->h_blank_end;
12442 l->vblank_end = l->timing->v_blank_end;
12443 l->interlaced = l->timing->interlaced;
12444 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
12445 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
12446
12447 dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
12448 dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
12449 dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
12450 dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
12451 dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz);
12452 dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
12453 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12454 dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
12455
12456 DML2_ASSERT(l->refclk_freq_in_mhz != 0);
12457 DML2_ASSERT(l->pclk_freq_in_mhz != 0);
12458 DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0);
12459
12460 // Need to figure out which side of odm combine we're in
12461 // Assume the pipe instance under the same plane is in order
12462
12463 if (l->odm_mode == dml2_odm_mode_bypass) {
12464 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
12465 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
12466 // find out how many pipe are in this plane
12467 l->num_active_pipes = mode_lib->mp.num_active_pipes;
12468 l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
12469 l->pipe_idx_in_combine = 0; // pipe index within the plane
12470 l->odm_combine_factor = 2;
12471
12472 if (l->odm_mode == dml2_odm_mode_combine_3to1)
12473 l->odm_combine_factor = 3;
12474 else if (l->odm_mode == dml2_odm_mode_combine_4to1)
12475 l->odm_combine_factor = 4;
12476
12477 for (unsigned int i = 0; i < l->num_active_pipes; i++) {
12478 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
12479 if (i < l->first_pipe_idx_in_plane) {
12480 l->first_pipe_idx_in_plane = i;
12481 }
12482 }
12483 }
12484 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
12485
12486 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
12487 dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
12488 dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
12489 dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
12490 dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
12491 }
12492 dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
12493
12494 DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
12495
12496 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
12497 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
12498 disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
12499
12500 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
12501 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
12502
12503 dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
12504 dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
12505 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12506
12507 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
12508 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
12509
12510 dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
12511
12512 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12513 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12514
12515 dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
12516 dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
12517
12518 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
12519 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12520 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12521 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12522 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12523
12524 dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
12525 dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
12526 dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
12527 dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
12528 dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
12529
12530 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
12531 DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
12532 }
12533
12534 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
12535 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
12536
12537 dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
12538 dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
12539
12540 // Active
12541 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12542 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12543
12544 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
12545 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
12546
12547 l->refcyc_per_line_delivery_pre_c = 0.0;
12548 l->refcyc_per_line_delivery_c = 0.0;
12549
12550 if (l->dual_plane) {
12551 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12552 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12553
12554 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
12555 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
12556 }
12557
12558 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12559 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12560
12561 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12562 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12563
12564 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
12565 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
12566
12567 l->refcyc_per_req_delivery_pre_c = 0.0;
12568 l->refcyc_per_req_delivery_c = 0.0;
12569 if (l->dual_plane) {
12570 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12571 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12572
12573 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
12574 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
12575 }
12576
12577 // TTU - Cursor
12578 DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
12579
12580 // Assign to register structures
12581 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
12582 DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
12583
12584 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
12585 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
12586 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
12587 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
12588 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
12589 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
12590 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
12591
12592 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
12593 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
12594
12595 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
12596 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
12597 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
12598 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
12599
12600 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12601 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12602 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12603 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12604
12605 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12606 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12607 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12608 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12609 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12610 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12611 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12612 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12613 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12614
12615 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
12616 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
12617
12618 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
12619 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
12620 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
12621 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
12622 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
12623 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
12624 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
12625 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
12626 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
12627 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
12628
12629 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12630 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12631 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12632 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12633 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12634 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12635 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12636 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12637
12638 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
12639 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
12640 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
12641 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
12642 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
12643 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
12644 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
12645 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
12646
12647 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
12648 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
12649
12650 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
12651 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
12652 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
12653 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
12654 disp_ttu_regs->qos_level_low_wm = 0;
12655
12656 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
12657
12658 disp_ttu_regs->qos_level_flip = 14;
12659 disp_ttu_regs->qos_level_fixed_l = 8;
12660 disp_ttu_regs->qos_level_fixed_c = 8;
12661 disp_ttu_regs->qos_ramp_disable_l = 0;
12662 disp_ttu_regs->qos_ramp_disable_c = 0;
12663 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
12664
12665 // CHECK for HW registers' range, DML2_ASSERT or clamp
12666 DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
12667 DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
12668 DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
12669 DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
12670 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
12671 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
12672
12673 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
12674 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
12675
12676 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
12677 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
12678
12679 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
12680 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
12681
12682
12683 DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8);
12684 DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
12685
12686 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
12687 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
12688 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
12689 }
12690 if (l->dual_plane) {
12691 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
12692 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
12693 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
12694 }
12695 }
12696
12697 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
12698 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
12699 if (l->dual_plane) {
12700 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
12701 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
12702 }
12703 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
12704 if (l->dual_plane) {
12705 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
12706 }
12707
12708 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
12709 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
12710 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
12711 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
12712 DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
12713 DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
12714 DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
12715
12716 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12717
12718 }
12719 }
12720
rq_dlg_get_arb_params(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_arb_regs * arb_param)12721 static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param)
12722 {
12723 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
12724
12725 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
12726 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max
12727 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
12728 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
12729 arb_param->sat_level_us = 60;
12730 arb_param->hvm_max_qos_commit_threshold = 0xf;
12731 arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
12732 arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
12733 arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
12734 arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
12735 arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
12736 arb_param->pstate_stall_threshold = (unsigned int)(mode_lib->ip_caps.fams2.max_allow_delay_us * refclk_freq_in_mhz);
12737
12738 #ifdef __DML_VBA_DEBUG__
12739 dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
12740 dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
12741 dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
12742 dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
12743 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
12744 #endif
12745
12746 }
12747
/*
 * Public entry point for watermark register generation: forwards all
 * arguments to rq_dlg_get_wm_regs(), which fills @out.
 */
void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_dchub_watermark_regs *out)
{
	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
}
12752
/*
 * Public entry point for arbiter register generation: forwards all
 * arguments to rq_dlg_get_arb_params(), which fills @out.
 */
void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_display_arb_regs *out)
{
	rq_dlg_get_arb_params(display_cfg, mode_lib, out);
}
12757
dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg * display_cfg,struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_per_pipe_register_set * out,int pipe_index)12758 void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
12759 struct dml2_core_internal_display_mode_lib *mode_lib,
12760 struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
12761 {
12762 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
12763 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
12764 out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
12765 }
12766
dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,union dml2_global_sync_programming * out,int pipe_index)12767 void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index)
12768 {
12769 out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
12770 out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
12771 out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
12772 out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
12773 out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
12774 }
12775
dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_per_stream_programming * out,int pipe_index)12776 void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index)
12777 {
12778 dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index);
12779 }
12780
dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,struct dmub_cmd_fams2_global_config * fams2_global_config)12781 void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12782 const struct display_configuation_with_meta *display_cfg,
12783 struct dmub_cmd_fams2_global_config *fams2_global_config)
12784 {
12785 fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required;
12786
12787 if (fams2_global_config->features.bits.enable) {
12788 fams2_global_config->features.bits.enable_stall_recovery = true;
12789 fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START;
12790
12791 fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us;
12792 fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us;
12793 fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us;
12794 fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us;
12795
12796 fams2_global_config->num_streams = display_cfg->display_config.num_streams;
12797 }
12798 }
12799
dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,union dmub_cmd_fams2_config * fams2_base_programming,union dmub_cmd_fams2_config * fams2_sub_programming,enum dml2_pstate_method pstate_method,int plane_index)12800 void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12801 const struct display_configuation_with_meta *display_cfg,
12802 union dmub_cmd_fams2_config *fams2_base_programming,
12803 union dmub_cmd_fams2_config *fams2_sub_programming,
12804 enum dml2_pstate_method pstate_method,
12805 int plane_index)
12806 {
12807 const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
12808 const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
12809 const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index];
12810
12811 struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base;
12812 union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state;
12813
12814 unsigned int i;
12815
12816 if (display_cfg->display_config.overrides.all_streams_blanked) {
12817 /* stream is blanked, so do nothing */
12818 return;
12819 }
12820
12821 /* from display configuration */
12822 base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
12823 base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
12824 base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal -
12825 stream_descriptor->timing.v_front_porch);
12826 base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal -
12827 stream_descriptor->timing.v_front_porch -
12828 stream_descriptor->timing.v_active);
12829 base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
12830
12831 /* from meta */
12832 base_programming->otg_vline_time_ns =
12833 (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0);
12834 base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines;
12835 base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines;
12836 base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines;
12837 base_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal -
12838 stream_descriptor->timing.v_front_porch -
12839 stream_fams2_meta->method_drr.programming_delay_otg_vlines);
12840 base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines;
12841 base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal;
12842
12843 /* from core */
12844 base_programming->config.bits.min_ttu_vblank_usable = true;
12845 for (i = 0; i < display_cfg->display_config.num_planes; i++) {
12846 /* check if all planes support p-state in blank */
12847 if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
12848 mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
12849 base_programming->config.bits.min_ttu_vblank_usable = false;
12850 break;
12851 }
12852 }
12853
12854 switch (pstate_method) {
12855 case dml2_pstate_method_vactive:
12856 case dml2_pstate_method_fw_vactive_drr:
12857 /* legacy vactive */
12858 base_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
12859 sub_programming->legacy.vactive_det_fill_delay_otg_vlines =
12860 (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
12861 base_programming->allow_start_otg_vline =
12862 (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline;
12863 base_programming->allow_end_otg_vline =
12864 (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline;
12865 base_programming->config.bits.clamp_vtotal_min = true;
12866 break;
12867 case dml2_pstate_method_vblank:
12868 case dml2_pstate_method_fw_vblank_drr:
12869 /* legacy vblank */
12870 base_programming->type = FAMS2_STREAM_TYPE_VBLANK;
12871 base_programming->allow_start_otg_vline =
12872 (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline;
12873 base_programming->allow_end_otg_vline =
12874 (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline;
12875 base_programming->config.bits.clamp_vtotal_min = true;
12876 break;
12877 case dml2_pstate_method_fw_drr:
12878 /* drr */
12879 base_programming->type = FAMS2_STREAM_TYPE_DRR;
12880 sub_programming->drr.programming_delay_otg_vlines =
12881 (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines;
12882 sub_programming->drr.nom_stretched_vtotal =
12883 (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal;
12884 base_programming->allow_start_otg_vline =
12885 (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline;
12886 base_programming->allow_end_otg_vline =
12887 (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline;
12888 /* drr only clamps to vtotal min for single display */
12889 base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
12890 sub_programming->drr.only_stretch_if_required = true;
12891 break;
12892 case dml2_pstate_method_fw_svp:
12893 case dml2_pstate_method_fw_svp_drr:
12894 /* subvp */
12895 base_programming->type = FAMS2_STREAM_TYPE_SUBVP;
12896 sub_programming->subvp.vratio_numerator =
12897 (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
12898 sub_programming->subvp.vratio_denominator = 1000;
12899 sub_programming->subvp.programming_delay_otg_vlines =
12900 (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines;
12901 sub_programming->subvp.prefetch_to_mall_otg_vlines =
12902 (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
12903 sub_programming->subvp.phantom_vtotal =
12904 (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal;
12905 sub_programming->subvp.phantom_vactive =
12906 (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive;
12907 sub_programming->subvp.config.bits.is_multi_planar =
12908 plane_descriptor->surface.plane1.height > 0;
12909 sub_programming->subvp.config.bits.is_yuv420 =
12910 plane_descriptor->pixel_format == dml2_420_8 ||
12911 plane_descriptor->pixel_format == dml2_420_10 ||
12912 plane_descriptor->pixel_format == dml2_420_12;
12913
12914 base_programming->allow_start_otg_vline =
12915 (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline;
12916 base_programming->allow_end_otg_vline =
12917 (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline;
12918 base_programming->config.bits.clamp_vtotal_min = true;
12919 break;
12920 case dml2_pstate_method_reserved_hw:
12921 case dml2_pstate_method_reserved_fw:
12922 case dml2_pstate_method_reserved_fw_drr_clamped:
12923 case dml2_pstate_method_reserved_fw_drr_var:
12924 case dml2_pstate_method_na:
12925 case dml2_pstate_method_count:
12926 default:
12927 /* this should never happen */
12928 break;
12929 }
12930 }
12931
dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_mcache_surface_allocation * out,int plane_idx)12932 void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
12933 {
12934 unsigned int n;
12935
12936 out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx);
12937 out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx);
12938 out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx);
12939 out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx);
12940
12941 for (n = 0; n < out->num_mcaches_plane0; n++)
12942 out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n);
12943
12944 for (n = 0; n < out->num_mcaches_plane1; n++)
12945 out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n);
12946
12947 out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx);
12948 out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx);
12949 out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx);
12950 out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx);
12951 out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx);
12952
12953 out->valid = true;
12954 }
12955
/*
 * Store in @out the value returned by dml_get_surface_size_in_mall_bytes()
 * for the pipe at @pipe_index.
 */
void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index)
{
	const unsigned int size_in_mall_bytes = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index);

	*out = size_in_mall_bytes;
}
12960
dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_plane_support_info * out,int plane_idx)12961 void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
12962 {
12963 out->mall_svp_size_requirement_ways = 0;
12964
12965 out->nominal_vblank_pstate_latency_hiding_us =
12966 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total /
12967 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
12968
12969 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
12970
12971 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
12972
12973 out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]);
12974 }
12975
dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_stream_support_info * out,int plane_index)12976 void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
12977 {
12978 double phantom_processing_delay_pix;
12979 unsigned int phantom_processing_delay_lines;
12980 unsigned int phantom_min_v_active_lines;
12981 unsigned int phantom_v_active_lines;
12982 unsigned int phantom_v_startup_lines;
12983 unsigned int phantom_v_blank_lines;
12984 unsigned int main_v_blank_lines;
12985 unsigned int rem;
12986
12987 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
12988 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
12989 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
12990 dml2_core_div_rem(phantom_processing_delay_pix,
12991 display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total,
12992 &rem);
12993 if (rem)
12994 phantom_processing_delay_lines++;
12995
12996 phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
12997 phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
12998 display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
12999 phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
13000
13001 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank)
13002 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
13003 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
13004 if (phantom_v_blank_lines > main_v_blank_lines)
13005 phantom_v_blank_lines = main_v_blank_lines;
13006
13007 out->phantom_v_active = phantom_v_active_lines;
13008 // phantom_vtotal = vactive + vblank
13009 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
13010
13011 out->phantom_min_v_active = phantom_min_v_active_lines;
13012 out->phantom_v_startup = phantom_v_startup_lines;
13013
13014 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
13015 #if defined(__DML_VBA_DEBUG__)
13016 dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
13017 dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
13018 dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
13019 dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us);
13020 #endif
13021 }
13022
dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_cfg_programming * out)13023 void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
13024 {
13025 unsigned int k, n;
13026
13027 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
13028 out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
13029 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
13030 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
13031 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
13032 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
13033 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
13034 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
13035 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
13036 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
13037 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
13038 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
13039 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
13040 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
13041 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
13042 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
13043 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
13044 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
13045 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
13046 out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
13047 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
13048 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
13049 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
13050 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
13051 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
13052 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
13053 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
13054 out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support;
13055 out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support;
13056
13057 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
13058 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
13059 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
13060 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
13061 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
13062 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
13063 out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
13064
13065 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
13066 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
13067 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
13068 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
13069 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
13070
13071 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
13072 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
13073 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
13074
13075 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
13076 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
13077 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
13078 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
13079 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
13080 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
13081
13082 for (k = 0; k < out->display_config.num_planes; k++) {
13083
13084 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
13085 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
13086 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
13087 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
13088 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
13089 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
13090 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
13091 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
13092
13093 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
13094 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
13095 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
13096 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
13097 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
13098 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
13099 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
13100 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
13101 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
13102 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
13103 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
13104 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
13105
13106 if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
13107 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
13108 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
13109 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
13110 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
13111 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
13112 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
13113 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
13114 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
13115 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
13116 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
13117 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
13118 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
13119 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
13120 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
13121 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
13122 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
13123 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
13124 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
13125 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
13126 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
13127 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
13128 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
13129 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
13130 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
13131 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
13132
13133 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
13134 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
13135 }
13136
13137 out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib);
13138 out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib);
13139 out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib);
13140 out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib);
13141
13142 out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib);
13143 out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib);
13144 out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib);
13145 out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib);
13146 out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
13147 out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
13148 out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
13149 out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib);
13150
13151 out->informative.mall.total_surface_size_in_mall_bytes = 0;
13152 out->informative.dpp.total_num_dpps_required = 0;
13153 for (k = 0; k < out->display_config.num_planes; ++k) {
13154 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
13155 out->informative.dpp.total_num_dpps_required += mode_lib->mp.NoOfDPP[k];
13156 }
13157
13158 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
13159 out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib);
13160
13161 out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib);
13162 out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib);
13163 out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib);
13164
13165 out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib);
13166 out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib);
13167 out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib);
13168 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib);
13169 out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib);
13170
13171 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib);
13172 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib);
13173 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib);
13174 out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib);
13175
13176 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib);
13177 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib);
13178 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib);
13179 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib);
13180
13181 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib);
13182 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib);
13183 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib);
13184
13185 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib);
13186 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib);
13187 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib);
13188
13189 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib);
13190 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib);
13191 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib);
13192 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib);
13193
13194 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib);
13195 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib);
13196 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib);
13197 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib);
13198
13199 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib);
13200 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib);
13201 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib);
13202 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib);
13203
13204 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib);
13205 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib);
13206 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib);
13207 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib);
13208
13209 out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib);
13210 out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib);
13211
13212 out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib);
13213 out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib);
13214 out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
13215
13216 out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib);
13217 out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
13218 out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib);
13219
13220 out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib);
13221 out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
13222 out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib);
13223 out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib);
13224
13225 out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib);
13226 out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib);
13227 out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib);
13228
13229 out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib);
13230
13231 out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib);
13232
13233 out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
13234
13235 out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000;
13236
13237 for (k = 0; k < out->display_config.num_planes; k++) {
13238
13239 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
13240 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13241 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13242 out->informative.misc.PrefetchMode[k] = 0;
13243 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
13244 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
13245 out->informative.misc.PrefetchMode[k] = 1;
13246 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
13247 out->informative.misc.PrefetchMode[k] = 2;
13248 else
13249 out->informative.misc.PrefetchMode[k] = 3;
13250
13251 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
13252 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
13253 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
13254 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
13255 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
13256 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
13257 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
13258 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
13259 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
13260 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
13261 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
13262 out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
13263 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
13264 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
13265 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
13266 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
13267 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
13268 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
13269 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
13270 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
13271 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
13272 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
13273 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
13274 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
13275 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
13276 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
13277 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
13278 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
13279 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
13280 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
13281 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
13282 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
13283 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
13284 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
13285 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
13286 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
13287 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
13288 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
13289 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
13290 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
13291 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
13292 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
13293 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
13294 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
13295
13296 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
13297 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
13298 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
13299 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
13300 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
13301 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
13302 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
13303 out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
13304 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
13305 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
13306 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
13307 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
13308 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
13309 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
13310 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
13311 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
13312 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
13313 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
13314 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
13315
13316 out->informative.misc.WritebackRequiredBandwidth = mode_lib->scratch.dml_core_mode_programming_locals.TotalWRBandwidth / 1000.0;
13317 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
13318 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
13319 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
13320 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
13321 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
13322 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
13323 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
13324
13325 if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin)
13326 out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k];
13327 }
13328
13329 // For this DV informative layer, all pipes in the same plane will just use the same id.
13330 // The optimization and helper layer will be added later on.
13331 // This only works when we have enough "mcache" to fit everything without thrashing the cache.
13332 for (k = 0; k < out->display_config.num_planes; k++) {
13333 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
13334 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
13335
13336 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
13337 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
13338 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
13339 }
13340
13341 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
13342 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
13343
13344 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
13345 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
13346 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
13347 }
13348 }
13349 out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib);
13350
13351 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
13352 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
13353 / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
13354 out->informative.misc.ROBUrgencyAvoidance = true;
13355 } else {
13356 out->informative.misc.ROBUrgencyAvoidance = false;
13357 }
13358 } else {
13359 out->informative.misc.ROBUrgencyAvoidance = true;
13360 }
13361 }
13362