14e4bbe73SMonk Liu /*
24e4bbe73SMonk Liu * Copyright 2016 Advanced Micro Devices, Inc.
34e4bbe73SMonk Liu *
44e4bbe73SMonk Liu * Permission is hereby granted, free of charge, to any person obtaining a
54e4bbe73SMonk Liu * copy of this software and associated documentation files (the "Software"),
64e4bbe73SMonk Liu * to deal in the Software without restriction, including without limitation
74e4bbe73SMonk Liu * the rights to use, copy, modify, merge, publish, distribute, sublicense,
84e4bbe73SMonk Liu * and/or sell copies of the Software, and to permit persons to whom the
94e4bbe73SMonk Liu * Software is furnished to do so, subject to the following conditions:
104e4bbe73SMonk Liu *
114e4bbe73SMonk Liu * The above copyright notice and this permission notice shall be included in
124e4bbe73SMonk Liu * all copies or substantial portions of the Software.
134e4bbe73SMonk Liu *
144e4bbe73SMonk Liu * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
154e4bbe73SMonk Liu * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
164e4bbe73SMonk Liu * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
174e4bbe73SMonk Liu * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
184e4bbe73SMonk Liu * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
194e4bbe73SMonk Liu * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
204e4bbe73SMonk Liu * OTHER DEALINGS IN THE SOFTWARE.
214e4bbe73SMonk Liu *
224e4bbe73SMonk Liu */
234e4bbe73SMonk Liu
24f867723bSSam Ravnborg #include <linux/module.h>
25f867723bSSam Ravnborg
26eb85fc23SYongqiang Sun #ifdef CONFIG_X86
27eb85fc23SYongqiang Sun #include <asm/hypervisor.h>
28eb85fc23SYongqiang Sun #endif
29eb85fc23SYongqiang Sun
30f867723bSSam Ravnborg #include <drm/drm_drv.h>
3178b12008SMarek Marczykowski-Górecki #include <xen/xen.h>
32f867723bSSam Ravnborg
334e4bbe73SMonk Liu #include "amdgpu.h"
345278a159SStanley.Yang #include "amdgpu_ras.h"
35ab66c832SZhigang Luo #include "amdgpu_reset.h"
36f83cec3bSVictor Skvortsov #include "amdgpu_dpm.h"
37c1299461SWenhui Sheng #include "vi.h"
38c1299461SWenhui Sheng #include "soc15.h"
39c1299461SWenhui Sheng #include "nv.h"
404e4bbe73SMonk Liu
41519b8b76SBokun Zhang #define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
42519b8b76SBokun Zhang do { \
43519b8b76SBokun Zhang vf2pf_info->ucode_info[ucode].id = ucode; \
44519b8b76SBokun Zhang vf2pf_info->ucode_info[ucode].version = ver; \
45519b8b76SBokun Zhang } while (0)
46519b8b76SBokun Zhang
47a16f8f11Spding bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
48a16f8f11Spding {
49a16f8f11Spding /* By now all MMIO pages except the mailbox are blocked
50a16f8f11Spding  * if blocking is enabled in the hypervisor. Choose
51a16f8f11Spding  * SCRATCH_REG0 to test. */
52a16f8f11Spding return RREG32_NO_KIQ(0xc040) == 0xffffffff;
53a16f8f11Spding }
54a16f8f11Spding
55bc992ba5SXiangliang Yu void amdgpu_virt_init_setting(struct amdgpu_device *adev)
56bc992ba5SXiangliang Yu {
57e431eb80SAlex Deucher struct drm_device *ddev = adev_to_drm(adev);
58e431eb80SAlex Deucher
5906465d8eSXiangliang Yu /* enable virtual display */
60e7de0d84SZhigang Luo if (adev->asic_type != CHIP_ALDEBARAN &&
61731b4846SYang Wang adev->asic_type != CHIP_ARCTURUS &&
629d65b1b4SShiwu Zhang ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
6302f6efb4SEmily Deng if (adev->mode_info.num_crtc == 0)
6406465d8eSXiangliang Yu adev->mode_info.num_crtc = 1;
6506465d8eSXiangliang Yu adev->enable_virtual_display = true;
66e7de0d84SZhigang Luo }
67e431eb80SAlex Deucher ddev->driver_features &= ~DRIVER_ATOMIC;
68213cacefSXiangliang Yu adev->cg_flags = 0;
69213cacefSXiangliang Yu adev->pg_flags = 0;
708a1fbb4aSYiqing Yao
71db5dcd47SYuBiao Wang /* Reduce kcq number to 2 to reduce latency */
72db5dcd47SYuBiao Wang if (amdgpu_num_kcq == -1)
73db5dcd47SYuBiao Wang amdgpu_num_kcq = 2;
74bc992ba5SXiangliang Yu }
75bc992ba5SXiangliang Yu
761e9f1392SXiangliang Yu /**
771e9f1392SXiangliang Yu * amdgpu_virt_request_full_gpu() - request full gpu access
78f59bf24eSLee Jones * @adev: amdgpu device.
791e9f1392SXiangliang Yu * @init: is driver init time.
801e9f1392SXiangliang Yu * When starting driver init/fini, full GPU access must be requested first.
811e9f1392SXiangliang Yu * Return: Zero if the request succeeds, otherwise an error is returned.
821e9f1392SXiangliang Yu */
831e9f1392SXiangliang Yu int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
841e9f1392SXiangliang Yu {
851e9f1392SXiangliang Yu struct amdgpu_virt *virt = &adev->virt;
861e9f1392SXiangliang Yu int r;
871e9f1392SXiangliang Yu
881e9f1392SXiangliang Yu if (virt->ops && virt->ops->req_full_gpu) {
891e9f1392SXiangliang Yu r = virt->ops->req_full_gpu(adev, init);
9033f23fc3SYifan Zha if (r) {
9133f23fc3SYifan Zha adev->no_hw_access = true;
921e9f1392SXiangliang Yu return r;
9333f23fc3SYifan Zha }
941e9f1392SXiangliang Yu
951e9f1392SXiangliang Yu adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
961e9f1392SXiangliang Yu }
971e9f1392SXiangliang Yu
981e9f1392SXiangliang Yu return 0;
991e9f1392SXiangliang Yu }
1001e9f1392SXiangliang Yu
1011e9f1392SXiangliang Yu /**
1021e9f1392SXiangliang Yu * amdgpu_virt_release_full_gpu() - release full gpu access
103f59bf24eSLee Jones * @adev: amdgpu device.
1041e9f1392SXiangliang Yu * @init: is driver init time.
1051e9f1392SXiangliang Yu * When driver init/fini finishes, full GPU access must be released.
1061e9f1392SXiangliang Yu * Return: Zero if the release succeeds, otherwise an error is returned.
1071e9f1392SXiangliang Yu */
1081e9f1392SXiangliang Yu int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
1091e9f1392SXiangliang Yu {
1101e9f1392SXiangliang Yu struct amdgpu_virt *virt = &adev->virt;
1111e9f1392SXiangliang Yu int r;
1121e9f1392SXiangliang Yu
1131e9f1392SXiangliang Yu if (virt->ops && virt->ops->rel_full_gpu) {
1141e9f1392SXiangliang Yu r = virt->ops->rel_full_gpu(adev, init);
1151e9f1392SXiangliang Yu if (r)
1161e9f1392SXiangliang Yu return r;
1171e9f1392SXiangliang Yu
1181e9f1392SXiangliang Yu adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
1191e9f1392SXiangliang Yu }
1201e9f1392SXiangliang Yu return 0;
1211e9f1392SXiangliang Yu }
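
/*
 * Illustrative sketch (not part of the driver): a typical SR-IOV init or
 * teardown sequence brackets its hardware programming with the request/release
 * pair above and bails out if exclusive access cannot be obtained. The helper
 * name below is hypothetical and only shows the intended call order.
 *
 *	static int example_sriov_init_sequence(struct amdgpu_device *adev)
 *	{
 *		int r;
 *
 *		r = amdgpu_virt_request_full_gpu(adev, true);
 *		if (r)
 *			return r;	// host refused exclusive access
 *
 *		// ... program the hardware while full access is held ...
 *
 *		return amdgpu_virt_release_full_gpu(adev, true);
 *	}
 */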
1221e9f1392SXiangliang Yu
1231e9f1392SXiangliang Yu /**
1241e9f1392SXiangliang Yu * amdgpu_virt_reset_gpu() - reset gpu
125f59bf24eSLee Jones * @adev: amdgpu device.
1261e9f1392SXiangliang Yu * Send a reset command to the GPU hypervisor to reset the GPU that the VM is using.
1271e9f1392SXiangliang Yu * Return: Zero if the reset succeeds, otherwise an error is returned.
1281e9f1392SXiangliang Yu */
1291e9f1392SXiangliang Yu int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
1301e9f1392SXiangliang Yu {
1311e9f1392SXiangliang Yu struct amdgpu_virt *virt = &adev->virt;
1321e9f1392SXiangliang Yu int r;
1331e9f1392SXiangliang Yu
1341e9f1392SXiangliang Yu if (virt->ops && virt->ops->reset_gpu) {
1351e9f1392SXiangliang Yu r = virt->ops->reset_gpu(adev);
1361e9f1392SXiangliang Yu if (r)
1371e9f1392SXiangliang Yu return r;
1381e9f1392SXiangliang Yu
1391e9f1392SXiangliang Yu adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
1401e9f1392SXiangliang Yu }
1411e9f1392SXiangliang Yu
1421e9f1392SXiangliang Yu return 0;
1431e9f1392SXiangliang Yu }
144904cd389SXiangliang Yu
145aa53bc2eSMonk Liu void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
146aa53bc2eSMonk Liu {
147aa53bc2eSMonk Liu struct amdgpu_virt *virt = &adev->virt;
148aa53bc2eSMonk Liu
149aa53bc2eSMonk Liu if (virt->ops && virt->ops->req_init_data)
150aa53bc2eSMonk Liu virt->ops->req_init_data(adev);
151aa53bc2eSMonk Liu
152aa53bc2eSMonk Liu if (adev->virt.req_init_data_ver > 0)
153aa53bc2eSMonk Liu DRM_INFO("host supports REQ_INIT_DATA handshake\n");
154aa53bc2eSMonk Liu else
155aa53bc2eSMonk Liu DRM_WARN("host doesn't support REQ_INIT_DATA handshake\n");
156aa53bc2eSMonk Liu }
157aa53bc2eSMonk Liu
158904cd389SXiangliang Yu /**
1595c0a1cddSYunxiang Li * amdgpu_virt_ready_to_reset() - send ready to reset to host
1605c0a1cddSYunxiang Li * @adev: amdgpu device.
1615c0a1cddSYunxiang Li * Send a ready-to-reset message to the GPU hypervisor to signal that we have
1625c0a1cddSYunxiang Li * stopped GPU activity and are ready for the host FLR.
1635c0a1cddSYunxiang Li */
1645c0a1cddSYunxiang Li void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
1655c0a1cddSYunxiang Li {
1665c0a1cddSYunxiang Li struct amdgpu_virt *virt = &adev->virt;
1675c0a1cddSYunxiang Li
1685c0a1cddSYunxiang Li if (virt->ops && virt->ops->ready_to_reset)
1695c0a1cddSYunxiang Li virt->ops->ready_to_reset(adev);
1705c0a1cddSYunxiang Li }
1715c0a1cddSYunxiang Li
1725c0a1cddSYunxiang Li /**
173b636176eSpding * amdgpu_virt_wait_reset() - wait for the GPU reset to complete
174f59bf24eSLee Jones * @adev: amdgpu device.
175b636176eSpding * Wait for the GPU reset to complete.
176b636176eSpding * Return: Zero if the reset succeeds, otherwise an error is returned.
177b636176eSpding */
178b636176eSpding int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
179b636176eSpding {
180b636176eSpding struct amdgpu_virt *virt = &adev->virt;
181b636176eSpding
182b636176eSpding if (!virt->ops || !virt->ops->wait_reset)
183b636176eSpding return -EINVAL;
184b636176eSpding
185b636176eSpding return virt->ops->wait_reset(adev);
186b636176eSpding }
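
/*
 * Illustrative sketch (not part of the driver): during a host-initiated FLR
 * the guest first signals that it has quiesced, then blocks until the host
 * reports completion. This is a hypothetical simplification of the real
 * SR-IOV reset path that uses the two helpers above.
 *
 *	int r;
 *
 *	amdgpu_virt_ready_to_reset(adev);
 *	r = amdgpu_virt_wait_reset(adev);
 *	if (r)
 *		dev_err(adev->dev, "host FLR did not complete: %d\n", r);
 */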
187b636176eSpding
188b636176eSpding /**
189904cd389SXiangliang Yu * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
190f59bf24eSLee Jones * @adev: amdgpu device.
191904cd389SXiangliang Yu * The MM table is used by UVD and VCE for their initialization.
192904cd389SXiangliang Yu * Return: Zero if the allocation succeeds.
193904cd389SXiangliang Yu */
194904cd389SXiangliang Yu int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
195904cd389SXiangliang Yu {
196904cd389SXiangliang Yu int r;
197904cd389SXiangliang Yu
198904cd389SXiangliang Yu if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
199904cd389SXiangliang Yu return 0;
200904cd389SXiangliang Yu
201904cd389SXiangliang Yu r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
20258ab2c08SChristian König AMDGPU_GEM_DOMAIN_VRAM |
20358ab2c08SChristian König AMDGPU_GEM_DOMAIN_GTT,
204904cd389SXiangliang Yu &adev->virt.mm_table.bo,
205904cd389SXiangliang Yu &adev->virt.mm_table.gpu_addr,
206904cd389SXiangliang Yu (void *)&adev->virt.mm_table.cpu_addr);
207904cd389SXiangliang Yu if (r) {
208904cd389SXiangliang Yu DRM_ERROR("failed to alloc mm table and error = %d.\n", r);
209904cd389SXiangliang Yu return r;
210904cd389SXiangliang Yu }
211904cd389SXiangliang Yu
212904cd389SXiangliang Yu memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
213904cd389SXiangliang Yu DRM_INFO("MM table gpu addr = 0x%llx, cpu addr = %p.\n",
214904cd389SXiangliang Yu adev->virt.mm_table.gpu_addr,
215904cd389SXiangliang Yu adev->virt.mm_table.cpu_addr);
216904cd389SXiangliang Yu return 0;
217904cd389SXiangliang Yu }
218904cd389SXiangliang Yu
219904cd389SXiangliang Yu /**
220904cd389SXiangliang Yu * amdgpu_virt_free_mm_table() - free mm table memory
221f59bf24eSLee Jones * @adev: amdgpu device.
222904cd389SXiangliang Yu * Free MM table memory
223904cd389SXiangliang Yu */
224904cd389SXiangliang Yu void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
225904cd389SXiangliang Yu {
226904cd389SXiangliang Yu if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
227904cd389SXiangliang Yu return;
228904cd389SXiangliang Yu
229904cd389SXiangliang Yu amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
230904cd389SXiangliang Yu &adev->virt.mm_table.gpu_addr,
231904cd389SXiangliang Yu (void *)&adev->virt.mm_table.cpu_addr);
232904cd389SXiangliang Yu adev->virt.mm_table.gpu_addr = 0;
233904cd389SXiangliang Yu }
2342dc8f81eSHorace Chen
235cbda2758SVignesh Chander /**
236cbda2758SVignesh Chander * amdgpu_virt_rcvd_ras_interrupt() - receive ras interrupt
237cbda2758SVignesh Chander * @adev: amdgpu device.
238cbda2758SVignesh Chander * Check whether host sent RAS error message
239cbda2758SVignesh Chander * Return: true if found, otherwise false
240cbda2758SVignesh Chander */
241cbda2758SVignesh Chander bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
242cbda2758SVignesh Chander {
243cbda2758SVignesh Chander struct amdgpu_virt *virt = &adev->virt;
244cbda2758SVignesh Chander
245cbda2758SVignesh Chander if (!virt->ops || !virt->ops->rcvd_ras_intr)
246cbda2758SVignesh Chander return false;
247cbda2758SVignesh Chander
248cbda2758SVignesh Chander return virt->ops->rcvd_ras_intr(adev);
249cbda2758SVignesh Chander }
250cbda2758SVignesh Chander
2512dc8f81eSHorace Chen
252519b8b76SBokun Zhang unsigned int amd_sriov_msg_checksum(void *obj,
2532dc8f81eSHorace Chen unsigned long obj_size,
2542dc8f81eSHorace Chen unsigned int key,
255519b8b76SBokun Zhang unsigned int checksum)
2562dc8f81eSHorace Chen {
2572dc8f81eSHorace Chen unsigned int ret = key;
2582dc8f81eSHorace Chen unsigned long i = 0;
2592dc8f81eSHorace Chen unsigned char *pos;
2602dc8f81eSHorace Chen
2612dc8f81eSHorace Chen pos = (char *)obj;
2622dc8f81eSHorace Chen /* calculate checksum */
2632dc8f81eSHorace Chen for (i = 0; i < obj_size; ++i)
2642dc8f81eSHorace Chen ret += *(pos + i);
265519b8b76SBokun Zhang /* subtract the checksum itself */
266519b8b76SBokun Zhang pos = (char *)&checksum;
267519b8b76SBokun Zhang for (i = 0; i < sizeof(checksum); ++i)
2682dc8f81eSHorace Chen ret -= *(pos + i);
2692dc8f81eSHorace Chen return ret;
2702dc8f81eSHorace Chen }
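
/*
 * Illustrative sketch (not part of the driver): verifying a PF2VF message
 * with the helper above. The whole buffer, including the embedded checksum
 * field, is summed with the agreed key; amd_sriov_msg_checksum() then
 * subtracts the checksum bytes again so the result can be compared directly
 * against the value the host stored (the key is 0 for the v2 message format).
 * Variable names are hypothetical.
 *
 *	struct amd_sriov_msg_pf2vf_info_header *hdr = adev->virt.fw_reserve.p_pf2vf;
 *	uint32_t stored = ((struct amd_sriov_msg_pf2vf_info *)hdr)->checksum;
 *	uint32_t computed = amd_sriov_msg_checksum(hdr, hdr->size, key, stored);
 *
 *	if (computed != stored)
 *		return -EINVAL;	// corrupted or truncated message
 */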
2712dc8f81eSHorace Chen
2725278a159SStanley.Yang static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
2735278a159SStanley.Yang {
2745278a159SStanley.Yang struct amdgpu_virt *virt = &adev->virt;
2755278a159SStanley.Yang struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
2765278a159SStanley.Yang /* GPU will be marked bad on host if the bp count is more than 10,
2775278a159SStanley.Yang * so allocating 512 entries is enough.
2785278a159SStanley.Yang */
2795278a159SStanley.Yang unsigned int align_space = 512;
2805278a159SStanley.Yang void *bps = NULL;
2815278a159SStanley.Yang struct amdgpu_bo **bps_bo = NULL;
2825278a159SStanley.Yang
2835278a159SStanley.Yang *data = kmalloc(sizeof(struct amdgpu_virt_ras_err_handler_data), GFP_KERNEL);
2845278a159SStanley.Yang if (!*data)
2858b11e14bSBernard Zhao goto data_failure;
2865278a159SStanley.Yang
287362936d6SVictor Skvortsov bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL);
2888b11e14bSBernard Zhao if (!bps)
2898b11e14bSBernard Zhao goto bps_failure;
2905278a159SStanley.Yang
291362936d6SVictor Skvortsov bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL);
2928b11e14bSBernard Zhao if (!bps_bo)
2938b11e14bSBernard Zhao goto bps_bo_failure;
2945278a159SStanley.Yang
2955278a159SStanley.Yang (*data)->bps = bps;
2965278a159SStanley.Yang (*data)->bps_bo = bps_bo;
2975278a159SStanley.Yang (*data)->count = 0;
2985278a159SStanley.Yang (*data)->last_reserved = 0;
2995278a159SStanley.Yang
3005278a159SStanley.Yang virt->ras_init_done = true;
3015278a159SStanley.Yang
3025278a159SStanley.Yang return 0;
3038b11e14bSBernard Zhao
3048b11e14bSBernard Zhao bps_bo_failure:
3058b11e14bSBernard Zhao kfree(bps);
3068b11e14bSBernard Zhao bps_failure:
3078b11e14bSBernard Zhao kfree(*data);
3088b11e14bSBernard Zhao data_failure:
3098b11e14bSBernard Zhao return -ENOMEM;
3105278a159SStanley.Yang }
3115278a159SStanley.Yang
3125278a159SStanley.Yang static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
3135278a159SStanley.Yang {
3145278a159SStanley.Yang struct amdgpu_virt *virt = &adev->virt;
3155278a159SStanley.Yang struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
3165278a159SStanley.Yang struct amdgpu_bo *bo;
3175278a159SStanley.Yang int i;
3185278a159SStanley.Yang
3195278a159SStanley.Yang if (!data)
3205278a159SStanley.Yang return;
3215278a159SStanley.Yang
3225278a159SStanley.Yang for (i = data->last_reserved - 1; i >= 0; i--) {
3235278a159SStanley.Yang bo = data->bps_bo[i];
324362936d6SVictor Skvortsov if (bo) {
3255278a159SStanley.Yang amdgpu_bo_free_kernel(&bo, NULL, NULL);
3265278a159SStanley.Yang data->bps_bo[i] = bo;
327362936d6SVictor Skvortsov }
3285278a159SStanley.Yang data->last_reserved = i;
3295278a159SStanley.Yang }
3305278a159SStanley.Yang }
3315278a159SStanley.Yang
3325278a159SStanley.Yang void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
3335278a159SStanley.Yang {
3345278a159SStanley.Yang struct amdgpu_virt *virt = &adev->virt;
3355278a159SStanley.Yang struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
3365278a159SStanley.Yang
3375278a159SStanley.Yang virt->ras_init_done = false;
3385278a159SStanley.Yang
3395278a159SStanley.Yang if (!data)
3405278a159SStanley.Yang return;
3415278a159SStanley.Yang
3425278a159SStanley.Yang amdgpu_virt_ras_release_bp(adev);
3435278a159SStanley.Yang
3445278a159SStanley.Yang kfree(data->bps);
3455278a159SStanley.Yang kfree(data->bps_bo);
3465278a159SStanley.Yang kfree(data);
3475278a159SStanley.Yang virt->virt_eh_data = NULL;
3485278a159SStanley.Yang }
3495278a159SStanley.Yang
3505278a159SStanley.Yang static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
3515278a159SStanley.Yang struct eeprom_table_record *bps, int pages)
3525278a159SStanley.Yang {
3535278a159SStanley.Yang struct amdgpu_virt *virt = &adev->virt;
3545278a159SStanley.Yang struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
3555278a159SStanley.Yang
3565278a159SStanley.Yang if (!data)
3575278a159SStanley.Yang return;
3585278a159SStanley.Yang
3595278a159SStanley.Yang memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
3605278a159SStanley.Yang data->count += pages;
3615278a159SStanley.Yang }
3625278a159SStanley.Yang
3635278a159SStanley.Yang static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
3645278a159SStanley.Yang {
3655278a159SStanley.Yang struct amdgpu_virt *virt = &adev->virt;
3665278a159SStanley.Yang struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
367362936d6SVictor Skvortsov struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
368362936d6SVictor Skvortsov struct ttm_resource_manager *man = &mgr->manager;
3695278a159SStanley.Yang struct amdgpu_bo *bo = NULL;
3705278a159SStanley.Yang uint64_t bp;
3715278a159SStanley.Yang int i;
3725278a159SStanley.Yang
3735278a159SStanley.Yang if (!data)
3745278a159SStanley.Yang return;
3755278a159SStanley.Yang
3765278a159SStanley.Yang for (i = data->last_reserved; i < data->count; i++) {
3775278a159SStanley.Yang bp = data->bps[i].retired_page;
3785278a159SStanley.Yang
3795278a159SStanley.Yang /* There are two cases of reserve error that should be ignored:
3805278a159SStanley.Yang * 1) a ras bad page has been allocated (used by someone);
3815278a159SStanley.Yang * 2) a ras bad page has been reserved (duplicate error injection
3825278a159SStanley.Yang * for one page);
3835278a159SStanley.Yang */
384362936d6SVictor Skvortsov if (ttm_resource_manager_used(man)) {
385362936d6SVictor Skvortsov amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
386362936d6SVictor Skvortsov bp << AMDGPU_GPU_PAGE_SHIFT,
387362936d6SVictor Skvortsov AMDGPU_GPU_PAGE_SIZE);
388362936d6SVictor Skvortsov data->bps_bo[i] = NULL;
389362936d6SVictor Skvortsov } else {
3905278a159SStanley.Yang if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
3915278a159SStanley.Yang AMDGPU_GPU_PAGE_SIZE,
3925278a159SStanley.Yang &bo, NULL))
3935278a159SStanley.Yang DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
3945278a159SStanley.Yang data->bps_bo[i] = bo;
395362936d6SVictor Skvortsov }
3965278a159SStanley.Yang data->last_reserved = i + 1;
3975278a159SStanley.Yang bo = NULL;
3985278a159SStanley.Yang }
3995278a159SStanley.Yang }
4005278a159SStanley.Yang
4015278a159SStanley.Yang static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
4025278a159SStanley.Yang uint64_t retired_page)
4035278a159SStanley.Yang {
4045278a159SStanley.Yang struct amdgpu_virt *virt = &adev->virt;
4055278a159SStanley.Yang struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
4065278a159SStanley.Yang int i;
4075278a159SStanley.Yang
4085278a159SStanley.Yang if (!data)
4095278a159SStanley.Yang return true;
4105278a159SStanley.Yang
4115278a159SStanley.Yang for (i = 0; i < data->count; i++)
4125278a159SStanley.Yang if (retired_page == data->bps[i].retired_page)
4135278a159SStanley.Yang return true;
4145278a159SStanley.Yang
4155278a159SStanley.Yang return false;
4165278a159SStanley.Yang }
4175278a159SStanley.Yang
4185278a159SStanley.Yang static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
4195278a159SStanley.Yang uint64_t bp_block_offset, uint32_t bp_block_size)
4205278a159SStanley.Yang {
4215278a159SStanley.Yang struct eeprom_table_record bp;
4225278a159SStanley.Yang uint64_t retired_page;
4235278a159SStanley.Yang uint32_t bp_idx, bp_cnt;
4246d96ced7STong Liu01 void *vram_usage_va = NULL;
4256d96ced7STong Liu01
4266d96ced7STong Liu01 if (adev->mman.fw_vram_usage_va)
4276d96ced7STong Liu01 vram_usage_va = adev->mman.fw_vram_usage_va;
4286d96ced7STong Liu01 else
4296d96ced7STong Liu01 vram_usage_va = adev->mman.drv_vram_usage_va;
4305278a159SStanley.Yang
4310fa4c25dSTim Huang memset(&bp, 0, sizeof(bp));
4320fa4c25dSTim Huang
4335278a159SStanley.Yang if (bp_block_size) {
4345278a159SStanley.Yang bp_cnt = bp_block_size / sizeof(uint64_t);
4355278a159SStanley.Yang for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
4366d96ced7STong Liu01 retired_page = *(uint64_t *)(vram_usage_va +
4375278a159SStanley.Yang bp_block_offset + bp_idx * sizeof(uint64_t));
4385278a159SStanley.Yang bp.retired_page = retired_page;
4395278a159SStanley.Yang
4405278a159SStanley.Yang if (amdgpu_virt_ras_check_bad_page(adev, retired_page))
4415278a159SStanley.Yang continue;
4425278a159SStanley.Yang
4435278a159SStanley.Yang amdgpu_virt_ras_add_bps(adev, &bp, 1);
4445278a159SStanley.Yang
4455278a159SStanley.Yang amdgpu_virt_ras_reserve_bps(adev);
4465278a159SStanley.Yang }
4475278a159SStanley.Yang }
4485278a159SStanley.Yang }
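
/*
 * Illustrative sketch (not part of the driver): the bad-page block the host
 * places in reserved VRAM is a packed array of 64-bit retired page numbers,
 * so the walk above is conceptually equivalent to (handle_retired_page() is
 * a hypothetical helper):
 *
 *	uint64_t *pages = vram_usage_va + bp_block_offset;
 *	uint32_t n = bp_block_size / sizeof(uint64_t);
 *
 *	for (bp_idx = 0; bp_idx < n; bp_idx++)
 *		handle_retired_page(pages[bp_idx]);
 */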
4495278a159SStanley.Yang
450519b8b76SBokun Zhang static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
451519b8b76SBokun Zhang {
452519b8b76SBokun Zhang struct amd_sriov_msg_pf2vf_info_header *pf2vf_info = adev->virt.fw_reserve.p_pf2vf;
453519b8b76SBokun Zhang uint32_t checksum;
454519b8b76SBokun Zhang uint32_t checkval;
455519b8b76SBokun Zhang
456ed9d2053SBokun Zhang uint32_t i;
457ed9d2053SBokun Zhang uint32_t tmp;
458ed9d2053SBokun Zhang
459519b8b76SBokun Zhang if (adev->virt.fw_reserve.p_pf2vf == NULL)
460519b8b76SBokun Zhang return -EINVAL;
461519b8b76SBokun Zhang
462519b8b76SBokun Zhang if (pf2vf_info->size > 1024) {
463ab66c832SZhigang Luo dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
464519b8b76SBokun Zhang return -EINVAL;
465519b8b76SBokun Zhang }
466519b8b76SBokun Zhang
467519b8b76SBokun Zhang switch (pf2vf_info->version) {
468519b8b76SBokun Zhang case 1:
469519b8b76SBokun Zhang checksum = ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->checksum;
470519b8b76SBokun Zhang checkval = amd_sriov_msg_checksum(
471519b8b76SBokun Zhang adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
472519b8b76SBokun Zhang adev->virt.fw_reserve.checksum_key, checksum);
473519b8b76SBokun Zhang if (checksum != checkval) {
474ab66c832SZhigang Luo dev_err(adev->dev,
475ab66c832SZhigang Luo "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
476ab66c832SZhigang Luo checksum, checkval);
477519b8b76SBokun Zhang return -EINVAL;
478519b8b76SBokun Zhang }
479519b8b76SBokun Zhang
480519b8b76SBokun Zhang adev->virt.gim_feature =
481519b8b76SBokun Zhang ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->feature_flags;
482519b8b76SBokun Zhang break;
483519b8b76SBokun Zhang case 2:
484519b8b76SBokun Zhang /* TODO: missing key, need to add it later */
485519b8b76SBokun Zhang checksum = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->checksum;
486519b8b76SBokun Zhang checkval = amd_sriov_msg_checksum(
487519b8b76SBokun Zhang adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
488519b8b76SBokun Zhang 0, checksum);
489519b8b76SBokun Zhang if (checksum != checkval) {
490ab66c832SZhigang Luo dev_err(adev->dev,
491ab66c832SZhigang Luo "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
492ab66c832SZhigang Luo checksum, checkval);
493519b8b76SBokun Zhang return -EINVAL;
494519b8b76SBokun Zhang }
495519b8b76SBokun Zhang
496519b8b76SBokun Zhang adev->virt.vf2pf_update_interval_ms =
497519b8b76SBokun Zhang ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms;
498519b8b76SBokun Zhang adev->virt.gim_feature =
499519b8b76SBokun Zhang ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all;
5005d238510SPeng Ju Zhou adev->virt.reg_access =
5015d238510SPeng Ju Zhou ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->reg_access_flags.all;
502519b8b76SBokun Zhang
503ed9d2053SBokun Zhang adev->virt.decode_max_dimension_pixels = 0;
504ed9d2053SBokun Zhang adev->virt.decode_max_frame_pixels = 0;
505ed9d2053SBokun Zhang adev->virt.encode_max_dimension_pixels = 0;
506ed9d2053SBokun Zhang adev->virt.encode_max_frame_pixels = 0;
507ed9d2053SBokun Zhang adev->virt.is_mm_bw_enabled = false;
508ed9d2053SBokun Zhang for (i = 0; i < AMD_SRIOV_MSG_RESERVE_VCN_INST; i++) {
509ed9d2053SBokun Zhang tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_dimension_pixels;
510ed9d2053SBokun Zhang adev->virt.decode_max_dimension_pixels = max(tmp, adev->virt.decode_max_dimension_pixels);
511ed9d2053SBokun Zhang
512ed9d2053SBokun Zhang tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_frame_pixels;
513ed9d2053SBokun Zhang adev->virt.decode_max_frame_pixels = max(tmp, adev->virt.decode_max_frame_pixels);
514ed9d2053SBokun Zhang
515ed9d2053SBokun Zhang tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_dimension_pixels;
516ed9d2053SBokun Zhang adev->virt.encode_max_dimension_pixels = max(tmp, adev->virt.encode_max_dimension_pixels);
517ed9d2053SBokun Zhang
518ed9d2053SBokun Zhang tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
519ed9d2053SBokun Zhang adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
520ed9d2053SBokun Zhang }
521ed9d2053SBokun Zhang if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
522ed9d2053SBokun Zhang adev->virt.is_mm_bw_enabled = true;
523ed9d2053SBokun Zhang
5245228cd65SJiawei Gu adev->unique_id =
5255228cd65SJiawei Gu ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
526907fec2dSVictor Skvortsov adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
52784a2947eSVictor Skvortsov adev->virt.ras_telemetry_en_caps.all =
52884a2947eSVictor Skvortsov ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
529519b8b76SBokun Zhang break;
530519b8b76SBokun Zhang default:
531ab66c832SZhigang Luo dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
532519b8b76SBokun Zhang return -EINVAL;
533519b8b76SBokun Zhang }
534519b8b76SBokun Zhang
535519b8b76SBokun Zhang /* correct an interval value that is too large or too small */
536519b8b76SBokun Zhang if (adev->virt.vf2pf_update_interval_ms < 200 || adev->virt.vf2pf_update_interval_ms > 10000)
537519b8b76SBokun Zhang adev->virt.vf2pf_update_interval_ms = 2000;
538519b8b76SBokun Zhang
539519b8b76SBokun Zhang return 0;
540519b8b76SBokun Zhang }
541519b8b76SBokun Zhang
542519b8b76SBokun Zhang static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
543519b8b76SBokun Zhang {
544519b8b76SBokun Zhang struct amd_sriov_msg_vf2pf_info *vf2pf_info;
545519b8b76SBokun Zhang vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
546519b8b76SBokun Zhang
547519b8b76SBokun Zhang if (adev->virt.fw_reserve.p_vf2pf == NULL)
548519b8b76SBokun Zhang return;
549519b8b76SBokun Zhang
550519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE, adev->vce.fw_version);
551519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_UVD, adev->uvd.fw_version);
552519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MC, adev->gmc.fw_version);
553519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ME, adev->gfx.me_fw_version);
554519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_PFP, adev->gfx.pfp_fw_version);
555519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_CE, adev->gfx.ce_fw_version);
556519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC, adev->gfx.rlc_fw_version);
557519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLC, adev->gfx.rlc_srlc_fw_version);
558519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLG, adev->gfx.rlc_srlg_fw_version);
559519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
560519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
561519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
562222e0a71SCandice Li POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
563de3a1e33SCandice Li POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
564de3a1e33SCandice Li adev->psp.asd_context.bin_desc.fw_version);
565de3a1e33SCandice Li POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS,
5664320e6f8SCandice Li adev->psp.ras_context.context.bin_desc.fw_version);
567de3a1e33SCandice Li POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI,
5684320e6f8SCandice Li adev->psp.xgmi_context.context.bin_desc.fw_version);
569519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version);
570519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version);
571519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version);
572519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCN, adev->vcn.fw_version);
573519b8b76SBokun Zhang POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_DMCU, adev->dm.dmcu_fw_version);
574519b8b76SBokun Zhang }
575519b8b76SBokun Zhang
576519b8b76SBokun Zhang static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
577519b8b76SBokun Zhang {
578519b8b76SBokun Zhang struct amd_sriov_msg_vf2pf_info *vf2pf_info;
579519b8b76SBokun Zhang
580519b8b76SBokun Zhang vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
581519b8b76SBokun Zhang
582519b8b76SBokun Zhang if (adev->virt.fw_reserve.p_vf2pf == NULL)
583519b8b76SBokun Zhang return -EINVAL;
584519b8b76SBokun Zhang
585519b8b76SBokun Zhang memset(vf2pf_info, 0, sizeof(struct amd_sriov_msg_vf2pf_info));
586519b8b76SBokun Zhang
587519b8b76SBokun Zhang vf2pf_info->header.size = sizeof(struct amd_sriov_msg_vf2pf_info);
588519b8b76SBokun Zhang vf2pf_info->header.version = AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER;
589519b8b76SBokun Zhang
590519b8b76SBokun Zhang #ifdef MODULE
591519b8b76SBokun Zhang if (THIS_MODULE->version != NULL)
592519b8b76SBokun Zhang strcpy(vf2pf_info->driver_version, THIS_MODULE->version);
593519b8b76SBokun Zhang else
594519b8b76SBokun Zhang #endif
595519b8b76SBokun Zhang strcpy(vf2pf_info->driver_version, "N/A");
596519b8b76SBokun Zhang
597519b8b76SBokun Zhang vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all
598519b8b76SBokun Zhang vf2pf_info->driver_cert = 0;
599519b8b76SBokun Zhang vf2pf_info->os_info.all = 0;
600519b8b76SBokun Zhang
6017db47b83SChristian König vf2pf_info->fb_usage =
6027db47b83SChristian König ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20;
6037db47b83SChristian König vf2pf_info->fb_vis_usage =
6047db47b83SChristian König amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
605519b8b76SBokun Zhang vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
606519b8b76SBokun Zhang vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
607519b8b76SBokun Zhang
608519b8b76SBokun Zhang amdgpu_virt_populate_vf2pf_ucode_info(adev);
609519b8b76SBokun Zhang
610519b8b76SBokun Zhang /* TODO: read dynamic info */
611519b8b76SBokun Zhang vf2pf_info->gfx_usage = 0;
612519b8b76SBokun Zhang vf2pf_info->compute_usage = 0;
613519b8b76SBokun Zhang vf2pf_info->encode_usage = 0;
614519b8b76SBokun Zhang vf2pf_info->decode_usage = 0;
615519b8b76SBokun Zhang
616e77f0f5cSJingwen Chen vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
617*f81cd793SShaoyun Liu if (amdgpu_sriov_is_mes_info_enable(adev)) {
618*f81cd793SShaoyun Liu vf2pf_info->mes_info_addr =
619*f81cd793SShaoyun Liu (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
620*f81cd793SShaoyun Liu vf2pf_info->mes_info_size =
621*f81cd793SShaoyun Liu adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
622f6ac0842Schongli2 }
623519b8b76SBokun Zhang vf2pf_info->checksum =
624519b8b76SBokun Zhang amd_sriov_msg_checksum(
6253bcc0ee1SZhigang Luo vf2pf_info, sizeof(*vf2pf_info), 0, 0);
626519b8b76SBokun Zhang
627519b8b76SBokun Zhang return 0;
628519b8b76SBokun Zhang }
629519b8b76SBokun Zhang
63005ed830eSLee Jones static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
631519b8b76SBokun Zhang {
632519b8b76SBokun Zhang struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work);
63364dcf2f0SJingwen Chen int ret;
634519b8b76SBokun Zhang
63564dcf2f0SJingwen Chen ret = amdgpu_virt_read_pf2vf_data(adev);
636ab66c832SZhigang Luo if (ret) {
637ab66c832SZhigang Luo adev->virt.vf2pf_update_retry_cnt++;
638cbda2758SVignesh Chander
639cbda2758SVignesh Chander if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
640cbda2758SVignesh Chander adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
641f4322b9fSYunxiang Li amdgpu_sriov_runtime(adev)) {
642cbda2758SVignesh Chander
643d1999b40SZhigang Luo amdgpu_ras_set_fed(adev, true);
644ab66c832SZhigang Luo if (amdgpu_reset_domain_schedule(adev->reset_domain,
6455434bc03SVictor Skvortsov &adev->kfd.reset_work))
646ab66c832SZhigang Luo return;
647ab66c832SZhigang Luo else
648ab66c832SZhigang Luo dev_err(adev->dev, "Failed to queue work! at %s", __func__);
649ab66c832SZhigang Luo }
650ab66c832SZhigang Luo
65164dcf2f0SJingwen Chen goto out;
652ab66c832SZhigang Luo }
653ab66c832SZhigang Luo
654ab66c832SZhigang Luo adev->virt.vf2pf_update_retry_cnt = 0;
655519b8b76SBokun Zhang amdgpu_virt_write_vf2pf_data(adev);
656519b8b76SBokun Zhang
65764dcf2f0SJingwen Chen out:
658519b8b76SBokun Zhang schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
659519b8b76SBokun Zhang }
660519b8b76SBokun Zhang
661519b8b76SBokun Zhang void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
662519b8b76SBokun Zhang {
663519b8b76SBokun Zhang if (adev->virt.vf2pf_update_interval_ms != 0) {
664519b8b76SBokun Zhang DRM_INFO("clean up the vf2pf work item\n");
665519b8b76SBokun Zhang cancel_delayed_work_sync(&adev->virt.vf2pf_work);
6663c2a01cbSJack Zhang adev->virt.vf2pf_update_interval_ms = 0;
667519b8b76SBokun Zhang }
668519b8b76SBokun Zhang }
669519b8b76SBokun Zhang
6702dc8f81eSHorace Chen void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
6712dc8f81eSHorace Chen {
6722dc8f81eSHorace Chen adev->virt.fw_reserve.p_pf2vf = NULL;
6732dc8f81eSHorace Chen adev->virt.fw_reserve.p_vf2pf = NULL;
674519b8b76SBokun Zhang adev->virt.vf2pf_update_interval_ms = 0;
675ab66c832SZhigang Luo adev->virt.vf2pf_update_retry_cnt = 0;
6762dc8f81eSHorace Chen
6776d96ced7STong Liu01 if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
6786d96ced7STong Liu01 DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
6796d96ced7STong Liu01 } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
68022c16d25SJingwen Chen /* this path is taken in ip_init and reset to init the workqueue */
68122c16d25SJingwen Chen amdgpu_virt_exchange_data(adev);
6822dc8f81eSHorace Chen
68322c16d25SJingwen Chen INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
68422c16d25SJingwen Chen schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
68522c16d25SJingwen Chen } else if (adev->bios != NULL) {
68622c16d25SJingwen Chen /* this path is taken in the early init stage to get necessary flags, e.g. rlcg_acc related */
687519b8b76SBokun Zhang adev->virt.fw_reserve.p_pf2vf =
688519b8b76SBokun Zhang (struct amd_sriov_msg_pf2vf_info_header *)
689892deb48SVictor Skvortsov (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
690892deb48SVictor Skvortsov
691892deb48SVictor Skvortsov amdgpu_virt_read_pf2vf_data(adev);
692892deb48SVictor Skvortsov }
693892deb48SVictor Skvortsov }
694892deb48SVictor Skvortsov
695892deb48SVictor Skvortsov
696892deb48SVictor Skvortsov void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
697892deb48SVictor Skvortsov {
698892deb48SVictor Skvortsov uint64_t bp_block_offset = 0;
699892deb48SVictor Skvortsov uint32_t bp_block_size = 0;
700892deb48SVictor Skvortsov struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
701892deb48SVictor Skvortsov
7026d96ced7STong Liu01 if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
7036d96ced7STong Liu01 if (adev->mman.fw_vram_usage_va) {
704892deb48SVictor Skvortsov adev->virt.fw_reserve.p_pf2vf =
705892deb48SVictor Skvortsov (struct amd_sriov_msg_pf2vf_info_header *)
706519b8b76SBokun Zhang (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
707519b8b76SBokun Zhang adev->virt.fw_reserve.p_vf2pf =
708519b8b76SBokun Zhang (struct amd_sriov_msg_vf2pf_info_header *)
709519b8b76SBokun Zhang (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
71084a2947eSVictor Skvortsov adev->virt.fw_reserve.ras_telemetry =
71184a2947eSVictor Skvortsov (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
7126d96ced7STong Liu01 } else if (adev->mman.drv_vram_usage_va) {
7136d96ced7STong Liu01 adev->virt.fw_reserve.p_pf2vf =
7146d96ced7STong Liu01 (struct amd_sriov_msg_pf2vf_info_header *)
7156d96ced7STong Liu01 (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
7166d96ced7STong Liu01 adev->virt.fw_reserve.p_vf2pf =
7176d96ced7STong Liu01 (struct amd_sriov_msg_vf2pf_info_header *)
7186d96ced7STong Liu01 (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
71984a2947eSVictor Skvortsov adev->virt.fw_reserve.ras_telemetry =
72084a2947eSVictor Skvortsov (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB << 10));
7216d96ced7STong Liu01 }
722519b8b76SBokun Zhang
723519b8b76SBokun Zhang amdgpu_virt_read_pf2vf_data(adev);
724519b8b76SBokun Zhang amdgpu_virt_write_vf2pf_data(adev);
725519b8b76SBokun Zhang
726519b8b76SBokun Zhang /* bad page handling for version 2 */
7275278a159SStanley.Yang if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
728519b8b76SBokun Zhang pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
729519b8b76SBokun Zhang
730519b8b76SBokun Zhang bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
731519b8b76SBokun Zhang ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
7325278a159SStanley.Yang bp_block_size = pf2vf_v2->bp_block_size;
7335278a159SStanley.Yang
7345278a159SStanley.Yang if (bp_block_size && !adev->virt.ras_init_done)
7355278a159SStanley.Yang amdgpu_virt_init_ras_err_handler_data(adev);
7365278a159SStanley.Yang
7375278a159SStanley.Yang if (adev->virt.ras_init_done)
7385278a159SStanley.Yang amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
7395278a159SStanley.Yang }
740892deb48SVictor Skvortsov }
741519b8b76SBokun Zhang }
7425278a159SStanley.Yang
743a91d91b6STony Yi static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
7443aa0115dSMonk Liu {
7453aa0115dSMonk Liu uint32_t reg;
7463aa0115dSMonk Liu
7473aa0115dSMonk Liu switch (adev->asic_type) {
7483aa0115dSMonk Liu case CHIP_TONGA:
7493aa0115dSMonk Liu case CHIP_FIJI:
7503aa0115dSMonk Liu reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
7513aa0115dSMonk Liu break;
7523aa0115dSMonk Liu case CHIP_VEGA10:
7533aa0115dSMonk Liu case CHIP_VEGA20:
7543aa0115dSMonk Liu case CHIP_NAVI10:
7553aa0115dSMonk Liu case CHIP_NAVI12:
7567cf70047Sshaoyunl case CHIP_SIENNA_CICHLID:
7573aa0115dSMonk Liu case CHIP_ARCTURUS:
7581b15bac7SHawking Zhang case CHIP_ALDEBARAN:
75909872b1cSHorace Chen case CHIP_IP_DISCOVERY:
7603aa0115dSMonk Liu reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
7613aa0115dSMonk Liu break;
7623aa0115dSMonk Liu default: /* other chip doesn't support SRIOV */
7633aa0115dSMonk Liu reg = 0;
7643aa0115dSMonk Liu break;
7653aa0115dSMonk Liu }
7663aa0115dSMonk Liu
7673aa0115dSMonk Liu if (reg & 1)
7683aa0115dSMonk Liu adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
7693aa0115dSMonk Liu
7703aa0115dSMonk Liu if (reg & 0x80000000)
7713aa0115dSMonk Liu adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
7723aa0115dSMonk Liu
7733aa0115dSMonk Liu if (!reg) {
77478b12008SMarek Marczykowski-Górecki /* passthrough mode excludes sriov mode */
77578b12008SMarek Marczykowski-Górecki if (is_virtual_machine() && !xen_initial_domain())
7763aa0115dSMonk Liu adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
7773aa0115dSMonk Liu }
778c1299461SWenhui Sheng
779a91d91b6STony Yi return reg;
780a91d91b6STony Yi }
781a91d91b6STony Yi
782a91d91b6STony Yi static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
783a91d91b6STony Yi {
784a91d91b6STony Yi bool is_sriov = false;
785a91d91b6STony Yi
786c1299461SWenhui Sheng /* we have the ability to check now */
787c1299461SWenhui Sheng if (amdgpu_sriov_vf(adev)) {
788a91d91b6STony Yi is_sriov = true;
789a91d91b6STony Yi
790c1299461SWenhui Sheng switch (adev->asic_type) {
791c1299461SWenhui Sheng case CHIP_TONGA:
792c1299461SWenhui Sheng case CHIP_FIJI:
793c1299461SWenhui Sheng vi_set_virt_ops(adev);
794c1299461SWenhui Sheng break;
795c1299461SWenhui Sheng case CHIP_VEGA10:
796216a9873SJames Yao soc15_set_virt_ops(adev);
797eb85fc23SYongqiang Sun #ifdef CONFIG_X86
798eb85fc23SYongqiang Sun /* do not send GPU_INIT_DATA with MS_HYPERV */
799eb85fc23SYongqiang Sun if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
800eb85fc23SYongqiang Sun #endif
801216a9873SJames Yao /* send a dummy GPU_INIT_DATA request to host on vega10 */
802216a9873SJames Yao amdgpu_virt_request_init_data(adev);
803216a9873SJames Yao break;
804c1299461SWenhui Sheng case CHIP_VEGA20:
805c1299461SWenhui Sheng case CHIP_ARCTURUS:
806e7de0d84SZhigang Luo case CHIP_ALDEBARAN:
807c1299461SWenhui Sheng soc15_set_virt_ops(adev);
808c1299461SWenhui Sheng break;
809c1299461SWenhui Sheng case CHIP_NAVI10:
810c1299461SWenhui Sheng case CHIP_NAVI12:
811c1299461SWenhui Sheng case CHIP_SIENNA_CICHLID:
81209872b1cSHorace Chen case CHIP_IP_DISCOVERY:
813c1299461SWenhui Sheng nv_set_virt_ops(adev);
814c1299461SWenhui Sheng /* try to send a GPU_INIT_DATA request to the host */
815c1299461SWenhui Sheng amdgpu_virt_request_init_data(adev);
816c1299461SWenhui Sheng break;
817c1299461SWenhui Sheng default: /* other chip doesn't support SRIOV */
818a91d91b6STony Yi is_sriov = false;
819c1299461SWenhui Sheng DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type);
820c1299461SWenhui Sheng break;
821c1299461SWenhui Sheng }
822c1299461SWenhui Sheng }
823a91d91b6STony Yi
824a91d91b6STony Yi return is_sriov;
825a91d91b6STony Yi }
826a91d91b6STony Yi
827a91d91b6STony Yi static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
828a91d91b6STony Yi {
829a91d91b6STony Yi ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
830a91d91b6STony Yi ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
831a91d91b6STony Yi
832a91d91b6STony Yi ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
833a91d91b6STony Yi RATELIMIT_MSG_ON_RELEASE);
834a91d91b6STony Yi ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
835a91d91b6STony Yi RATELIMIT_MSG_ON_RELEASE);
836a91d91b6STony Yi
837a91d91b6STony Yi mutex_init(&adev->virt.ras.ras_telemetry_mutex);
838a91d91b6STony Yi
839a91d91b6STony Yi adev->virt.ras.cper_rptr = 0;
840a91d91b6STony Yi }
841a91d91b6STony Yi
842a91d91b6STony Yi void amdgpu_virt_init(struct amdgpu_device *adev)
843a91d91b6STony Yi {
844a91d91b6STony Yi bool is_sriov = false;
845a91d91b6STony Yi uint32_t reg = amdgpu_virt_init_detect_asic(adev);
846a91d91b6STony Yi
847a91d91b6STony Yi is_sriov = amdgpu_virt_init_req_data(adev, reg);
848a91d91b6STony Yi
849a91d91b6STony Yi if (is_sriov)
850a91d91b6STony Yi amdgpu_virt_init_ras(adev);
8513aa0115dSMonk Liu }
85295a2f917SYintian Tao
853f3167919SNirmoy Das static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
85495a2f917SYintian Tao {
85595a2f917SYintian Tao return amdgpu_sriov_is_debug(adev) ? true : false;
85695a2f917SYintian Tao }
85795a2f917SYintian Tao
858f3167919SNirmoy Das static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
859d32709daSYintian Tao {
860d32709daSYintian Tao return amdgpu_sriov_is_normal(adev) ? true : false;
861d32709daSYintian Tao }
862d32709daSYintian Tao
86395a2f917SYintian Tao int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
86495a2f917SYintian Tao {
865d32709daSYintian Tao if (!amdgpu_sriov_vf(adev) ||
866d32709daSYintian Tao amdgpu_virt_access_debugfs_is_kiq(adev))
86795a2f917SYintian Tao return 0;
86895a2f917SYintian Tao
869d32709daSYintian Tao if (amdgpu_virt_access_debugfs_is_mmio(adev))
87095a2f917SYintian Tao adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
87195a2f917SYintian Tao else
87295a2f917SYintian Tao return -EPERM;
87395a2f917SYintian Tao
87495a2f917SYintian Tao return 0;
87595a2f917SYintian Tao }
87695a2f917SYintian Tao
87795a2f917SYintian Tao void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
87895a2f917SYintian Tao {
87995a2f917SYintian Tao if (amdgpu_sriov_vf(adev))
88095a2f917SYintian Tao adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
88195a2f917SYintian Tao }
882a7f28103SKevin Wang
883a7f28103SKevin Wang enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
884a7f28103SKevin Wang {
885a7f28103SKevin Wang enum amdgpu_sriov_vf_mode mode;
886a7f28103SKevin Wang
887a7f28103SKevin Wang if (amdgpu_sriov_vf(adev)) {
888a7f28103SKevin Wang if (amdgpu_sriov_is_pp_one_vf(adev))
889a7f28103SKevin Wang mode = SRIOV_VF_MODE_ONE_VF;
890a7f28103SKevin Wang else
891a7f28103SKevin Wang mode = SRIOV_VF_MODE_MULTI_VF;
892a7f28103SKevin Wang } else {
893a7f28103SKevin Wang mode = SRIOV_VF_MODE_BARE_METAL;
894a7f28103SKevin Wang }
895a7f28103SKevin Wang
896a7f28103SKevin Wang return mode;
897a7f28103SKevin Wang }
898ed9d2053SBokun Zhang
899f83cec3bSVictor Skvortsov void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
900f83cec3bSVictor Skvortsov {
901f83cec3bSVictor Skvortsov /* stop the data exchange thread */
902f83cec3bSVictor Skvortsov amdgpu_virt_fini_data_exchange(adev);
903f83cec3bSVictor Skvortsov amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
904f83cec3bSVictor Skvortsov }
905f83cec3bSVictor Skvortsov
90683f24a8fSHorace Chen void amdgpu_virt_post_reset(struct amdgpu_device *adev)
90783f24a8fSHorace Chen {
9084e8303cfSLijo Lazar if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
90983f24a8fSHorace Chen /* force the GFXOFF state after reset,
91083f24a8fSHorace Chen * to avoid invalid operations before GC is enabled
91183f24a8fSHorace Chen */
91283f24a8fSHorace Chen adev->gfx.is_poweron = false;
91383f24a8fSHorace Chen }
914c8ad1bbbSLin.Cao
915c7d43556SJack Xiao adev->mes.ring[0].sched.ready = false;
91683f24a8fSHorace Chen }
91783f24a8fSHorace Chen
918d9d86d08SHorace Chen bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
919d9d86d08SHorace Chen {
9204e8303cfSLijo Lazar switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
921f8bd7321SHorace Chen case IP_VERSION(13, 0, 0):
922f8bd7321SHorace Chen /* no vf autoload, white list */
923d9d86d08SHorace Chen if (ucode_id == AMDGPU_UCODE_ID_VCN1 ||
924d9d86d08SHorace Chen ucode_id == AMDGPU_UCODE_ID_VCN)
925d9d86d08SHorace Chen return false;
926d9d86d08SHorace Chen else
927d9d86d08SHorace Chen return true;
92830b59910SYuanShang case IP_VERSION(11, 0, 9):
92930b59910SYuanShang case IP_VERSION(11, 0, 7):
93030b59910SYuanShang /* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */
93130b59910SYuanShang if (ucode_id == AMDGPU_UCODE_ID_RLC_G
93230b59910SYuanShang || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
93330b59910SYuanShang || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
93430b59910SYuanShang || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
93530b59910SYuanShang || ucode_id == AMDGPU_UCODE_ID_SMC)
93630b59910SYuanShang return true;
93730b59910SYuanShang else
93830b59910SYuanShang return false;
939f8bd7321SHorace Chen case IP_VERSION(13, 0, 10):
940f8bd7321SHorace Chen /* white list */
941f8bd7321SHorace Chen if (ucode_id == AMDGPU_UCODE_ID_CAP
942f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP
943f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME
944f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC
945f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK
946f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK
947f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK
948f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK
949f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK
950f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK
951f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK
952f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK
953f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_MES
954f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA
955f8bd7321SHorace Chen || ucode_id == AMDGPU_UCODE_ID_CP_MES1
95660e9c7eeSJane Jian || ucode_id == AMDGPU_UCODE_ID_CP_MES1_DATA
95760e9c7eeSJane Jian || ucode_id == AMDGPU_UCODE_ID_VCN1
95860e9c7eeSJane Jian || ucode_id == AMDGPU_UCODE_ID_VCN)
959f8bd7321SHorace Chen return false;
960f8bd7321SHorace Chen else
961f8bd7321SHorace Chen return true;
962f8bd7321SHorace Chen default:
963f8bd7321SHorace Chen /* legacy black list */
964d9d86d08SHorace Chen if (ucode_id == AMDGPU_UCODE_ID_SDMA0
965d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA1
966d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA2
967d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA3
968d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA4
969d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA5
970d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA6
971d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SDMA7
972d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_RLC_G
973d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
974d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
975d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
976d9d86d08SHorace Chen || ucode_id == AMDGPU_UCODE_ID_SMC)
977d9d86d08SHorace Chen return true;
978f8bd7321SHorace Chen else
979d9d86d08SHorace Chen return false;
980d9d86d08SHorace Chen }
981f8bd7321SHorace Chen }
982d9d86d08SHorace Chen
983ed9d2053SBokun Zhang void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
984ed9d2053SBokun Zhang struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
985ed9d2053SBokun Zhang struct amdgpu_video_codec_info *decode, uint32_t decode_array_size)
986ed9d2053SBokun Zhang {
987ed9d2053SBokun Zhang uint32_t i;
988ed9d2053SBokun Zhang
989ed9d2053SBokun Zhang if (!adev->virt.is_mm_bw_enabled)
990ed9d2053SBokun Zhang return;
991ed9d2053SBokun Zhang
992ed9d2053SBokun Zhang if (encode) {
993ed9d2053SBokun Zhang for (i = 0; i < encode_array_size; i++) {
994ed9d2053SBokun Zhang encode[i].max_width = adev->virt.encode_max_dimension_pixels;
995ed9d2053SBokun Zhang encode[i].max_pixels_per_frame = adev->virt.encode_max_frame_pixels;
996ed9d2053SBokun Zhang if (encode[i].max_width > 0)
997ed9d2053SBokun Zhang encode[i].max_height = encode[i].max_pixels_per_frame / encode[i].max_width;
998ed9d2053SBokun Zhang else
999ed9d2053SBokun Zhang encode[i].max_height = 0;
1000ed9d2053SBokun Zhang }
1001ed9d2053SBokun Zhang }
1002ed9d2053SBokun Zhang
1003ed9d2053SBokun Zhang if (decode) {
1004ed9d2053SBokun Zhang for (i = 0; i < decode_array_size; i++) {
1005ed9d2053SBokun Zhang decode[i].max_width = adev->virt.decode_max_dimension_pixels;
1006ed9d2053SBokun Zhang decode[i].max_pixels_per_frame = adev->virt.decode_max_frame_pixels;
1007ed9d2053SBokun Zhang if (decode[i].max_width > 0)
1008ed9d2053SBokun Zhang decode[i].max_height = decode[i].max_pixels_per_frame / decode[i].max_width;
1009ed9d2053SBokun Zhang else
1010ed9d2053SBokun Zhang decode[i].max_height = 0;
1011ed9d2053SBokun Zhang }
1012ed9d2053SBokun Zhang }
1013ed9d2053SBokun Zhang }
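
/*
 * Illustrative sketch (not part of the driver): how the clamp above affects a
 * codec table. Passing a decode array through the helper caps every entry to
 * the host-provided limits, and max_height is derived from the per-frame
 * pixel budget. The array contents are hypothetical.
 *
 *	struct amdgpu_video_codec_info decode[2] = { ... };
 *
 *	amdgpu_virt_update_sriov_video_codec(adev, NULL, 0,
 *					     decode, ARRAY_SIZE(decode));
 *	// decode[i].max_width  == adev->virt.decode_max_dimension_pixels
 *	// decode[i].max_height == decode[i].max_pixels_per_frame /
 *	//			    decode[i].max_width
 */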
101429dbcac8SHawking Zhang
101585150626SVictor Lu bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
1016381519dfSHawking Zhang u32 acc_flags, u32 hwip,
1017381519dfSHawking Zhang bool write, u32 *rlcg_flag)
101829dbcac8SHawking Zhang {
101929dbcac8SHawking Zhang bool ret = false;
102029dbcac8SHawking Zhang
102129dbcac8SHawking Zhang switch (hwip) {
102229dbcac8SHawking Zhang case GC_HWIP:
102329dbcac8SHawking Zhang if (amdgpu_sriov_reg_indirect_gc(adev)) {
102429dbcac8SHawking Zhang *rlcg_flag =
102529dbcac8SHawking Zhang write ? AMDGPU_RLCG_GC_WRITE : AMDGPU_RLCG_GC_READ;
102629dbcac8SHawking Zhang ret = true;
102729dbcac8SHawking Zhang /* only in the new version are AMDGPU_REGS_NO_KIQ and
102829dbcac8SHawking Zhang * AMDGPU_REGS_RLC enabled simultaneously */
102929dbcac8SHawking Zhang } else if ((acc_flags & AMDGPU_REGS_RLC) &&
103022b1df28SGuchun Chen !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) {
103129dbcac8SHawking Zhang *rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY;
103229dbcac8SHawking Zhang ret = true;
103329dbcac8SHawking Zhang }
103429dbcac8SHawking Zhang break;
103529dbcac8SHawking Zhang case MMHUB_HWIP:
103629dbcac8SHawking Zhang if (amdgpu_sriov_reg_indirect_mmhub(adev) &&
103729dbcac8SHawking Zhang (acc_flags & AMDGPU_REGS_RLC) && write) {
103829dbcac8SHawking Zhang *rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE;
103929dbcac8SHawking Zhang ret = true;
104029dbcac8SHawking Zhang }
104129dbcac8SHawking Zhang break;
104229dbcac8SHawking Zhang default:
104329dbcac8SHawking Zhang break;
104429dbcac8SHawking Zhang }
104529dbcac8SHawking Zhang return ret;
104629dbcac8SHawking Zhang }
10475d447e29SHawking Zhang
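/**
 * amdgpu_virt_rlcg_reg_rw - perform a register access through the RLCG interface
 * @adev: amdgpu device pointer
 * @offset: dword offset of the target register
 * @v: value to write (ignored for reads)
 * @flag: RLCG command flag from amdgpu_virt_get_rlcg_reg_access_flag()
 * @xcc_id: XCC instance to use for the access
 *
 * The request is encoded into the RLC scratch registers and, when a spare
 * interrupt is configured, the RLC is kicked to service it; the function then
 * polls scratch_reg1 until the RLC clears the address field or the poll
 * times out. Accesses to grbm_cntl/grbm_idx are forwarded via scratch_reg2/3
 * instead.
 *
 * Returns the value read back from scratch_reg0, or 0 when RLCG register
 * access is not available for this device.
 */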
104885150626SVictor Lu u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
10495d447e29SHawking Zhang {
10505d447e29SHawking Zhang struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
10515d447e29SHawking Zhang uint32_t timeout = 50000;
10525d447e29SHawking Zhang uint32_t i, tmp;
10535d447e29SHawking Zhang uint32_t ret = 0;
1054d68cf992SGavin Wan void *scratch_reg0;
1055d68cf992SGavin Wan void *scratch_reg1;
1056d68cf992SGavin Wan void *scratch_reg2;
1057d68cf992SGavin Wan void *scratch_reg3;
1058d68cf992SGavin Wan void *spare_int;
1059dc0297f3SSrinivasan Shanmugam unsigned long flags;
10605d447e29SHawking Zhang
10615d447e29SHawking Zhang if (!adev->gfx.rlc.rlcg_reg_access_supported) {
10625d447e29SHawking Zhang dev_err(adev->dev,
10635d447e29SHawking Zhang "indirect register access through rlcg is not available\n");
10645d447e29SHawking Zhang return 0;
10655d447e29SHawking Zhang }
10665d447e29SHawking Zhang
10678ed49dd1SVictor Lu if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
10688ed49dd1SVictor Lu dev_err(adev->dev, "invalid xcc\n");
10698ed49dd1SVictor Lu return 0;
10708ed49dd1SVictor Lu }
10718ed49dd1SVictor Lu
1072b3948ad1SYunxiang Li if (amdgpu_device_skip_hw_access(adev))
1073b3948ad1SYunxiang Li return 0;
1074b3948ad1SYunxiang Li
10758ed49dd1SVictor Lu reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
10765d447e29SHawking Zhang scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
10775d447e29SHawking Zhang scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
10785d447e29SHawking Zhang scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
10795d447e29SHawking Zhang scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
1080e864180eSVictor Skvortsov
1081dc0297f3SSrinivasan Shanmugam spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);
1082e864180eSVictor Skvortsov
10835d447e29SHawking Zhang if (reg_access_ctrl->spare_int)
10845d447e29SHawking Zhang spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
10855d447e29SHawking Zhang
10865d447e29SHawking Zhang if (offset == reg_access_ctrl->grbm_cntl) {
10875d447e29SHawking Zhang /* if the target reg offset is grbm_cntl, write to scratch_reg2 */
10885d447e29SHawking Zhang writel(v, scratch_reg2);
1089fa9b4155SYifan Zha if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
1090fa9b4155SYifan Zha writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
10915d447e29SHawking Zhang } else if (offset == reg_access_ctrl->grbm_idx) {
10925d447e29SHawking Zhang /* if the target reg offset is grbm_idx, write to scratch_reg3 */
10935d447e29SHawking Zhang writel(v, scratch_reg3);
1094fa9b4155SYifan Zha if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
1095fa9b4155SYifan Zha writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
10965d447e29SHawking Zhang } else {
10975d447e29SHawking Zhang /*
10985d447e29SHawking Zhang * SCRATCH_REG0 = read/write value
10995d447e29SHawking Zhang * SCRATCH_REG1[30:28] = command
11005d447e29SHawking Zhang * SCRATCH_REG1[19:0] = address in dword
11018093383aSVictor Lu * SCRATCH_REG1[27:24] = Error reporting
11025d447e29SHawking Zhang */
11035d447e29SHawking Zhang writel(v, scratch_reg0);
11045d447e29SHawking Zhang writel((offset | flag), scratch_reg1);
11055d447e29SHawking Zhang if (reg_access_ctrl->spare_int)
11065d447e29SHawking Zhang writel(1, spare_int);
11075d447e29SHawking Zhang
11085d447e29SHawking Zhang for (i = 0; i < timeout; i++) {
11095d447e29SHawking Zhang tmp = readl(scratch_reg1);
1110aa79d380SVictor Skvortsov if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
11115d447e29SHawking Zhang break;
11125d447e29SHawking Zhang udelay(10);
11135d447e29SHawking Zhang }
11145d447e29SHawking Zhang
11158093383aSVictor Lu tmp = readl(scratch_reg1);
11168093383aSVictor Lu if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
11175d447e29SHawking Zhang if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
11185d447e29SHawking Zhang if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
11195d447e29SHawking Zhang dev_err(adev->dev,
11205d447e29SHawking Zhang "vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset);
11215d447e29SHawking Zhang } else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) {
11225d447e29SHawking Zhang dev_err(adev->dev,
11235d447e29SHawking Zhang "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
11245d447e29SHawking Zhang } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
11255d447e29SHawking Zhang dev_err(adev->dev,
11262f78f0d3SColin Ian King "register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
11275d447e29SHawking Zhang } else {
11285d447e29SHawking Zhang dev_err(adev->dev,
11295d447e29SHawking Zhang "unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
11305d447e29SHawking Zhang }
11315d447e29SHawking Zhang } else {
11325d447e29SHawking Zhang dev_err(adev->dev,
11335d447e29SHawking Zhang "timeout: rlcg failed to program reg: 0x%05x\n", offset);
11345d447e29SHawking Zhang }
11355d447e29SHawking Zhang }
11365d447e29SHawking Zhang }
11375d447e29SHawking Zhang
11385d447e29SHawking Zhang ret = readl(scratch_reg0);
1139e864180eSVictor Skvortsov
1140dc0297f3SSrinivasan Shanmugam spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);
1141e864180eSVictor Skvortsov
11425d447e29SHawking Zhang return ret;
11435d447e29SHawking Zhang }
11445d447e29SHawking Zhang
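/**
 * amdgpu_sriov_wreg - SR-IOV aware register write
 * @adev: amdgpu device pointer
 * @offset: dword offset of the register
 * @value: value to write
 * @acc_flags: register access flags (AMDGPU_REGS_*)
 * @hwip: hardware IP the register belongs to
 * @xcc_id: XCC instance
 *
 * Routes the write through the RLCG interface when the VF requires it,
 * otherwise falls back to a regular WREG32/WREG32_NO_KIQ access.
 *
 * Illustrative call (not taken from this file), assuming a GC register that
 * must honour RLC routing on a VF:
 *
 *   amdgpu_sriov_wreg(adev, reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0);
 */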
11455d447e29SHawking Zhang void amdgpu_sriov_wreg(struct amdgpu_device *adev,
11465d447e29SHawking Zhang u32 offset, u32 value,
11478ed49dd1SVictor Lu u32 acc_flags, u32 hwip, u32 xcc_id)
11485d447e29SHawking Zhang {
11495d447e29SHawking Zhang u32 rlcg_flag;
11505d447e29SHawking Zhang
1151b3948ad1SYunxiang Li if (amdgpu_device_skip_hw_access(adev))
1152b3948ad1SYunxiang Li return;
1153b3948ad1SYunxiang Li
11545d447e29SHawking Zhang if (!amdgpu_sriov_runtime(adev) &&
11555d447e29SHawking Zhang amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
11568ed49dd1SVictor Lu amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
11575d447e29SHawking Zhang return;
11585d447e29SHawking Zhang }
11595d447e29SHawking Zhang
11605d447e29SHawking Zhang if (acc_flags & AMDGPU_REGS_NO_KIQ)
11615d447e29SHawking Zhang WREG32_NO_KIQ(offset, value);
11625d447e29SHawking Zhang else
11635d447e29SHawking Zhang WREG32(offset, value);
11645d447e29SHawking Zhang }
11655d447e29SHawking Zhang
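/**
 * amdgpu_sriov_rreg - SR-IOV aware register read
 * @adev: amdgpu device pointer
 * @offset: dword offset of the register
 * @acc_flags: register access flags (AMDGPU_REGS_*)
 * @hwip: hardware IP the register belongs to
 * @xcc_id: XCC instance
 *
 * Routes the read through the RLCG interface when the VF requires it,
 * otherwise falls back to a regular RREG32/RREG32_NO_KIQ access.
 *
 * Returns the register value, or 0 when hardware access is skipped.
 */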
11665d447e29SHawking Zhang u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
11678ed49dd1SVictor Lu u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
11685d447e29SHawking Zhang {
11695d447e29SHawking Zhang u32 rlcg_flag;
11705d447e29SHawking Zhang
1171b3948ad1SYunxiang Li if (amdgpu_device_skip_hw_access(adev))
1172b3948ad1SYunxiang Li return 0;
1173b3948ad1SYunxiang Li
11745d447e29SHawking Zhang if (!amdgpu_sriov_runtime(adev) &&
11755d447e29SHawking Zhang amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag))
11768ed49dd1SVictor Lu return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);
11775d447e29SHawking Zhang
11785d447e29SHawking Zhang if (acc_flags & AMDGPU_REGS_NO_KIQ)
11795d447e29SHawking Zhang return RREG32_NO_KIQ(offset);
11805d447e29SHawking Zhang else
11815d447e29SHawking Zhang return RREG32(offset);
11825d447e29SHawking Zhang }
11839256e8d4SSurbhi Kakarya
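/**
 * amdgpu_sriov_xnack_support - report whether XNACK can be used on this device
 * @adev: amdgpu device pointer
 *
 * XNACK is not supported for SR-IOV VFs on GC 9.4.2; every other
 * configuration reports true.
 */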
11849256e8d4SSurbhi Kakarya bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
11859256e8d4SSurbhi Kakarya {
11869256e8d4SSurbhi Kakarya bool xnack_mode = true;
11879256e8d4SSurbhi Kakarya
1188ed342a2eSLijo Lazar if (amdgpu_sriov_vf(adev) &&
1189ed342a2eSLijo Lazar amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
11909256e8d4SSurbhi Kakarya xnack_mode = false;
11919256e8d4SSurbhi Kakarya
11929256e8d4SSurbhi Kakarya return xnack_mode;
11939256e8d4SSurbhi Kakarya }
1194907fec2dSVictor Skvortsov
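/**
 * amdgpu_virt_get_ras_capability - import RAS capabilities advertised by the host
 * @adev: amdgpu device pointer
 *
 * Translates the per-block RAS enable bits received from the host into
 * adev->ras_hw_enabled and records whether poison consumption is handled on
 * the host side.
 *
 * Returns true if host RAS capabilities are available, false otherwise.
 */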
1195907fec2dSVictor Skvortsov bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
1196907fec2dSVictor Skvortsov {
1197907fec2dSVictor Skvortsov struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1198907fec2dSVictor Skvortsov
1199907fec2dSVictor Skvortsov if (!amdgpu_sriov_ras_caps_en(adev))
1200907fec2dSVictor Skvortsov return false;
1201907fec2dSVictor Skvortsov
1202907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_umc)
1203907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
1204907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_sdma)
1205907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
1206907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_gfx)
1207907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
1208907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_mmhub)
1209907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
1210907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_athub)
1211907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
1212907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_pcie_bif)
1213907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
1214907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_hdp)
1215907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
1216907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
1217907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
1218907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_df)
1219907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
1220907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_smn)
1221907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
1222907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_sem)
1223907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
1224907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_mp0)
1225907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
1226907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_mp1)
1227907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
1228907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_fuse)
1229907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
1230907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_mca)
1231907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
1232907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_vcn)
1233907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
1234907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_jpeg)
1235907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
1236907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_ih)
1237907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
1238907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.block_mpio)
1239907fec2dSVictor Skvortsov adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
1240907fec2dSVictor Skvortsov
1241907fec2dSVictor Skvortsov if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
1242907fec2dSVictor Skvortsov con->poison_supported = true; /* Poison is handled by host */
1243907fec2dSVictor Skvortsov
1244907fec2dSVictor Skvortsov return true;
1245907fec2dSVictor Skvortsov }
124684a2947eSVictor Skvortsov
124784a2947eSVictor Skvortsov static inline enum amd_sriov_ras_telemetry_gpu_block
124884a2947eSVictor Skvortsov amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) {
124984a2947eSVictor Skvortsov switch (block) {
125084a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__UMC:
125184a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_UMC;
125284a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__SDMA:
125384a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_SDMA;
125484a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__GFX:
125584a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_GFX;
125684a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__MMHUB:
125784a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
125884a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__ATHUB:
125984a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
126084a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__PCIE_BIF:
126184a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
126284a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__HDP:
126384a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_HDP;
126484a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__XGMI_WAFL:
126584a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
126684a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__DF:
126784a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_DF;
126884a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__SMN:
126984a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_SMN;
127084a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__SEM:
127184a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_SEM;
127284a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__MP0:
127384a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_MP0;
127484a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__MP1:
127584a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_MP1;
127684a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__FUSE:
127784a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_FUSE;
127884a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__MCA:
127984a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_MCA;
128084a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__VCN:
128184a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_VCN;
128284a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__JPEG:
128384a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_JPEG;
128484a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__IH:
128584a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_IH;
128684a2947eSVictor Skvortsov case AMDGPU_RAS_BLOCK__MPIO:
128784a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_MPIO;
128884a2947eSVictor Skvortsov default:
128904893397SVictor Skvortsov DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n",
129004893397SVictor Skvortsov block);
129184a2947eSVictor Skvortsov return RAS_TELEMETRY_GPU_BLOCK_COUNT;
129284a2947eSVictor Skvortsov }
129384a2947eSVictor Skvortsov }
129484a2947eSVictor Skvortsov
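/*
 * Validate the size and checksum of the error-count telemetry shared by the
 * host and cache it in adev->virt.count_cache; a checksum mismatch leaves the
 * cache untouched.
 */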
129584a2947eSVictor Skvortsov static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
129684a2947eSVictor Skvortsov struct amdsriov_ras_telemetry *host_telemetry)
129784a2947eSVictor Skvortsov {
129884a2947eSVictor Skvortsov struct amd_sriov_ras_telemetry_error_count *tmp = NULL;
129984a2947eSVictor Skvortsov uint32_t checksum, used_size;
130084a2947eSVictor Skvortsov
130184a2947eSVictor Skvortsov checksum = host_telemetry->header.checksum;
130284a2947eSVictor Skvortsov used_size = host_telemetry->header.used_size;
130384a2947eSVictor Skvortsov
130484a2947eSVictor Skvortsov if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
130584a2947eSVictor Skvortsov return 0;
130684a2947eSVictor Skvortsov
1307a21ab06bSMirsad Todorovac tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
130884a2947eSVictor Skvortsov if (!tmp)
130984a2947eSVictor Skvortsov return -ENOMEM;
131084a2947eSVictor Skvortsov
131184a2947eSVictor Skvortsov if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
131284a2947eSVictor Skvortsov goto out;
131384a2947eSVictor Skvortsov
131484a2947eSVictor Skvortsov memcpy(&adev->virt.count_cache, tmp,
131584a2947eSVictor Skvortsov min(used_size, sizeof(adev->virt.count_cache)));
131684a2947eSVictor Skvortsov out:
131784a2947eSVictor Skvortsov kfree(tmp);
131884a2947eSVictor Skvortsov
131984a2947eSVictor Skvortsov return 0;
132084a2947eSVictor Skvortsov }
132184a2947eSVictor Skvortsov
132284a2947eSVictor Skvortsov static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
132384a2947eSVictor Skvortsov {
132484a2947eSVictor Skvortsov struct amdgpu_virt *virt = &adev->virt;
132584a2947eSVictor Skvortsov
132684a2947eSVictor Skvortsov /* The host allows 15 RAS telemetry requests per 60 seconds. After that,
132784a2947eSVictor Skvortsov * the host will ignore incoming guest messages. Rate-limit the guest
132884a2947eSVictor Skvortsov * messages to prevent a guest self-DoS.
132984a2947eSVictor Skvortsov */
1330a91d91b6STony Yi if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
1331a91d91b6STony Yi mutex_lock(&virt->ras.ras_telemetry_mutex);
133284a2947eSVictor Skvortsov if (!virt->ops->req_ras_err_count(adev))
133384a2947eSVictor Skvortsov amdgpu_virt_cache_host_error_counts(adev,
1334a91d91b6STony Yi virt->fw_reserve.ras_telemetry);
1335a91d91b6STony Yi mutex_unlock(&virt->ras.ras_telemetry_mutex);
133684a2947eSVictor Skvortsov }
133784a2947eSVictor Skvortsov
133884a2947eSVictor Skvortsov return 0;
133984a2947eSVictor Skvortsov }
134084a2947eSVictor Skvortsov
134184a2947eSVictor Skvortsov /* Bypass ACA interface and query ECC counts directly from host */
134284a2947eSVictor Skvortsov int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
134384a2947eSVictor Skvortsov struct ras_err_data *err_data)
134484a2947eSVictor Skvortsov {
134584a2947eSVictor Skvortsov enum amd_sriov_ras_telemetry_gpu_block sriov_block;
134684a2947eSVictor Skvortsov
134784a2947eSVictor Skvortsov sriov_block = amdgpu_ras_block_to_sriov(adev, block);
134884a2947eSVictor Skvortsov
134984a2947eSVictor Skvortsov if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
135084a2947eSVictor Skvortsov !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
135184a2947eSVictor Skvortsov return -EOPNOTSUPP;
135284a2947eSVictor Skvortsov
135384a2947eSVictor Skvortsov /* Host Access may be lost during reset, just return last cached data. */
135484a2947eSVictor Skvortsov if (down_read_trylock(&adev->reset_domain->sem)) {
135584a2947eSVictor Skvortsov amdgpu_virt_req_ras_err_count_internal(adev, false);
135684a2947eSVictor Skvortsov up_read(&adev->reset_domain->sem);
135784a2947eSVictor Skvortsov }
135884a2947eSVictor Skvortsov
135984a2947eSVictor Skvortsov err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count;
136084a2947eSVictor Skvortsov err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count;
136184a2947eSVictor Skvortsov err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count;
136284a2947eSVictor Skvortsov
136384a2947eSVictor Skvortsov return 0;
136484a2947eSVictor Skvortsov }
136584a2947eSVictor Skvortsov
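/*
 * Copy the CPER records returned by the host into the guest CPER ring buffer
 * after validating the telemetry checksum, then advance the cached read
 * pointer to the host write pointer.
 */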
1366a91d91b6STony Yi static int
1367a91d91b6STony Yi amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
1368a91d91b6STony Yi struct amdsriov_ras_telemetry *host_telemetry,
1369a91d91b6STony Yi u32 *more)
1370a91d91b6STony Yi {
1371a91d91b6STony Yi struct amd_sriov_ras_cper_dump *cper_dump = NULL;
1372a91d91b6STony Yi struct cper_hdr *entry = NULL;
1373a91d91b6STony Yi struct amdgpu_ring *ring = &adev->cper.ring_buf;
1374a91d91b6STony Yi uint32_t checksum, used_size, i;
1375a91d91b6STony Yi int ret = 0;
1376a91d91b6STony Yi
1377a91d91b6STony Yi checksum = host_telemetry->header.checksum;
1378a91d91b6STony Yi used_size = host_telemetry->header.used_size;
1379a91d91b6STony Yi
1380a91d91b6STony Yi if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
1381a91d91b6STony Yi return 0;
1382a91d91b6STony Yi
1383a91d91b6STony Yi cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
1384a91d91b6STony Yi if (!cper_dump)
1385a91d91b6STony Yi return -ENOMEM;
1386a91d91b6STony Yi
1387a91d91b6STony Yi if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0))
1388a91d91b6STony Yi goto out;
1389a91d91b6STony Yi
1390a91d91b6STony Yi *more = cper_dump->more;
1391a91d91b6STony Yi
1392a91d91b6STony Yi if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
1393a91d91b6STony Yi dev_warn(
1394a91d91b6STony Yi adev->dev,
1395a91d91b6STony Yi "guest specified rptr that was too high! guest rptr: 0x%llx, host wptr: 0x%llx\n",
1396a91d91b6STony Yi adev->virt.ras.cper_rptr, cper_dump->wptr);
1397a91d91b6STony Yi
1398a91d91b6STony Yi adev->virt.ras.cper_rptr = cper_dump->wptr;
1399a91d91b6STony Yi goto out;
1400a91d91b6STony Yi }
1401a91d91b6STony Yi
1402a91d91b6STony Yi entry = (struct cper_hdr *)&cper_dump->buf[0];
1403a91d91b6STony Yi
1404a91d91b6STony Yi for (i = 0; i < cper_dump->count; i++) {
1405a91d91b6STony Yi amdgpu_cper_ring_write(ring, entry, entry->record_length);
1406a91d91b6STony Yi entry = (struct cper_hdr *)((char *)entry +
1407a91d91b6STony Yi entry->record_length);
1408a91d91b6STony Yi }
1409a91d91b6STony Yi
1410a91d91b6STony Yi if (cper_dump->overflow_count)
1411a91d91b6STony Yi dev_warn(adev->dev,
1412a91d91b6STony Yi "host reported CPER overflow of 0x%llx entries!\n",
1413a91d91b6STony Yi cper_dump->overflow_count);
1414a91d91b6STony Yi
1415a91d91b6STony Yi adev->virt.ras.cper_rptr = cper_dump->wptr;
1416a91d91b6STony Yi out:
1417a91d91b6STony Yi kfree(cper_dump);
1418a91d91b6STony Yi
1419a91d91b6STony Yi return ret;
1420a91d91b6STony Yi }
1421a91d91b6STony Yi
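/*
 * Keep requesting CPER dumps from the host, starting at the cached read
 * pointer, until the host reports that no more records are pending.
 */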
1422a91d91b6STony Yi static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
1423a91d91b6STony Yi {
1424a91d91b6STony Yi struct amdgpu_virt *virt = &adev->virt;
1425a91d91b6STony Yi int ret = 0;
1426a91d91b6STony Yi uint32_t more = 0;
1427a91d91b6STony Yi
1428a91d91b6STony Yi if (!amdgpu_sriov_ras_cper_en(adev))
1429a91d91b6STony Yi return -EOPNOTSUPP;
1430a91d91b6STony Yi
1431a91d91b6STony Yi do {
1432a91d91b6STony Yi if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
1433a91d91b6STony Yi ret = amdgpu_virt_write_cpers_to_ring(
1434a91d91b6STony Yi adev, virt->fw_reserve.ras_telemetry, &more);
1435a91d91b6STony Yi else
1436a91d91b6STony Yi ret = 0;
1437a91d91b6STony Yi } while (more);
1438a91d91b6STony Yi
1439a91d91b6STony Yi return ret;
1440a91d91b6STony Yi }
1441a91d91b6STony Yi
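/**
 * amdgpu_virt_req_ras_cper_dump - fetch pending CPER records from the host
 * @adev: amdgpu device pointer
 * @force_update: bypass the rate limit and fetch immediately
 *
 * Rate-limited wrapper around the CPER dump request; the request is skipped
 * entirely while a reset holds the reset domain semaphore.
 *
 * Returns 0 on success or a negative error code.
 */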
1442a91d91b6STony Yi int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
1443a91d91b6STony Yi {
1444a91d91b6STony Yi struct amdgpu_virt *virt = &adev->virt;
1445a91d91b6STony Yi int ret = 0;
1446a91d91b6STony Yi
1447a91d91b6STony Yi if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
1448a91d91b6STony Yi down_read_trylock(&adev->reset_domain->sem)) {
1449a91d91b6STony Yi mutex_lock(&virt->ras.ras_telemetry_mutex);
1450a91d91b6STony Yi ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
1451a91d91b6STony Yi mutex_unlock(&virt->ras.ras_telemetry_mutex);
1452a91d91b6STony Yi up_read(&adev->reset_domain->sem);
1453a91d91b6STony Yi }
1454a91d91b6STony Yi
1455a91d91b6STony Yi return ret;
1456a91d91b6STony Yi }
1457a91d91b6STony Yi
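/**
 * amdgpu_virt_ras_telemetry_post_reset - refresh host RAS telemetry after reset
 * @adev: amdgpu device pointer
 *
 * Forces a new error-count request to the host and re-runs the RAS error
 * count query so the cached totals are current after a GPU reset.
 *
 * Returns 0.
 */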
145884a2947eSVictor Skvortsov int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
145984a2947eSVictor Skvortsov {
146084a2947eSVictor Skvortsov unsigned long ue_count, ce_count;
146184a2947eSVictor Skvortsov
146284a2947eSVictor Skvortsov if (amdgpu_sriov_ras_telemetry_en(adev)) {
146384a2947eSVictor Skvortsov amdgpu_virt_req_ras_err_count_internal(adev, true);
146484a2947eSVictor Skvortsov amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
146584a2947eSVictor Skvortsov }
146684a2947eSVictor Skvortsov
146784a2947eSVictor Skvortsov return 0;
146884a2947eSVictor Skvortsov }
146904893397SVictor Skvortsov
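/**
 * amdgpu_virt_ras_telemetry_block_en - check if host telemetry covers a RAS block
 * @adev: amdgpu device pointer
 * @block: RAS block to query
 *
 * Returns true if the host exposes telemetry for @block, false otherwise.
 */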
147004893397SVictor Skvortsov bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
147104893397SVictor Skvortsov enum amdgpu_ras_block block)
147204893397SVictor Skvortsov {
147304893397SVictor Skvortsov enum amd_sriov_ras_telemetry_gpu_block sriov_block;
147404893397SVictor Skvortsov
147504893397SVictor Skvortsov sriov_block = amdgpu_ras_block_to_sriov(adev, block);
147604893397SVictor Skvortsov
147704893397SVictor Skvortsov if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
147804893397SVictor Skvortsov !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
147904893397SVictor Skvortsov return false;
148004893397SVictor Skvortsov
148104893397SVictor Skvortsov return true;
148204893397SVictor Skvortsov }
1483