drm/amdgpu: add spin lock to protect freed list in vm (v2)
[deliverable/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
CommitLineData
aaa36a97
AD
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
24#include "drmP.h"
25#include "amdgpu.h"
26#include "amdgpu_gfx.h"
27#include "vi.h"
28#include "vid.h"
29#include "amdgpu_ucode.h"
30#include "clearstate_vi.h"
31
32#include "gmc/gmc_8_2_d.h"
33#include "gmc/gmc_8_2_sh_mask.h"
34
35#include "oss/oss_3_0_d.h"
36#include "oss/oss_3_0_sh_mask.h"
37
38#include "bif/bif_5_0_d.h"
39#include "bif/bif_5_0_sh_mask.h"
40
41#include "gca/gfx_8_0_d.h"
42#include "gca/gfx_8_0_enum.h"
43#include "gca/gfx_8_0_sh_mask.h"
44#include "gca/gfx_8_0_enum.h"
45
46#include "uvd/uvd_5_0_d.h"
47#include "uvd/uvd_5_0_sh_mask.h"
48
49#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h"
51
52#define GFX8_NUM_GFX_RINGS 1
53#define GFX8_NUM_COMPUTE_RINGS 8
54
55#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
58
59#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68
c65444fe
JZ
69MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
70MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
71MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
72MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
73MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
74MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
75
e3c7656c
SL
76MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
77MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
78MODULE_FIRMWARE("amdgpu/stoney_me.bin");
79MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
80MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
81
c65444fe
JZ
82MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
83MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
84MODULE_FIRMWARE("amdgpu/tonga_me.bin");
85MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
86MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
87MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
88
89MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
90MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
91MODULE_FIRMWARE("amdgpu/topaz_me.bin");
92MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
93MODULE_FIRMWARE("amdgpu/topaz_mec2.bin");
94MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
aaa36a97 95
af15a2d5
DZ
96MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
97MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
98MODULE_FIRMWARE("amdgpu/fiji_me.bin");
99MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
100MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
101MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
102
aaa36a97
AD
/*
 * Per-VMID GDS register offsets: one {BASE, SIZE, GWS, OA} tuple for each of
 * the 16 VMIDs, used to program Global Data Share / Global Wave Sync /
 * Ordered Append allocations for a given VM.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
122
/*
 * Tonga A11 "golden" register settings, consumed by
 * amdgpu_program_register_sequence() as {register, AND-mask, OR-value}
 * triplets. Values come from hardware validation; do not reorder or edit.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
141
/*
 * Tonga common golden settings ({register, AND-mask, OR-value} triplets):
 * raster config and SPI CU resource reservation defaults.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
153
154static const u32 tonga_mgcg_cgcg_init[] =
155{
156 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
157 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
158 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
159 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
160 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
161 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
162 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
163 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
165 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
166 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
167 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
168 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
169 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
170 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
171 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
172 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
173 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
174 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
175 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
176 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
177 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
178 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
181 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
182 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
183 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
184 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
185 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
186 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
187 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
188 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
189 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
190 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
191 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
192 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
193 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
194 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
195 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
196 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
197 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
198 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
199 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
200 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
201 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
202 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
203 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
204 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
205 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
206 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
207 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
215 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
220 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
228 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
229 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
230 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
231};
232
af15a2d5
DZ
/*
 * Fiji common golden settings ({register, AND-mask, OR-value} triplets).
 * Note mmGRBM_GFX_INDEX is written twice: once before the raster/SPI block
 * and once before mmSPI_CONFIG_CNTL_1 — the sequence order matters.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
246
/*
 * Fiji A10 golden register settings ({register, AND-mask, OR-value}
 * triplets from hardware validation).
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
261
262static const u32 fiji_mgcg_cgcg_init[] =
263{
a7ca8ef9 264 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
af15a2d5
DZ
265 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
266 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
267 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
268 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
269 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
270 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
271 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
272 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
273 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
274 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
275 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
276 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
277 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
278 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
279 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
280 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
281 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
282 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
283 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
284 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
285 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
286 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
287 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
288 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
289 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
290 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
291 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
292 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
293 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
294 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
295 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
296 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
297 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
298 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
299};
300
aaa36a97
AD
/*
 * Iceland (Topaz) A11 golden register settings ({register, AND-mask,
 * OR-value} triplets from hardware validation).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
319
/*
 * Iceland (Topaz) common golden settings ({register, AND-mask, OR-value}
 * triplets): raster config and SPI CU resource reservation defaults.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
331
332static const u32 iceland_mgcg_cgcg_init[] =
333{
334 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
335 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
336 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
337 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
338 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
339 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
340 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
341 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
342 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
343 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
344 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
345 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
346 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
347 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
348 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
349 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
350 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
351 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
352 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
353 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
354 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
355 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
356 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
357 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
358 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
359 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
360 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
361 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
362 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
363 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
364 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
365 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
366 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
367 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
368 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
369 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
370 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
371 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
372 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
373 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
374 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
375 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
376 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
377 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
378 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
379 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
380 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
381 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
382 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
383 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
384 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
385 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
386 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
387 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
388 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
389 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
390 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
391 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
392 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
393 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
394 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
395 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
396 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
397 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
398};
399
/*
 * Carrizo A11 golden register settings ({register, AND-mask, OR-value}
 * triplets from hardware validation).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
413
/*
 * Carrizo common golden settings ({register, AND-mask, OR-value} triplets):
 * raster config and SPI CU resource reservation defaults.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
425
426static const u32 cz_mgcg_cgcg_init[] =
427{
428 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
429 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
430 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
431 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
432 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
433 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
434 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
435 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
436 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
437 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
438 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
439 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
441 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
442 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
443 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
445 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
446 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
447 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
448 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
449 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
450 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
453 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
454 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
455 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
456 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
457 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
458 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
459 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
460 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
461 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
462 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
463 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
464 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
465 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
466 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
467 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
468 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
469 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
470 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
471 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
472 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
473 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
474 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
475 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
476 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
477 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
478 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
479 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
480 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
481 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
482 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
483 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
484 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
485 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
486 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
487 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
488 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
489 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
490 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
491 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
492 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
493 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
494 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
495 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
496 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
497 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
498 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
499 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
500 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
501 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
502 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
503};
504
e3c7656c
SL
/*
 * Stoney A11 golden register settings ({register, AND-mask, OR-value}
 * triplets from hardware validation).
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
518
/*
 * Stoney common golden settings ({register, AND-mask, OR-value} triplets):
 * raster config and SPI CU resource reservation defaults.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
530
/*
 * Stoney medium-grain / coarse-grain clock-gating init sequence
 * ({register, AND-mask, OR-value} triplets).
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
540
aaa36a97
AD
541static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
542static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
543static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
544
/**
 * gfx_v8_0_init_golden_registers - apply ASIC-specific golden settings
 * @adev: amdgpu device pointer
 *
 * Programs the per-ASIC validated ("golden") register sequences via
 * amdgpu_program_register_sequence(). For each chip the order is:
 * clock-gating init, then the A-rev golden settings, then the common
 * settings. Unknown ASIC types are silently left with reset defaults.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		/* Topaz uses the Iceland register tables. */
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
608
609static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
610{
611 int i;
612
613 adev->gfx.scratch.num_reg = 7;
614 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
615 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
616 adev->gfx.scratch.free[i] = true;
617 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
618 }
619}
620
/**
 * gfx_v8_0_ring_test_ring - basic sanity test of a CP ring
 * @ring: the ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a three-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it. Polls the scratch
 * register (up to adev->usec_timeout microseconds) to confirm the CP
 * actually executed the packet.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from scratch
 * allocation / ring locking.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known sentinel so a stale read cannot be mistaken for success. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_lock(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* Three dwords: packet header, register offset, payload. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_unlock_commit(ring);

	/* Busy-wait for the CP to perform the register write. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
664
/**
 * gfx_v8_0_ring_test_ib - sanity test of indirect buffer execution
 * @ring: the ring to submit the test IB on
 *
 * Same idea as the ring test, but the SET_UCONFIG_REG packet is placed in
 * an indirect buffer submitted through the scheduler, so this additionally
 * exercises IB fetch and fence signalling. Waits on the returned fence,
 * then polls the scratch register for the 0xDEADBEEF payload.
 *
 * Returns 0 on success, -EINVAL on timeout, or a scratch/IB/fence error.
 * Cleanup is via goto: err2 releases the fence and IB, err1 the scratch
 * register — the label order mirrors the acquisition order.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Sentinel value that the IB must overwrite. */
	WREG32(scratch, 0xCAFEDEAD);
	/* Zero the IB struct so cleanup via err1/err2 is safe on any path. */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	/* Single SET_UCONFIG_REG packet writing 0xDEADBEEF to the scratch reg. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err2;

	/* Block until the GPU signals completion of the IB. */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err2;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	/* fence_put(NULL) is safe, so this handles the pre-submit failure too. */
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
725
726static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
727{
728 const char *chip_name;
729 char fw_name[30];
730 int err;
731 struct amdgpu_firmware_info *info = NULL;
732 const struct common_firmware_header *header = NULL;
595fd013 733 const struct gfx_firmware_header_v1_0 *cp_hdr;
aaa36a97
AD
734
735 DRM_DEBUG("\n");
736
737 switch (adev->asic_type) {
738 case CHIP_TOPAZ:
739 chip_name = "topaz";
740 break;
741 case CHIP_TONGA:
742 chip_name = "tonga";
743 break;
744 case CHIP_CARRIZO:
745 chip_name = "carrizo";
746 break;
af15a2d5
DZ
747 case CHIP_FIJI:
748 chip_name = "fiji";
749 break;
e3c7656c
SL
750 case CHIP_STONEY:
751 chip_name = "stoney";
752 break;
aaa36a97
AD
753 default:
754 BUG();
755 }
756
c65444fe 757 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
aaa36a97
AD
758 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
759 if (err)
760 goto out;
761 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
762 if (err)
763 goto out;
595fd013
JZ
764 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
765 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
766 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 767
c65444fe 768 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
aaa36a97
AD
769 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
770 if (err)
771 goto out;
772 err = amdgpu_ucode_validate(adev->gfx.me_fw);
773 if (err)
774 goto out;
595fd013
JZ
775 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
776 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
777 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 778
c65444fe 779 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
aaa36a97
AD
780 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
781 if (err)
782 goto out;
783 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
784 if (err)
785 goto out;
595fd013
JZ
786 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
787 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
788 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 789
c65444fe 790 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
aaa36a97
AD
791 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
792 if (err)
793 goto out;
794 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
595fd013
JZ
795 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
796 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
797 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 798
c65444fe 799 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
aaa36a97
AD
800 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
801 if (err)
802 goto out;
803 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
804 if (err)
805 goto out;
595fd013
JZ
806 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
807 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
808 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 809
e3c7656c
SL
810 if (adev->asic_type != CHIP_STONEY) {
811 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
812 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
813 if (!err) {
814 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
815 if (err)
816 goto out;
817 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
818 adev->gfx.mec2_fw->data;
819 adev->gfx.mec2_fw_version =
820 le32_to_cpu(cp_hdr->header.ucode_version);
821 adev->gfx.mec2_feature_version =
822 le32_to_cpu(cp_hdr->ucode_feature_version);
823 } else {
824 err = 0;
825 adev->gfx.mec2_fw = NULL;
826 }
aaa36a97
AD
827 }
828
829 if (adev->firmware.smu_load) {
830 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
831 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
832 info->fw = adev->gfx.pfp_fw;
833 header = (const struct common_firmware_header *)info->fw->data;
834 adev->firmware.fw_size +=
835 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
836
837 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
838 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
839 info->fw = adev->gfx.me_fw;
840 header = (const struct common_firmware_header *)info->fw->data;
841 adev->firmware.fw_size +=
842 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
843
844 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
845 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
846 info->fw = adev->gfx.ce_fw;
847 header = (const struct common_firmware_header *)info->fw->data;
848 adev->firmware.fw_size +=
849 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
850
851 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
852 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
853 info->fw = adev->gfx.rlc_fw;
854 header = (const struct common_firmware_header *)info->fw->data;
855 adev->firmware.fw_size +=
856 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
857
858 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
859 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
860 info->fw = adev->gfx.mec_fw;
861 header = (const struct common_firmware_header *)info->fw->data;
862 adev->firmware.fw_size +=
863 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
864
865 if (adev->gfx.mec2_fw) {
866 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
867 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
868 info->fw = adev->gfx.mec2_fw;
869 header = (const struct common_firmware_header *)info->fw->data;
870 adev->firmware.fw_size +=
871 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
872 }
873
874 }
875
876out:
877 if (err) {
878 dev_err(adev->dev,
879 "gfx8: Failed to load firmware \"%s\"\n",
880 fw_name);
881 release_firmware(adev->gfx.pfp_fw);
882 adev->gfx.pfp_fw = NULL;
883 release_firmware(adev->gfx.me_fw);
884 adev->gfx.me_fw = NULL;
885 release_firmware(adev->gfx.ce_fw);
886 adev->gfx.ce_fw = NULL;
887 release_firmware(adev->gfx.rlc_fw);
888 adev->gfx.rlc_fw = NULL;
889 release_firmware(adev->gfx.mec_fw);
890 adev->gfx.mec_fw = NULL;
891 release_firmware(adev->gfx.mec2_fw);
892 adev->gfx.mec2_fw = NULL;
893 }
894 return err;
895}
896
897static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
898{
899 int r;
900
901 if (adev->gfx.mec.hpd_eop_obj) {
902 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
903 if (unlikely(r != 0))
904 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
905 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
906 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
907
908 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
909 adev->gfx.mec.hpd_eop_obj = NULL;
910 }
911}
912
913#define MEC_HPD_SIZE 2048
914
915static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
916{
917 int r;
918 u32 *hpd;
919
920 /*
921 * we assign only 1 pipe because all other pipes will
922 * be handled by KFD
923 */
924 adev->gfx.mec.num_mec = 1;
925 adev->gfx.mec.num_pipe = 1;
926 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
927
928 if (adev->gfx.mec.hpd_eop_obj == NULL) {
929 r = amdgpu_bo_create(adev,
930 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
931 PAGE_SIZE, true,
72d7668b 932 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
aaa36a97
AD
933 &adev->gfx.mec.hpd_eop_obj);
934 if (r) {
935 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
936 return r;
937 }
938 }
939
940 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
941 if (unlikely(r != 0)) {
942 gfx_v8_0_mec_fini(adev);
943 return r;
944 }
945 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
946 &adev->gfx.mec.hpd_eop_gpu_addr);
947 if (r) {
948 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
949 gfx_v8_0_mec_fini(adev);
950 return r;
951 }
952 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
953 if (r) {
954 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
955 gfx_v8_0_mec_fini(adev);
956 return r;
957 }
958
959 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
960
961 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
962 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
963
964 return 0;
965}
966
ccba7691
AD
/*
 * Raw GCN shader machine code (one dword per entry) copied into an IB
 * and dispatched by gfx_v8_0_do_edc_gpr_workarounds() to initialize
 * the VGPRs on Carrizo before EDC is enabled.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

/*
 * Raw GCN shader machine code used by the same EDC workaround to
 * initialize the SGPRs; dispatched twice (once per SE group, see
 * sgpr1_init_regs/sgpr2_init_regs).
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1028
/*
 * {register, value} pairs consumed two at a time by
 * gfx_v8_0_do_edc_gpr_workarounds(), which emits one
 * PACKET3_SET_SH_REG per pair before dispatching the VGPR init shader.
 * SE0 thread mask 0xffffffff: the VGPR pass covers all shader engines.
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* Same layout as vgpr_init_regs, for the first SGPR init dispatch
 * (SE0 mask 0x0f — lower group of shader engines). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* Second SGPR init dispatch (SE0 mask 0xf0 — upper group); only the
 * thread-management mask differs from sgpr1_init_regs. */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1088
/*
 * SEC/DED (single-error-correct / double-error-detect) counter
 * registers read back at the end of gfx_v8_0_do_edc_gpr_workarounds()
 * to clear the counters after the GPR init dispatches.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1117
/**
 * gfx_v8_0_do_edc_gpr_workarounds - initialize GPRs before enabling EDC
 * @adev: amdgpu device pointer
 *
 * Carrizo-only workaround: builds one indirect buffer containing three
 * compute dispatches (VGPR init on all SEs, then SGPR init on each SE
 * group), submits it on compute ring 0 and waits for completion, then
 * programs GB_EDC_MODE/CC_GC_EDC_CONFIG and reads back the SEC/DED
 * counter registers to clear them.
 *
 * Returns 0 on success (or when skipped), negative error code otherwise.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save the current EDC mode and disable EDC while initializing GPRs */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/*
	 * IB layout: per dispatch, 3 dwords per {reg,value} pair plus
	 * 4 (PGM_LO/HI write), 5 (dispatch packet) and 2 (CS partial
	 * flush) dwords; the shader blobs follow at 256-byte-aligned
	 * offsets.
	 */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI
	 * (reuses the same SGPR init shader as the SGPR1 dispatch) */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED_MODE=2 and FED propagation on */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	/* fence_put() tolerates the NULL fence left on early failure */
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1282
0bde3a95
AD
/**
 * gfx_v8_0_gpu_early_init - fill in per-ASIC gfx configuration
 * @adev: amdgpu device pointer
 *
 * Populates adev->gfx.config (shader engine/CU counts, FIFO sizes,
 * tiling pipe counts, memory row size and the final GB_ADDR_CONFIG
 * value) based on the ASIC type, PCI revision and, for APUs, the
 * fused DIMM address-mapping registers.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count per SH depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count per SH depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * possibly kept for parity with other gfx generations; confirm. */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive the row size from the ARB RAMCFG column count */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1508
5fc3aeeb 1509static int gfx_v8_0_sw_init(void *handle)
aaa36a97
AD
1510{
1511 int i, r;
1512 struct amdgpu_ring *ring;
5fc3aeeb 1513 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
1514
1515 /* EOP Event */
1516 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1517 if (r)
1518 return r;
1519
1520 /* Privileged reg */
1521 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1522 if (r)
1523 return r;
1524
1525 /* Privileged inst */
1526 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1527 if (r)
1528 return r;
1529
1530 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1531
1532 gfx_v8_0_scratch_init(adev);
1533
1534 r = gfx_v8_0_init_microcode(adev);
1535 if (r) {
1536 DRM_ERROR("Failed to load gfx firmware!\n");
1537 return r;
1538 }
1539
1540 r = gfx_v8_0_mec_init(adev);
1541 if (r) {
1542 DRM_ERROR("Failed to init MEC BOs!\n");
1543 return r;
1544 }
1545
aaa36a97
AD
1546 /* set up the gfx ring */
1547 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1548 ring = &adev->gfx.gfx_ring[i];
1549 ring->ring_obj = NULL;
1550 sprintf(ring->name, "gfx");
1551 /* no gfx doorbells on iceland */
1552 if (adev->asic_type != CHIP_TOPAZ) {
1553 ring->use_doorbell = true;
1554 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1555 }
1556
1557 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1558 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1559 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1560 AMDGPU_RING_TYPE_GFX);
1561 if (r)
1562 return r;
1563 }
1564
1565 /* set up the compute queues */
1566 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1567 unsigned irq_type;
1568
1569 /* max 32 queues per MEC */
1570 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1571 DRM_ERROR("Too many (%d) compute rings!\n", i);
1572 break;
1573 }
1574 ring = &adev->gfx.compute_ring[i];
1575 ring->ring_obj = NULL;
1576 ring->use_doorbell = true;
1577 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1578 ring->me = 1; /* first MEC */
1579 ring->pipe = i / 8;
1580 ring->queue = i % 8;
1581 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1582 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1583 /* type-2 packets are deprecated on MEC, use type-3 instead */
1584 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1585 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1586 &adev->gfx.eop_irq, irq_type,
1587 AMDGPU_RING_TYPE_COMPUTE);
1588 if (r)
1589 return r;
1590 }
1591
1592 /* reserve GDS, GWS and OA resource for gfx */
1593 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1594 PAGE_SIZE, true,
72d7668b 1595 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
aaa36a97
AD
1596 NULL, &adev->gds.gds_gfx_bo);
1597 if (r)
1598 return r;
1599
1600 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1601 PAGE_SIZE, true,
72d7668b 1602 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
aaa36a97
AD
1603 NULL, &adev->gds.gws_gfx_bo);
1604 if (r)
1605 return r;
1606
1607 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1608 PAGE_SIZE, true,
72d7668b 1609 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
aaa36a97
AD
1610 NULL, &adev->gds.oa_gfx_bo);
1611 if (r)
1612 return r;
1613
a101a899
KW
1614 adev->gfx.ce_ram_size = 0x8000;
1615
0bde3a95
AD
1616 gfx_v8_0_gpu_early_init(adev);
1617
aaa36a97
AD
1618 return 0;
1619}
1620
5fc3aeeb 1621static int gfx_v8_0_sw_fini(void *handle)
aaa36a97
AD
1622{
1623 int i;
5fc3aeeb 1624 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
1625
1626 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1627 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1628 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1629
1630 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1631 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1632 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1633 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1634
aaa36a97
AD
1635 gfx_v8_0_mec_fini(adev);
1636
1637 return 0;
1638}
1639
1640static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1641{
90bea0ab 1642 uint32_t *modearray, *mod2array;
eb64526f
TSD
1643 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1644 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 1645 u32 reg_offset;
aaa36a97 1646
90bea0ab
TSD
1647 modearray = adev->gfx.config.tile_mode_array;
1648 mod2array = adev->gfx.config.macrotile_mode_array;
1649
1650 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1651 modearray[reg_offset] = 0;
1652
1653 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1654 mod2array[reg_offset] = 0;
aaa36a97
AD
1655
1656 switch (adev->asic_type) {
1657 case CHIP_TOPAZ:
90bea0ab
TSD
1658 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1659 PIPE_CONFIG(ADDR_SURF_P2) |
1660 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1661 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1662 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1663 PIPE_CONFIG(ADDR_SURF_P2) |
1664 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1665 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1666 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1667 PIPE_CONFIG(ADDR_SURF_P2) |
1668 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1669 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1670 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1671 PIPE_CONFIG(ADDR_SURF_P2) |
1672 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1673 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1674 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1675 PIPE_CONFIG(ADDR_SURF_P2) |
1676 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1677 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1678 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1679 PIPE_CONFIG(ADDR_SURF_P2) |
1680 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1681 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1682 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1683 PIPE_CONFIG(ADDR_SURF_P2) |
1684 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1685 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1686 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1687 PIPE_CONFIG(ADDR_SURF_P2));
1688 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1689 PIPE_CONFIG(ADDR_SURF_P2) |
1690 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1692 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1693 PIPE_CONFIG(ADDR_SURF_P2) |
1694 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1696 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1697 PIPE_CONFIG(ADDR_SURF_P2) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1700 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1701 PIPE_CONFIG(ADDR_SURF_P2) |
1702 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1704 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1705 PIPE_CONFIG(ADDR_SURF_P2) |
1706 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1707 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1708 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1709 PIPE_CONFIG(ADDR_SURF_P2) |
1710 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1711 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1712 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1713 PIPE_CONFIG(ADDR_SURF_P2) |
1714 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1715 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1716 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1717 PIPE_CONFIG(ADDR_SURF_P2) |
1718 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1719 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1720 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1721 PIPE_CONFIG(ADDR_SURF_P2) |
1722 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1723 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1724 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1725 PIPE_CONFIG(ADDR_SURF_P2) |
1726 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1727 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1728 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1729 PIPE_CONFIG(ADDR_SURF_P2) |
1730 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1731 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1732 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1733 PIPE_CONFIG(ADDR_SURF_P2) |
1734 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1736 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1737 PIPE_CONFIG(ADDR_SURF_P2) |
1738 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1740 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1741 PIPE_CONFIG(ADDR_SURF_P2) |
1742 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1744 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1745 PIPE_CONFIG(ADDR_SURF_P2) |
1746 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1748 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1749 PIPE_CONFIG(ADDR_SURF_P2) |
1750 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1751 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1752 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1753 PIPE_CONFIG(ADDR_SURF_P2) |
1754 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1756 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1757 PIPE_CONFIG(ADDR_SURF_P2) |
1758 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1760
1761 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1764 NUM_BANKS(ADDR_SURF_8_BANK));
1765 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1768 NUM_BANKS(ADDR_SURF_8_BANK));
1769 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1770 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1771 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1772 NUM_BANKS(ADDR_SURF_8_BANK));
1773 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1774 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1775 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1776 NUM_BANKS(ADDR_SURF_8_BANK));
1777 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1778 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1779 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1780 NUM_BANKS(ADDR_SURF_8_BANK));
1781 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1784 NUM_BANKS(ADDR_SURF_8_BANK));
1785 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1786 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1787 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1788 NUM_BANKS(ADDR_SURF_8_BANK));
1789 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1790 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1791 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1792 NUM_BANKS(ADDR_SURF_16_BANK));
1793 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1794 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1795 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1796 NUM_BANKS(ADDR_SURF_16_BANK));
1797 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1798 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1799 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1800 NUM_BANKS(ADDR_SURF_16_BANK));
1801 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1804 NUM_BANKS(ADDR_SURF_16_BANK));
1805 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1806 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1807 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1808 NUM_BANKS(ADDR_SURF_16_BANK));
1809 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1810 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1811 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1812 NUM_BANKS(ADDR_SURF_16_BANK));
1813 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1814 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1815 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1816 NUM_BANKS(ADDR_SURF_8_BANK));
1817
1818 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1819 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1820 reg_offset != 23)
1821 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1822
1823 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1824 if (reg_offset != 7)
1825 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1826
8cdacf44 1827 break;
af15a2d5 1828 case CHIP_FIJI:
90bea0ab
TSD
1829 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1830 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1833 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1834 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1837 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1838 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1841 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1842 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1843 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1844 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1845 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1846 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1847 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1848 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1849 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1850 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1851 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1852 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1853 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1854 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1855 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1856 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1857 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1858 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1859 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1860 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1861 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1862 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1863 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1864 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1867 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1868 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1869 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1871 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1872 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1873 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1875 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1876 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1879 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1880 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1883 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1884 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1885 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1887 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1888 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1889 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1891 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1892 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1893 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1895 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1896 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1897 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1899 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1900 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1901 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1903 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1904 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1905 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1907 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1908 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1911 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1912 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1915 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1916 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1917 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1919 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1920 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1921 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1923 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1924 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1925 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1927 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1928 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1929 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1931 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1932 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1933 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1934 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1935 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1936 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1937 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1939 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1940 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1941 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1943 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1944 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1945 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1946 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1947 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1948 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1949 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1951
1952 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1955 NUM_BANKS(ADDR_SURF_8_BANK));
1956 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1959 NUM_BANKS(ADDR_SURF_8_BANK));
1960 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1963 NUM_BANKS(ADDR_SURF_8_BANK));
1964 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1965 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1966 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1967 NUM_BANKS(ADDR_SURF_8_BANK));
1968 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1971 NUM_BANKS(ADDR_SURF_8_BANK));
1972 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1975 NUM_BANKS(ADDR_SURF_8_BANK));
1976 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1977 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1978 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1979 NUM_BANKS(ADDR_SURF_8_BANK));
1980 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1981 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1982 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1983 NUM_BANKS(ADDR_SURF_8_BANK));
1984 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1985 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1986 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1987 NUM_BANKS(ADDR_SURF_8_BANK));
1988 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1989 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1990 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1991 NUM_BANKS(ADDR_SURF_8_BANK));
1992 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1995 NUM_BANKS(ADDR_SURF_8_BANK));
1996 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1997 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1998 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1999 NUM_BANKS(ADDR_SURF_8_BANK));
2000 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2001 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2002 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2003 NUM_BANKS(ADDR_SURF_8_BANK));
2004 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2005 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2006 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2007 NUM_BANKS(ADDR_SURF_4_BANK));
2008
2009 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2010 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2011
2012 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2013 if (reg_offset != 7)
2014 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2015
5f2e816b 2016 break;
aaa36a97 2017 case CHIP_TONGA:
90bea0ab
TSD
2018 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2020 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2021 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2022 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2023 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2024 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2025 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2026 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2028 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2029 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2030 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2031 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2032 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2033 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2034 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2035 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2036 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2037 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2038 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2040 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2041 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2042 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2044 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2045 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2046 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2047 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2049 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2050 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2051 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2052 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2056 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2057 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2060 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2064 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2065 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2068 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2069 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2072 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2073 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2074 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2076 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2078 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2084 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2085 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2086 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2088 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2089 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2090 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2092 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2093 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2094 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2096 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2097 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2098 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2099 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2100 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2101 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2102 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2103 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2104 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2105 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2106 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2107 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2108 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2109 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2110 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2111 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2112 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2113 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2114 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2115 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2116 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2117 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2118 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2119 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2120 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2121 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2123 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2124 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2125 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2126 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2127 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2128 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2129 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2130 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2131 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2132 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2133 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2134 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2136 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2137 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2140
2141 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2144 NUM_BANKS(ADDR_SURF_16_BANK));
2145 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2148 NUM_BANKS(ADDR_SURF_16_BANK));
2149 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2152 NUM_BANKS(ADDR_SURF_16_BANK));
2153 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2156 NUM_BANKS(ADDR_SURF_16_BANK));
2157 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2160 NUM_BANKS(ADDR_SURF_16_BANK));
2161 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 NUM_BANKS(ADDR_SURF_16_BANK));
2165 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2168 NUM_BANKS(ADDR_SURF_16_BANK));
2169 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2172 NUM_BANKS(ADDR_SURF_16_BANK));
2173 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2176 NUM_BANKS(ADDR_SURF_16_BANK));
2177 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2178 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2179 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2180 NUM_BANKS(ADDR_SURF_16_BANK));
2181 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2182 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2183 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2184 NUM_BANKS(ADDR_SURF_16_BANK));
2185 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 NUM_BANKS(ADDR_SURF_8_BANK));
2189 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2192 NUM_BANKS(ADDR_SURF_4_BANK));
2193 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2194 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2195 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2196 NUM_BANKS(ADDR_SURF_4_BANK));
2197
2198 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2199 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2200
2201 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2202 if (reg_offset != 7)
2203 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2204
aaa36a97 2205 break;
e3c7656c 2206 case CHIP_STONEY:
90bea0ab
TSD
2207 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2208 PIPE_CONFIG(ADDR_SURF_P2) |
2209 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2210 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2211 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2212 PIPE_CONFIG(ADDR_SURF_P2) |
2213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2215 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2216 PIPE_CONFIG(ADDR_SURF_P2) |
2217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2219 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2220 PIPE_CONFIG(ADDR_SURF_P2) |
2221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2223 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2224 PIPE_CONFIG(ADDR_SURF_P2) |
2225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2227 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2228 PIPE_CONFIG(ADDR_SURF_P2) |
2229 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2230 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2231 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2232 PIPE_CONFIG(ADDR_SURF_P2) |
2233 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2234 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2235 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2236 PIPE_CONFIG(ADDR_SURF_P2));
2237 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238 PIPE_CONFIG(ADDR_SURF_P2) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242 PIPE_CONFIG(ADDR_SURF_P2) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2245 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P2) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2249 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2250 PIPE_CONFIG(ADDR_SURF_P2) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P2) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2257 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2258 PIPE_CONFIG(ADDR_SURF_P2) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2261 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2262 PIPE_CONFIG(ADDR_SURF_P2) |
2263 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2265 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2266 PIPE_CONFIG(ADDR_SURF_P2) |
2267 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2269 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2270 PIPE_CONFIG(ADDR_SURF_P2) |
2271 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2273 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2274 PIPE_CONFIG(ADDR_SURF_P2) |
2275 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2277 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2278 PIPE_CONFIG(ADDR_SURF_P2) |
2279 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2281 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2282 PIPE_CONFIG(ADDR_SURF_P2) |
2283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2285 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2286 PIPE_CONFIG(ADDR_SURF_P2) |
2287 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2289 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2290 PIPE_CONFIG(ADDR_SURF_P2) |
2291 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2293 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2294 PIPE_CONFIG(ADDR_SURF_P2) |
2295 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2297 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2298 PIPE_CONFIG(ADDR_SURF_P2) |
2299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2301 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2302 PIPE_CONFIG(ADDR_SURF_P2) |
2303 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2305 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2306 PIPE_CONFIG(ADDR_SURF_P2) |
2307 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2309
2310 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2313 NUM_BANKS(ADDR_SURF_8_BANK));
2314 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2317 NUM_BANKS(ADDR_SURF_8_BANK));
2318 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2321 NUM_BANKS(ADDR_SURF_8_BANK));
2322 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2325 NUM_BANKS(ADDR_SURF_8_BANK));
2326 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2329 NUM_BANKS(ADDR_SURF_8_BANK));
2330 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2333 NUM_BANKS(ADDR_SURF_8_BANK));
2334 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2337 NUM_BANKS(ADDR_SURF_8_BANK));
2338 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2341 NUM_BANKS(ADDR_SURF_16_BANK));
2342 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2345 NUM_BANKS(ADDR_SURF_16_BANK));
2346 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2349 NUM_BANKS(ADDR_SURF_16_BANK));
2350 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2353 NUM_BANKS(ADDR_SURF_16_BANK));
2354 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2357 NUM_BANKS(ADDR_SURF_16_BANK));
2358 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2359 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2360 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2361 NUM_BANKS(ADDR_SURF_16_BANK));
2362 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2365 NUM_BANKS(ADDR_SURF_8_BANK));
2366
2367 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2368 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2369 reg_offset != 23)
2370 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2371
2372 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2373 if (reg_offset != 7)
2374 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2375
e3c7656c 2376 break;
aaa36a97 2377 default:
90bea0ab
TSD
2378 dev_warn(adev->dev,
2379 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2380 adev->asic_type);
2381
2382 case CHIP_CARRIZO:
2383 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2384 PIPE_CONFIG(ADDR_SURF_P2) |
2385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2387 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2388 PIPE_CONFIG(ADDR_SURF_P2) |
2389 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2391 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 PIPE_CONFIG(ADDR_SURF_P2) |
2393 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2395 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2396 PIPE_CONFIG(ADDR_SURF_P2) |
2397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2399 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 PIPE_CONFIG(ADDR_SURF_P2) |
2401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2403 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P2) |
2405 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2408 PIPE_CONFIG(ADDR_SURF_P2) |
2409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2412 PIPE_CONFIG(ADDR_SURF_P2));
2413 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2414 PIPE_CONFIG(ADDR_SURF_P2) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2418 PIPE_CONFIG(ADDR_SURF_P2) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2421 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2422 PIPE_CONFIG(ADDR_SURF_P2) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2425 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 PIPE_CONFIG(ADDR_SURF_P2) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2430 PIPE_CONFIG(ADDR_SURF_P2) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P2) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438 PIPE_CONFIG(ADDR_SURF_P2) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2442 PIPE_CONFIG(ADDR_SURF_P2) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2445 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2446 PIPE_CONFIG(ADDR_SURF_P2) |
2447 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2449 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2450 PIPE_CONFIG(ADDR_SURF_P2) |
2451 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2453 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2454 PIPE_CONFIG(ADDR_SURF_P2) |
2455 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2457 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2458 PIPE_CONFIG(ADDR_SURF_P2) |
2459 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2461 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2462 PIPE_CONFIG(ADDR_SURF_P2) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2465 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2466 PIPE_CONFIG(ADDR_SURF_P2) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2469 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2470 PIPE_CONFIG(ADDR_SURF_P2) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2473 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2474 PIPE_CONFIG(ADDR_SURF_P2) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2477 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2478 PIPE_CONFIG(ADDR_SURF_P2) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2481 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2482 PIPE_CONFIG(ADDR_SURF_P2) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2485
2486 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2489 NUM_BANKS(ADDR_SURF_8_BANK));
2490 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 NUM_BANKS(ADDR_SURF_8_BANK));
2494 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 NUM_BANKS(ADDR_SURF_8_BANK));
2498 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2501 NUM_BANKS(ADDR_SURF_8_BANK));
2502 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2505 NUM_BANKS(ADDR_SURF_8_BANK));
2506 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2509 NUM_BANKS(ADDR_SURF_8_BANK));
2510 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2513 NUM_BANKS(ADDR_SURF_8_BANK));
2514 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 NUM_BANKS(ADDR_SURF_16_BANK));
2518 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2521 NUM_BANKS(ADDR_SURF_16_BANK));
2522 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2525 NUM_BANKS(ADDR_SURF_16_BANK));
2526 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2529 NUM_BANKS(ADDR_SURF_16_BANK));
2530 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2533 NUM_BANKS(ADDR_SURF_16_BANK));
2534 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2537 NUM_BANKS(ADDR_SURF_16_BANK));
2538 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2542
2543 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2544 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2545 reg_offset != 23)
2546 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2547
2548 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2549 if (reg_offset != 7)
2550 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2551
2552 break;
aaa36a97
AD
2553 }
2554}
2555
2556static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2557{
544b8a74 2558 return (u32)((1ULL << bit_width) - 1);
aaa36a97
AD
2559}
2560
2561void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2562{
2563 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2564
2565 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2567 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2568 } else if (se_num == 0xffffffff) {
2569 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2570 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2571 } else if (sh_num == 0xffffffff) {
2572 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2573 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2574 } else {
2575 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2576 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2577 }
2578 WREG32(mmGRBM_GFX_INDEX, data);
2579}
2580
2581static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
2582 u32 max_rb_num_per_se,
2583 u32 sh_per_se)
2584{
2585 u32 data, mask;
2586
2587 data = RREG32(mmCC_RB_BACKEND_DISABLE);
4f2d3ad6 2588 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
aaa36a97
AD
2589
2590 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2591
2592 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2593
2594 mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
2595
2596 return data & mask;
2597}
2598
/*
 * gfx_v8_0_setup_rb - discover enabled render backends and program rasterizer
 * @adev:              amdgpu_device pointer
 * @se_num:            number of shader engines
 * @sh_per_se:         number of shader arrays per SE
 * @max_rb_num_per_se: maximum render backends per SE
 *
 * Reads the per-SH disabled-RB fuses, builds the global enabled-RB bitmap
 * (stored in adev->gfx.config.backend_enable_mask), and programs
 * PA_SC_RASTER_CONFIG for each SE accordingly.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Walk every SE/SH and pack its disabled-RB bits into one bitmap,
	 * RB_BITMAP_WIDTH_PER_SH bits per SH. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
					      max_rb_num_per_se, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast mode before dropping the lock */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Invert: an RB whose "disabled" bit is clear is enabled. */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	/* Build PA_SC_RASTER_CONFIG per SE, consuming the enabled-RB bitmap
	 * two bits (one RB pair) at a time. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 0:
				/* no RB enabled in this pair: pick the PKR
				 * mapping.  NOTE(review): uses the PKR_MAP
				 * shift regardless of pair position —
				 * matches the gfx_v7 code; confirm intended. */
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				/* only the first RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				/* only the second RB of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				/* both RBs of the pair enabled */
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
2665
cd06bf68 2666/**
35c7a952 2667 * gfx_v8_0_init_compute_vmid - gart enable
cd06bf68
BG
2668 *
2669 * @rdev: amdgpu_device pointer
2670 *
2671 * Initialize compute vmid sh_mem registers
2672 *
2673 */
2674#define DEFAULT_SH_MEM_BASES (0x6000)
2675#define FIRST_COMPUTE_VMID (8)
2676#define LAST_COMPUTE_VMID (16)
35c7a952 2677static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
cd06bf68
BG
2678{
2679 int i;
2680 uint32_t sh_mem_config;
2681 uint32_t sh_mem_bases;
2682
2683 /*
2684 * Configure apertures:
2685 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2686 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2687 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2688 */
2689 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2690
2691 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2692 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2693 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2694 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2695 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2696 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2697
2698 mutex_lock(&adev->srbm_mutex);
2699 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2700 vi_srbm_select(adev, 0, 0, 0, i);
2701 /* CP and shaders */
2702 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2703 WREG32(mmSH_MEM_APE1_BASE, 1);
2704 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2705 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2706 }
2707 vi_srbm_select(adev, 0, 0, 0, 0);
2708 mutex_unlock(&adev->srbm_mutex);
2709}
2710
aaa36a97
AD
2711static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2712{
aaa36a97
AD
2713 u32 tmp;
2714 int i;
2715
aaa36a97
AD
2716 tmp = RREG32(mmGRBM_CNTL);
2717 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2718 WREG32(mmGRBM_CNTL, tmp);
2719
0bde3a95
AD
2720 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2721 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2722 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
aaa36a97 2723 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
0bde3a95 2724 adev->gfx.config.gb_addr_config & 0x70);
aaa36a97 2725 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
0bde3a95
AD
2726 adev->gfx.config.gb_addr_config & 0x70);
2727 WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2728 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2729 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
aaa36a97
AD
2730
2731 gfx_v8_0_tiling_mode_table_init(adev);
2732
2733 gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2734 adev->gfx.config.max_sh_per_se,
2735 adev->gfx.config.max_backends_per_se);
2736
2737 /* XXX SH_MEM regs */
2738 /* where to put LDS, scratch, GPUVM in FSA64 space */
2739 mutex_lock(&adev->srbm_mutex);
2740 for (i = 0; i < 16; i++) {
2741 vi_srbm_select(adev, 0, 0, 0, i);
2742 /* CP and shaders */
2743 if (i == 0) {
2744 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2745 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
0bde3a95 2746 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 2747 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97
AD
2748 WREG32(mmSH_MEM_CONFIG, tmp);
2749 } else {
2750 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2751 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
0bde3a95 2752 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
74a5d165 2753 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
aaa36a97
AD
2754 WREG32(mmSH_MEM_CONFIG, tmp);
2755 }
2756
2757 WREG32(mmSH_MEM_APE1_BASE, 1);
2758 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2759 WREG32(mmSH_MEM_BASES, 0);
2760 }
2761 vi_srbm_select(adev, 0, 0, 0, 0);
2762 mutex_unlock(&adev->srbm_mutex);
2763
35c7a952 2764 gfx_v8_0_init_compute_vmid(adev);
cd06bf68 2765
aaa36a97
AD
2766 mutex_lock(&adev->grbm_idx_mutex);
2767 /*
2768 * making sure that the following register writes will be broadcasted
2769 * to all the shaders
2770 */
2771 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2772
2773 WREG32(mmPA_SC_FIFO_SIZE,
2774 (adev->gfx.config.sc_prim_fifo_size_frontend <<
2775 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2776 (adev->gfx.config.sc_prim_fifo_size_backend <<
2777 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2778 (adev->gfx.config.sc_hiz_tile_fifo_size <<
2779 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2780 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2781 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2782 mutex_unlock(&adev->grbm_idx_mutex);
2783
2784}
2785
2786static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2787{
2788 u32 i, j, k;
2789 u32 mask;
2790
2791 mutex_lock(&adev->grbm_idx_mutex);
2792 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2793 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2794 gfx_v8_0_select_se_sh(adev, i, j);
2795 for (k = 0; k < adev->usec_timeout; k++) {
2796 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2797 break;
2798 udelay(1);
2799 }
2800 }
2801 }
2802 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2803 mutex_unlock(&adev->grbm_idx_mutex);
2804
2805 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2806 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2807 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2808 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2809 for (k = 0; k < adev->usec_timeout; k++) {
2810 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2811 break;
2812 udelay(1);
2813 }
2814}
2815
2816static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2817 bool enable)
2818{
2819 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2820
0d07db7e
TSD
2821 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2822 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2823 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2824 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2825
aaa36a97
AD
2826 WREG32(mmCP_INT_CNTL_RING0, tmp);
2827}
2828
2829void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2830{
2831 u32 tmp = RREG32(mmRLC_CNTL);
2832
2833 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2834 WREG32(mmRLC_CNTL, tmp);
2835
2836 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2837
2838 gfx_v8_0_wait_for_rlc_serdes(adev);
2839}
2840
2841static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2842{
2843 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2844
2845 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2846 WREG32(mmGRBM_SOFT_RESET, tmp);
2847 udelay(50);
2848 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2849 WREG32(mmGRBM_SOFT_RESET, tmp);
2850 udelay(50);
2851}
2852
2853static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2854{
2855 u32 tmp = RREG32(mmRLC_CNTL);
2856
2857 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2858 WREG32(mmRLC_CNTL, tmp);
2859
2860 /* carrizo do enable cp interrupt after cp inited */
e3c7656c 2861 if (!(adev->flags & AMD_IS_APU))
aaa36a97
AD
2862 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2863
2864 udelay(50);
2865}
2866
2867static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2868{
2869 const struct rlc_firmware_header_v2_0 *hdr;
2870 const __le32 *fw_data;
2871 unsigned i, fw_size;
2872
2873 if (!adev->gfx.rlc_fw)
2874 return -EINVAL;
2875
2876 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2877 amdgpu_ucode_print_rlc_hdr(&hdr->header);
aaa36a97
AD
2878
2879 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2880 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2881 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2882
2883 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2884 for (i = 0; i < fw_size; i++)
2885 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2886 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2887
2888 return 0;
2889}
2890
2891static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2892{
2893 int r;
2894
2895 gfx_v8_0_rlc_stop(adev);
2896
2897 /* disable CG */
2898 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2899
2900 /* disable PG */
2901 WREG32(mmRLC_PG_CNTL, 0);
2902
2903 gfx_v8_0_rlc_reset(adev);
2904
2905 if (!adev->firmware.smu_load) {
2906 /* legacy rlc firmware loading */
2907 r = gfx_v8_0_rlc_load_microcode(adev);
2908 if (r)
2909 return r;
2910 } else {
2911 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2912 AMDGPU_UCODE_ID_RLC_G);
2913 if (r)
2914 return -EINVAL;
2915 }
2916
2917 gfx_v8_0_rlc_start(adev);
2918
2919 return 0;
2920}
2921
2922static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2923{
2924 int i;
2925 u32 tmp = RREG32(mmCP_ME_CNTL);
2926
2927 if (enable) {
2928 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2929 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2930 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2931 } else {
2932 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2933 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2934 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2935 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2936 adev->gfx.gfx_ring[i].ready = false;
2937 }
2938 WREG32(mmCP_ME_CNTL, tmp);
2939 udelay(50);
2940}
2941
2942static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2943{
2944 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2945 const struct gfx_firmware_header_v1_0 *ce_hdr;
2946 const struct gfx_firmware_header_v1_0 *me_hdr;
2947 const __le32 *fw_data;
2948 unsigned i, fw_size;
2949
2950 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2951 return -EINVAL;
2952
2953 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2954 adev->gfx.pfp_fw->data;
2955 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2956 adev->gfx.ce_fw->data;
2957 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2958 adev->gfx.me_fw->data;
2959
2960 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2961 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2962 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
aaa36a97
AD
2963
2964 gfx_v8_0_cp_gfx_enable(adev, false);
2965
2966 /* PFP */
2967 fw_data = (const __le32 *)
2968 (adev->gfx.pfp_fw->data +
2969 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2970 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2971 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2972 for (i = 0; i < fw_size; i++)
2973 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2974 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2975
2976 /* CE */
2977 fw_data = (const __le32 *)
2978 (adev->gfx.ce_fw->data +
2979 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2980 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2981 WREG32(mmCP_CE_UCODE_ADDR, 0);
2982 for (i = 0; i < fw_size; i++)
2983 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2984 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2985
2986 /* ME */
2987 fw_data = (const __le32 *)
2988 (adev->gfx.me_fw->data +
2989 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2990 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2991 WREG32(mmCP_ME_RAM_WADDR, 0);
2992 for (i = 0; i < fw_size; i++)
2993 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2994 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2995
2996 return 0;
2997}
2998
2999static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3000{
3001 u32 count = 0;
3002 const struct cs_section_def *sect = NULL;
3003 const struct cs_extent_def *ext = NULL;
3004
3005 /* begin clear state */
3006 count += 2;
3007 /* context control state */
3008 count += 3;
3009
3010 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3011 for (ext = sect->section; ext->extent != NULL; ++ext) {
3012 if (sect->id == SECT_CONTEXT)
3013 count += 2 + ext->reg_count;
3014 else
3015 return 0;
3016 }
3017 }
3018 /* pa_sc_raster_config/pa_sc_raster_config1 */
3019 count += 4;
3020 /* end clear state */
3021 count += 2;
3022 /* clear state */
3023 count += 2;
3024
3025 return count;
3026}
3027
/*
 * gfx_v8_0_cp_gfx_start - initialize the gfx CP and submit the clear state
 * @adev: amdgpu_device pointer
 *
 * Programs the basic CP registers, un-halts the gfx CP, then builds and
 * commits the clear-state packet stream (sized by gfx_v8_0_get_csb_size())
 * on gfx ring 0, including per-ASIC PA_SC_RASTER_CONFIG values and the CE
 * partition bases.
 *
 * Returns 0 on success or the error from amdgpu_ring_lock().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve room for the whole clear-state stream plus 4 dwords */
	r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent from the golden clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_unlock_commit(ring);

	return 0;
}
3110
/*
 * gfx_v8_0_cp_gfx_resume - program and start gfx ring 0
 * @adev: amdgpu_device pointer
 *
 * Programs the CP_RB0 ring-buffer registers (size, pointers, base,
 * doorbell), starts the ring via gfx_v8_0_cp_gfx_start(), and runs a ring
 * test.  The register sequence is order-sensitive.
 *
 * Returns 0 on success or the ring-test error.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers; RPTR_WR_ENA
	 * is raised temporarily so the rptr can be forced to 0 */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* drop RPTR_WR_ENA again after a short settle delay */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is a 256-byte-aligned GPU address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* restrict the gfx doorbell range on Tonga */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3191
3192static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3193{
3194 int i;
3195
3196 if (enable) {
3197 WREG32(mmCP_MEC_CNTL, 0);
3198 } else {
3199 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3200 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3201 adev->gfx.compute_ring[i].ready = false;
3202 }
3203 udelay(50);
3204}
3205
3206static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
3207{
3208 gfx_v8_0_cp_compute_enable(adev, true);
3209
3210 return 0;
3211}
3212
3213static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3214{
3215 const struct gfx_firmware_header_v1_0 *mec_hdr;
3216 const __le32 *fw_data;
3217 unsigned i, fw_size;
3218
3219 if (!adev->gfx.mec_fw)
3220 return -EINVAL;
3221
3222 gfx_v8_0_cp_compute_enable(adev, false);
3223
3224 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3225 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
aaa36a97
AD
3226
3227 fw_data = (const __le32 *)
3228 (adev->gfx.mec_fw->data +
3229 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3230 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3231
3232 /* MEC1 */
3233 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3234 for (i = 0; i < fw_size; i++)
3235 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3236 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3237
3238 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3239 if (adev->gfx.mec2_fw) {
3240 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3241
3242 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3243 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
aaa36a97
AD
3244
3245 fw_data = (const __le32 *)
3246 (adev->gfx.mec2_fw->data +
3247 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3248 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3249
3250 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3251 for (i = 0; i < fw_size; i++)
3252 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3253 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3254 }
3255
3256 return 0;
3257}
3258
/*
 * vi_mqd - Memory Queue Descriptor layout for VI (GFX8) compute queues.
 *
 * One MQD is allocated in a GTT buffer object per compute ring (see
 * gfx_v8_0_cp_compute_resume()); the CP reads it to (re)initialize the
 * corresponding hardware queue descriptor (HQD).  The field order mirrors
 * the hardware's 256-dword ordinal layout exactly, so fields must never
 * be reordered, resized or removed.  The trailing reserved_t[] region is
 * scratch space owned by the CP microcode.
 */
struct vi_mqd {
	uint32_t header; /* ordinal0 */
	uint32_t compute_dispatch_initiator; /* ordinal1 */
	uint32_t compute_dim_x; /* ordinal2 */
	uint32_t compute_dim_y; /* ordinal3 */
	uint32_t compute_dim_z; /* ordinal4 */
	uint32_t compute_start_x; /* ordinal5 */
	uint32_t compute_start_y; /* ordinal6 */
	uint32_t compute_start_z; /* ordinal7 */
	uint32_t compute_num_thread_x; /* ordinal8 */
	uint32_t compute_num_thread_y; /* ordinal9 */
	uint32_t compute_num_thread_z; /* ordinal10 */
	uint32_t compute_pipelinestat_enable; /* ordinal11 */
	uint32_t compute_perfcount_enable; /* ordinal12 */
	uint32_t compute_pgm_lo; /* ordinal13 */
	uint32_t compute_pgm_hi; /* ordinal14 */
	uint32_t compute_tba_lo; /* ordinal15 */
	uint32_t compute_tba_hi; /* ordinal16 */
	uint32_t compute_tma_lo; /* ordinal17 */
	uint32_t compute_tma_hi; /* ordinal18 */
	uint32_t compute_pgm_rsrc1; /* ordinal19 */
	uint32_t compute_pgm_rsrc2; /* ordinal20 */
	uint32_t compute_vmid; /* ordinal21 */
	uint32_t compute_resource_limits; /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
	uint32_t compute_tmpring_size; /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
	uint32_t compute_restart_x; /* ordinal28 */
	uint32_t compute_restart_y; /* ordinal29 */
	uint32_t compute_restart_z; /* ordinal30 */
	uint32_t compute_thread_trace_enable; /* ordinal31 */
	uint32_t compute_misc_reserved; /* ordinal32 */
	uint32_t compute_dispatch_id; /* ordinal33 */
	uint32_t compute_threadgroup_id; /* ordinal34 */
	uint32_t compute_relaunch; /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
	uint32_t compute_wave_restore_control; /* ordinal38 */
	uint32_t reserved9; /* ordinal39 */
	uint32_t reserved10; /* ordinal40 */
	uint32_t reserved11; /* ordinal41 */
	uint32_t reserved12; /* ordinal42 */
	uint32_t reserved13; /* ordinal43 */
	uint32_t reserved14; /* ordinal44 */
	uint32_t reserved15; /* ordinal45 */
	uint32_t reserved16; /* ordinal46 */
	uint32_t reserved17; /* ordinal47 */
	uint32_t reserved18; /* ordinal48 */
	uint32_t reserved19; /* ordinal49 */
	uint32_t reserved20; /* ordinal50 */
	uint32_t reserved21; /* ordinal51 */
	uint32_t reserved22; /* ordinal52 */
	uint32_t reserved23; /* ordinal53 */
	uint32_t reserved24; /* ordinal54 */
	uint32_t reserved25; /* ordinal55 */
	uint32_t reserved26; /* ordinal56 */
	uint32_t reserved27; /* ordinal57 */
	uint32_t reserved28; /* ordinal58 */
	uint32_t reserved29; /* ordinal59 */
	uint32_t reserved30; /* ordinal60 */
	uint32_t reserved31; /* ordinal61 */
	uint32_t reserved32; /* ordinal62 */
	uint32_t reserved33; /* ordinal63 */
	uint32_t reserved34; /* ordinal64 */
	uint32_t compute_user_data_0; /* ordinal65 */
	uint32_t compute_user_data_1; /* ordinal66 */
	uint32_t compute_user_data_2; /* ordinal67 */
	uint32_t compute_user_data_3; /* ordinal68 */
	uint32_t compute_user_data_4; /* ordinal69 */
	uint32_t compute_user_data_5; /* ordinal70 */
	uint32_t compute_user_data_6; /* ordinal71 */
	uint32_t compute_user_data_7; /* ordinal72 */
	uint32_t compute_user_data_8; /* ordinal73 */
	uint32_t compute_user_data_9; /* ordinal74 */
	uint32_t compute_user_data_10; /* ordinal75 */
	uint32_t compute_user_data_11; /* ordinal76 */
	uint32_t compute_user_data_12; /* ordinal77 */
	uint32_t compute_user_data_13; /* ordinal78 */
	uint32_t compute_user_data_14; /* ordinal79 */
	uint32_t compute_user_data_15; /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
	uint32_t reserved35; /* ordinal83 */
	uint32_t reserved36; /* ordinal84 */
	uint32_t reserved37; /* ordinal85 */
	uint32_t cp_mqd_query_time_lo; /* ordinal86 */
	uint32_t cp_mqd_query_time_hi; /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
	uint32_t reserved38; /* ordinal96 */
	uint32_t reserved39; /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
	uint32_t reserved40; /* ordinal106 */
	uint32_t reserved41; /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
	uint32_t reserved42; /* ordinal112 */
	uint32_t reserved43; /* ordinal113 */
	uint32_t cp_pq_exe_status_lo; /* ordinal114 */
	uint32_t cp_pq_exe_status_hi; /* ordinal115 */
	uint32_t cp_packet_id_lo; /* ordinal116 */
	uint32_t cp_packet_id_hi; /* ordinal117 */
	uint32_t cp_packet_exe_status_lo; /* ordinal118 */
	uint32_t cp_packet_exe_status_hi; /* ordinal119 */
	uint32_t gds_save_base_addr_lo; /* ordinal120 */
	uint32_t gds_save_base_addr_hi; /* ordinal121 */
	uint32_t gds_save_mask_lo; /* ordinal122 */
	uint32_t gds_save_mask_hi; /* ordinal123 */
	uint32_t ctx_save_base_addr_lo; /* ordinal124 */
	uint32_t ctx_save_base_addr_hi; /* ordinal125 */
	uint32_t reserved44; /* ordinal126 */
	uint32_t reserved45; /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
	uint32_t cp_hqd_active; /* ordinal130 */
	uint32_t cp_hqd_vmid; /* ordinal131 */
	uint32_t cp_hqd_persistent_state; /* ordinal132 */
	uint32_t cp_hqd_pipe_priority; /* ordinal133 */
	uint32_t cp_hqd_queue_priority; /* ordinal134 */
	uint32_t cp_hqd_quantum; /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
	uint32_t cp_hqd_pq_rptr; /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
	uint32_t cp_hqd_pq_wptr; /* ordinal144 */
	uint32_t cp_hqd_pq_control; /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
	uint32_t cp_hqd_ib_rptr; /* ordinal148 */
	uint32_t cp_hqd_ib_control; /* ordinal149 */
	uint32_t cp_hqd_iq_timer; /* ordinal150 */
	uint32_t cp_hqd_iq_rptr; /* ordinal151 */
	uint32_t cp_hqd_dequeue_request; /* ordinal152 */
	uint32_t cp_hqd_dma_offload; /* ordinal153 */
	uint32_t cp_hqd_sema_cmd; /* ordinal154 */
	uint32_t cp_hqd_msg_type; /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
	uint32_t cp_hqd_hq_status0; /* ordinal160 */
	uint32_t cp_hqd_hq_control0; /* ordinal161 */
	uint32_t cp_mqd_control; /* ordinal162 */
	uint32_t cp_hqd_hq_status1; /* ordinal163 */
	uint32_t cp_hqd_hq_control1; /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
	uint32_t cp_hqd_eop_control; /* ordinal167 */
	uint32_t cp_hqd_eop_rptr; /* ordinal168 */
	uint32_t cp_hqd_eop_wptr; /* ordinal169 */
	uint32_t cp_hqd_eop_done_events; /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
	uint32_t cp_hqd_error; /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
	uint32_t cp_hqd_eop_dones; /* ordinal181 */
	uint32_t reserved46; /* ordinal182 */
	uint32_t reserved47; /* ordinal183 */
	uint32_t reserved48; /* ordinal184 */
	uint32_t reserved49; /* ordinal185 */
	uint32_t reserved50; /* ordinal186 */
	uint32_t reserved51; /* ordinal187 */
	uint32_t reserved52; /* ordinal188 */
	uint32_t reserved53; /* ordinal189 */
	uint32_t reserved54; /* ordinal190 */
	uint32_t reserved55; /* ordinal191 */
	uint32_t iqtimer_pkt_header; /* ordinal192 */
	uint32_t iqtimer_pkt_dw0; /* ordinal193 */
	uint32_t iqtimer_pkt_dw1; /* ordinal194 */
	uint32_t iqtimer_pkt_dw2; /* ordinal195 */
	uint32_t iqtimer_pkt_dw3; /* ordinal196 */
	uint32_t iqtimer_pkt_dw4; /* ordinal197 */
	uint32_t iqtimer_pkt_dw5; /* ordinal198 */
	uint32_t iqtimer_pkt_dw6; /* ordinal199 */
	uint32_t iqtimer_pkt_dw7; /* ordinal200 */
	uint32_t iqtimer_pkt_dw8; /* ordinal201 */
	uint32_t iqtimer_pkt_dw9; /* ordinal202 */
	uint32_t iqtimer_pkt_dw10; /* ordinal203 */
	uint32_t iqtimer_pkt_dw11; /* ordinal204 */
	uint32_t iqtimer_pkt_dw12; /* ordinal205 */
	uint32_t iqtimer_pkt_dw13; /* ordinal206 */
	uint32_t iqtimer_pkt_dw14; /* ordinal207 */
	uint32_t iqtimer_pkt_dw15; /* ordinal208 */
	uint32_t iqtimer_pkt_dw16; /* ordinal209 */
	uint32_t iqtimer_pkt_dw17; /* ordinal210 */
	uint32_t iqtimer_pkt_dw18; /* ordinal211 */
	uint32_t iqtimer_pkt_dw19; /* ordinal212 */
	uint32_t iqtimer_pkt_dw20; /* ordinal213 */
	uint32_t iqtimer_pkt_dw21; /* ordinal214 */
	uint32_t iqtimer_pkt_dw22; /* ordinal215 */
	uint32_t iqtimer_pkt_dw23; /* ordinal216 */
	uint32_t iqtimer_pkt_dw24; /* ordinal217 */
	uint32_t iqtimer_pkt_dw25; /* ordinal218 */
	uint32_t iqtimer_pkt_dw26; /* ordinal219 */
	uint32_t iqtimer_pkt_dw27; /* ordinal220 */
	uint32_t iqtimer_pkt_dw28; /* ordinal221 */
	uint32_t iqtimer_pkt_dw29; /* ordinal222 */
	uint32_t iqtimer_pkt_dw30; /* ordinal223 */
	uint32_t iqtimer_pkt_dw31; /* ordinal224 */
	uint32_t reserved56; /* ordinal225 */
	uint32_t reserved57; /* ordinal226 */
	uint32_t reserved58; /* ordinal227 */
	uint32_t set_resources_header; /* ordinal228 */
	uint32_t set_resources_dw1; /* ordinal229 */
	uint32_t set_resources_dw2; /* ordinal230 */
	uint32_t set_resources_dw3; /* ordinal231 */
	uint32_t set_resources_dw4; /* ordinal232 */
	uint32_t set_resources_dw5; /* ordinal233 */
	uint32_t set_resources_dw6; /* ordinal234 */
	uint32_t set_resources_dw7; /* ordinal235 */
	uint32_t reserved59; /* ordinal236 */
	uint32_t reserved60; /* ordinal237 */
	uint32_t reserved61; /* ordinal238 */
	uint32_t reserved62; /* ordinal239 */
	uint32_t reserved63; /* ordinal240 */
	uint32_t reserved64; /* ordinal241 */
	uint32_t reserved65; /* ordinal242 */
	uint32_t reserved66; /* ordinal243 */
	uint32_t reserved67; /* ordinal244 */
	uint32_t reserved68; /* ordinal245 */
	uint32_t reserved69; /* ordinal246 */
	uint32_t reserved70; /* ordinal247 */
	uint32_t reserved71; /* ordinal248 */
	uint32_t reserved72; /* ordinal249 */
	uint32_t reserved73; /* ordinal250 */
	uint32_t reserved74; /* ordinal251 */
	uint32_t reserved75; /* ordinal252 */
	uint32_t reserved76; /* ordinal253 */
	uint32_t reserved77; /* ordinal254 */
	uint32_t reserved78; /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3519
3520static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3521{
3522 int i, r;
3523
3524 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3525 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3526
3527 if (ring->mqd_obj) {
3528 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3529 if (unlikely(r != 0))
3530 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3531
3532 amdgpu_bo_unpin(ring->mqd_obj);
3533 amdgpu_bo_unreserve(ring->mqd_obj);
3534
3535 amdgpu_bo_unref(&ring->mqd_obj);
3536 ring->mqd_obj = NULL;
3537 }
3538 }
3539}
3540
/*
 * gfx_v8_0_cp_compute_resume - initialize and start the compute (MEC) queues.
 * @adev: amdgpu device pointer
 *
 * First programs the per-pipe EOP buffers through the SRBM, then for each
 * compute ring: allocates/pins/maps an MQD buffer object, fills in the MQD,
 * mirrors it into the HQD registers, activates the queue, and finally
 * un-halts the MEC and ring-tests every compute ring.
 *
 * Returns 0 on success or a negative error code; on failure any MQD
 * objects already created are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 belong to MEC1, 4-7 to MEC2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe gets a MEC_HPD_SIZE slice of the shared EOP BO;
		 * the register takes a 256-byte aligned address (>> 8) */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on every shader engine for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* program this queue's HQD registers under the SRBM lock */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address wether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* APU-class parts program the MEC doorbell window here */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	r = gfx_v8_0_cp_compute_start(adev);
	if (r)
		return r;

	/* ring-test each queue; a failed test just marks that ring not ready */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3792
3793static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3794{
3795 int r;
3796
e3c7656c 3797 if (!(adev->flags & AMD_IS_APU))
aaa36a97
AD
3798 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3799
3800 if (!adev->firmware.smu_load) {
3801 /* legacy firmware loading */
3802 r = gfx_v8_0_cp_gfx_load_microcode(adev);
3803 if (r)
3804 return r;
3805
3806 r = gfx_v8_0_cp_compute_load_microcode(adev);
3807 if (r)
3808 return r;
3809 } else {
3810 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3811 AMDGPU_UCODE_ID_CP_CE);
3812 if (r)
3813 return -EINVAL;
3814
3815 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3816 AMDGPU_UCODE_ID_CP_PFP);
3817 if (r)
3818 return -EINVAL;
3819
3820 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3821 AMDGPU_UCODE_ID_CP_ME);
3822 if (r)
3823 return -EINVAL;
3824
3825 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3826 AMDGPU_UCODE_ID_CP_MEC1);
3827 if (r)
3828 return -EINVAL;
3829 }
3830
3831 r = gfx_v8_0_cp_gfx_resume(adev);
3832 if (r)
3833 return r;
3834
3835 r = gfx_v8_0_cp_compute_resume(adev);
3836 if (r)
3837 return r;
3838
3839 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3840
3841 return 0;
3842}
3843
3844static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3845{
3846 gfx_v8_0_cp_gfx_enable(adev, enable);
3847 gfx_v8_0_cp_compute_enable(adev, enable);
3848}
3849
5fc3aeeb 3850static int gfx_v8_0_hw_init(void *handle)
aaa36a97
AD
3851{
3852 int r;
5fc3aeeb 3853 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3854
3855 gfx_v8_0_init_golden_registers(adev);
3856
3857 gfx_v8_0_gpu_init(adev);
3858
3859 r = gfx_v8_0_rlc_resume(adev);
3860 if (r)
3861 return r;
3862
3863 r = gfx_v8_0_cp_resume(adev);
3864 if (r)
3865 return r;
3866
3867 return r;
3868}
3869
5fc3aeeb 3870static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 3871{
5fc3aeeb 3872 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3873
aaa36a97
AD
3874 gfx_v8_0_cp_enable(adev, false);
3875 gfx_v8_0_rlc_stop(adev);
3876 gfx_v8_0_cp_compute_fini(adev);
3877
3878 return 0;
3879}
3880
5fc3aeeb 3881static int gfx_v8_0_suspend(void *handle)
aaa36a97 3882{
5fc3aeeb 3883 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3884
aaa36a97
AD
3885 return gfx_v8_0_hw_fini(adev);
3886}
3887
5fc3aeeb 3888static int gfx_v8_0_resume(void *handle)
aaa36a97 3889{
5fc3aeeb 3890 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3891
aaa36a97
AD
3892 return gfx_v8_0_hw_init(adev);
3893}
3894
5fc3aeeb 3895static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 3896{
5fc3aeeb 3897 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3898
aaa36a97
AD
3899 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3900 return false;
3901 else
3902 return true;
3903}
3904
5fc3aeeb 3905static int gfx_v8_0_wait_for_idle(void *handle)
aaa36a97
AD
3906{
3907 unsigned i;
3908 u32 tmp;
5fc3aeeb 3909 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3910
3911 for (i = 0; i < adev->usec_timeout; i++) {
3912 /* read MC_STATUS */
3913 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3914
3915 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3916 return 0;
3917 udelay(1);
3918 }
3919 return -ETIMEDOUT;
3920}
3921
5fc3aeeb 3922static void gfx_v8_0_print_status(void *handle)
aaa36a97
AD
3923{
3924 int i;
5fc3aeeb 3925 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3926
3927 dev_info(adev->dev, "GFX 8.x registers\n");
3928 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
3929 RREG32(mmGRBM_STATUS));
3930 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
3931 RREG32(mmGRBM_STATUS2));
3932 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3933 RREG32(mmGRBM_STATUS_SE0));
3934 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3935 RREG32(mmGRBM_STATUS_SE1));
3936 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3937 RREG32(mmGRBM_STATUS_SE2));
3938 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3939 RREG32(mmGRBM_STATUS_SE3));
3940 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3941 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3942 RREG32(mmCP_STALLED_STAT1));
3943 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3944 RREG32(mmCP_STALLED_STAT2));
3945 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3946 RREG32(mmCP_STALLED_STAT3));
3947 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3948 RREG32(mmCP_CPF_BUSY_STAT));
3949 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3950 RREG32(mmCP_CPF_STALLED_STAT1));
3951 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3952 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3953 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3954 RREG32(mmCP_CPC_STALLED_STAT1));
3955 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3956
3957 for (i = 0; i < 32; i++) {
3958 dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
3959 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3960 }
3961 for (i = 0; i < 16; i++) {
3962 dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
3963 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3964 }
3965 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3966 dev_info(adev->dev, " se: %d\n", i);
3967 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3968 dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
3969 RREG32(mmPA_SC_RASTER_CONFIG));
3970 dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
3971 RREG32(mmPA_SC_RASTER_CONFIG_1));
3972 }
3973 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3974
3975 dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
3976 RREG32(mmGB_ADDR_CONFIG));
3977 dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
3978 RREG32(mmHDP_ADDR_CONFIG));
3979 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
3980 RREG32(mmDMIF_ADDR_CALC));
3981 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
3982 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
3983 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
3984 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
3985 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
3986 RREG32(mmUVD_UDEC_ADDR_CONFIG));
3987 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
3988 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
3989 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
3990 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
3991
3992 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
3993 RREG32(mmCP_MEQ_THRESHOLDS));
3994 dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
3995 RREG32(mmSX_DEBUG_1));
3996 dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
3997 RREG32(mmTA_CNTL_AUX));
3998 dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
3999 RREG32(mmSPI_CONFIG_CNTL));
4000 dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
4001 RREG32(mmSQ_CONFIG));
4002 dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
4003 RREG32(mmDB_DEBUG));
4004 dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
4005 RREG32(mmDB_DEBUG2));
4006 dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
4007 RREG32(mmDB_DEBUG3));
4008 dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
4009 RREG32(mmCB_HW_CONTROL));
4010 dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
4011 RREG32(mmSPI_CONFIG_CNTL_1));
4012 dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
4013 RREG32(mmPA_SC_FIFO_SIZE));
4014 dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
4015 RREG32(mmVGT_NUM_INSTANCES));
4016 dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
4017 RREG32(mmCP_PERFMON_CNTL));
4018 dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4019 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4020 dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
4021 RREG32(mmVGT_CACHE_INVALIDATION));
4022 dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
4023 RREG32(mmVGT_GS_VERTEX_REUSE));
4024 dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4025 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4026 dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
4027 RREG32(mmPA_CL_ENHANCE));
4028 dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
4029 RREG32(mmPA_SC_ENHANCE));
4030
4031 dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
4032 RREG32(mmCP_ME_CNTL));
4033 dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
4034 RREG32(mmCP_MAX_CONTEXT));
4035 dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
4036 RREG32(mmCP_ENDIAN_SWAP));
4037 dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
4038 RREG32(mmCP_DEVICE_ID));
4039
4040 dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
4041 RREG32(mmCP_SEM_WAIT_TIMER));
4042
4043 dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
4044 RREG32(mmCP_RB_WPTR_DELAY));
4045 dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
4046 RREG32(mmCP_RB_VMID));
4047 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4048 RREG32(mmCP_RB0_CNTL));
4049 dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
4050 RREG32(mmCP_RB0_WPTR));
4051 dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
4052 RREG32(mmCP_RB0_RPTR_ADDR));
4053 dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4054 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4055 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4056 RREG32(mmCP_RB0_CNTL));
4057 dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
4058 RREG32(mmCP_RB0_BASE));
4059 dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
4060 RREG32(mmCP_RB0_BASE_HI));
4061 dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
4062 RREG32(mmCP_MEC_CNTL));
4063 dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
4064 RREG32(mmCP_CPF_DEBUG));
4065
4066 dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
4067 RREG32(mmSCRATCH_ADDR));
4068 dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
4069 RREG32(mmSCRATCH_UMSK));
4070
4071 dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
4072 RREG32(mmCP_INT_CNTL_RING0));
4073 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4074 RREG32(mmRLC_LB_CNTL));
4075 dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
4076 RREG32(mmRLC_CNTL));
4077 dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
4078 RREG32(mmRLC_CGCG_CGLS_CTRL));
4079 dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
4080 RREG32(mmRLC_LB_CNTR_INIT));
4081 dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
4082 RREG32(mmRLC_LB_CNTR_MAX));
4083 dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
4084 RREG32(mmRLC_LB_INIT_CU_MASK));
4085 dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
4086 RREG32(mmRLC_LB_PARAMS));
4087 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4088 RREG32(mmRLC_LB_CNTL));
4089 dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
4090 RREG32(mmRLC_MC_CNTL));
4091 dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
4092 RREG32(mmRLC_UCODE_CNTL));
4093
4094 mutex_lock(&adev->srbm_mutex);
4095 for (i = 0; i < 16; i++) {
4096 vi_srbm_select(adev, 0, 0, 0, i);
4097 dev_info(adev->dev, " VM %d:\n", i);
4098 dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
4099 RREG32(mmSH_MEM_CONFIG));
4100 dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
4101 RREG32(mmSH_MEM_APE1_BASE));
4102 dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
4103 RREG32(mmSH_MEM_APE1_LIMIT));
4104 dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
4105 RREG32(mmSH_MEM_BASES));
4106 }
4107 vi_srbm_select(adev, 0, 0, 0, 0);
4108 mutex_unlock(&adev->srbm_mutex);
4109}
4110
/**
 * gfx_v8_0_soft_reset - soft-reset hung GFX engines
 *
 * @handle: amdgpu_device pointer (amd_ip_funcs callbacks take void *)
 *
 * Inspects GRBM_STATUS, GRBM_STATUS2 and SRBM_STATUS to determine which
 * blocks (CP, GFX pipeline, RLC, GRBM) are busy/hung, then pulses the
 * matching soft-reset bits with the RLC stopped and CP gfx prefetching
 * disabled.  Returns 0 (the reset itself is not verified here).
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy shader/geometry/CP stage requests CP+GFX reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP additionally needs the GRBM itself reset via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* dump register state before the reset for debugging */
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		/* XXX todo */

		if (grbm_soft_reset) {
			/* assert reset, wait, then de-assert; the read-back
			 * after each write posts it before the delay */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/delay/de-assert dance on the SRBM side */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}
		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4194
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* the capture write latches LSB+MSB atomically; the mutex keeps a
	 * concurrent caller from re-triggering capture between our two reads */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
4214
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * partition registers for @vmid.  All sizes/bases arrive in bytes and are
 * converted to the hardware's allocation granularity via the *_SHIFT
 * constants before being written. */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base share one register (size in the upper field) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: programmed as a contiguous bitmask of oa_size bits at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4262
/* amd_ip_funcs .early_init: set the GFX8 ring counts and install the ring,
 * IRQ and GDS callback/config tables before sw_init/hw_init run. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4275
ccba7691
AD
/* amd_ip_funcs .late_init: run the EDC GPR workarounds.  They submit IBs,
 * so they can only run here, after the IB pool has been initialized.
 * Returns 0 on success or the workaround's error code. */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_do_edc_gpr_workarounds(adev);
}
4288
/* amd_ip_funcs .set_powergating_state: no powergating handling implemented
 * for GFX8 here; accept the request and report success. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4294
/* amd_ip_funcs .set_clockgating_state: no clockgating handling implemented
 * for GFX8 here; accept the request and report success. */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	return 0;
}
4300
4301static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4302{
4303 u32 rptr;
4304
4305 rptr = ring->adev->wb.wb[ring->rptr_offs];
4306
4307 return rptr;
4308}
4309
4310static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4311{
4312 struct amdgpu_device *adev = ring->adev;
4313 u32 wptr;
4314
4315 if (ring->use_doorbell)
4316 /* XXX check if swapping is necessary on BE */
4317 wptr = ring->adev->wb.wb[ring->wptr_offs];
4318 else
4319 wptr = RREG32(mmCP_RB0_WPTR);
4320
4321 return wptr;
4322}
4323
/* Publish the gfx ring's write pointer to the CP: via doorbell (writeback
 * slot + doorbell ring) when enabled, otherwise via the CP_RB0_WPTR MMIO
 * register. */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to flush the posted MMIO write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
4337
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for the
 * matching done bit.  The ref/mask bit depends on which CP (gfx PFP, or a
 * given MEC/pipe for compute) issues the request. */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* pick the CP2/CP6 flush-done bit for this MEC+pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4369
/* Emit an indirect buffer on the gfx ring: writes the predicted next rptr
 * to the ring's writeback slot, inserts a SWITCH_BUFFER on context switch,
 * then emits the INDIRECT_BUFFER (or _CONST for CE IBs) packet itself. */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the 2-dword SWITCH_BUFFER emitted below */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the 5-dword WRITE_DATA packet itself minus header */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* constant-engine IBs use the _CONST variant of the packet */
	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VMID (bits 24+) this IB executes under */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4414
/* Emit an indirect buffer on a compute ring.  Simpler than the gfx variant:
 * no CE/preamble handling and no context-switch buffer, but the control
 * word must carry INDIRECT_BUFFER_VALID. */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* predicted rptr after this WRITE_DATA packet completes */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VMID (bits 24+) this IB executes under */
	control |= ib->length_dw |
			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4444
/* Emit a fence on the gfx ring using EVENT_WRITE_EOP: flush TC/TCL1 caches,
 * write the 32- or 64-bit @seq to @addr and optionally raise an interrupt,
 * depending on @flags. */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	/* upper address bits share a dword with the data/int select fields */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4464
/**
 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
 *
 * @ring: amdgpu ring buffer object
 * @semaphore: amdgpu semaphore object
 * @emit_wait: Is this a semaphore wait?
 *
 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
 * from running ahead of semaphore waits.  Returns false on ASICs with the
 * hardware semaphore bug so the caller falls back to software fence waits.
 */
static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
					 struct amdgpu_semaphore *semaphore,
					 bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	if (ring->adev->asic_type == CHIP_TOPAZ ||
	    ring->adev->asic_type == CHIP_TONGA ||
	    ring->adev->asic_type == CHIP_FIJI)
		/* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
		return false;
	else {
		amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, sel);
	}

	if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
		/* Prevent the PFP from running ahead of the semaphore wait */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}

	return true;
}
4502
aaa36a97
AD
/* Flush the TLB for @vm_id after switching its page directory to @pd_addr:
 * wait for this ring's last fence, program the per-VMID page-table base
 * register, request a VM invalidate and wait for it to complete.  The gfx
 * ring (usepfp) additionally needs CE/PFP synchronization around the
 * sequence. */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the last fence emitted on this ring has signalled, so
	 * earlier work no longer references the old page tables */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before the context switch is done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* write the new page directory base; VMIDs 0-7 and 8-15 live in two
	 * separate register banks */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4572
aaa36a97
AD
4573static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4574{
4575 return ring->adev->wb.wb[ring->rptr_offs];
4576}
4577
4578static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4579{
4580 return ring->adev->wb.wb[ring->wptr_offs];
4581}
4582
/* Publish a compute ring's write pointer: update the writeback slot, then
 * ring the queue's doorbell (compute queues always use doorbells). */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4591
/* Emit a fence on a compute ring using RELEASE_MEM: flush and write back
 * TC/TCL1 caches, write the 32- or 64-bit @seq to @addr and optionally
 * raise an interrupt, depending on @flags. */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4612
4613static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4614 enum amdgpu_interrupt_state state)
4615{
4616 u32 cp_int_cntl;
4617
4618 switch (state) {
4619 case AMDGPU_IRQ_STATE_DISABLE:
4620 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4621 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4622 TIME_STAMP_INT_ENABLE, 0);
4623 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4624 break;
4625 case AMDGPU_IRQ_STATE_ENABLE:
4626 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4627 cp_int_cntl =
4628 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4629 TIME_STAMP_INT_ENABLE, 1);
4630 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4631 break;
4632 default:
4633 break;
4634 }
4635}
4636
/* Enable or disable the EOP timestamp interrupt for one MEC pipe by
 * toggling TIME_STAMP_INT_ENABLE in that pipe's interrupt control register. */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
4680
4681static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4682 struct amdgpu_irq_src *source,
4683 unsigned type,
4684 enum amdgpu_interrupt_state state)
4685{
4686 u32 cp_int_cntl;
4687
4688 switch (state) {
4689 case AMDGPU_IRQ_STATE_DISABLE:
4690 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4691 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4692 PRIV_REG_INT_ENABLE, 0);
4693 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4694 break;
4695 case AMDGPU_IRQ_STATE_ENABLE:
4696 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4697 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4698 PRIV_REG_INT_ENABLE, 0);
4699 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4700 break;
4701 default:
4702 break;
4703 }
4704
4705 return 0;
4706}
4707
4708static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4709 struct amdgpu_irq_src *source,
4710 unsigned type,
4711 enum amdgpu_interrupt_state state)
4712{
4713 u32 cp_int_cntl;
4714
4715 switch (state) {
4716 case AMDGPU_IRQ_STATE_DISABLE:
4717 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4718 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4719 PRIV_INSTR_INT_ENABLE, 0);
4720 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4721 break;
4722 case AMDGPU_IRQ_STATE_ENABLE:
4723 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4724 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4725 PRIV_INSTR_INT_ENABLE, 1);
4726 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4727 break;
4728 default:
4729 break;
4730 }
4731
4732 return 0;
4733}
4734
/* IRQ-source .set callback for EOP interrupts: dispatch @type to the gfx
 * ring handler or to the matching MEC/pipe handler.  Always returns 0;
 * unknown types are ignored. */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
4773
/* IRQ-source .process callback for EOP interrupts: decode ME/pipe/queue
 * from the IV ring_id and run fence processing on the matching ring.
 * Always returns 0. */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id packs queue (bits 6:4), me (bits 3:2) and pipe (bits 1:0) */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME 0 is the gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
4805
/* Handle a privileged-register fault: log it and schedule a GPU reset via
 * the device's reset work item.  Always returns 0. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4814
/* Handle a privileged-instruction fault: log it and schedule a GPU reset
 * via the device's reset work item.  Always returns 0. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4823
/* IP-block callback table registered with the amdgpu core for GFX8. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
4840
/* Ring callback table for the GFX (graphics) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
4856
/* Ring callback table for the MEC compute rings; differs from the gfx
 * table only in the rptr/wptr accessors, the IB emit and the fence emit. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_semaphore = gfx_v8_0_ring_emit_semaphore,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
};
4872
4873static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
4874{
4875 int i;
4876
4877 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4878 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
4879
4880 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4881 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
4882}
4883
/* IRQ source tables: pair each interrupt's state-set and process handlers. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
4898
/* Register the GFX8 interrupt sources (EOP fences, privileged-register and
 * privileged-instruction faults) with the amdgpu IRQ framework. */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
4910
4911static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
4912{
4913 /* init asci gds info */
4914 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
4915 adev->gds.gws.total_size = 64;
4916 adev->gds.oa.total_size = 16;
4917
4918 if (adev->gds.mem.total_size == 64 * 1024) {
4919 adev->gds.mem.gfx_partition_size = 4096;
4920 adev->gds.mem.cs_partition_size = 4096;
4921
4922 adev->gds.gws.gfx_partition_size = 4;
4923 adev->gds.gws.cs_partition_size = 4;
4924
4925 adev->gds.oa.gfx_partition_size = 4;
4926 adev->gds.oa.cs_partition_size = 1;
4927 } else {
4928 adev->gds.mem.gfx_partition_size = 1024;
4929 adev->gds.mem.cs_partition_size = 1024;
4930
4931 adev->gds.gws.gfx_partition_size = 16;
4932 adev->gds.gws.cs_partition_size = 16;
4933
4934 adev->gds.oa.gfx_partition_size = 4;
4935 adev->gds.oa.cs_partition_size = 4;
4936 }
4937}
4938
/* Return the bitmap of active (non-disabled) CUs for shader engine @se,
 * shader array @sh.  Combines the hardware- and user-disabled CU fields,
 * then inverts and masks down to max_cu_per_sh bits. */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
					 u32 se, u32 sh)
{
	u32 mask = 0, tmp, tmp1;
	int i;

	/* caller must hold grbm_idx_mutex: select the SE/SH, read, then
	 * restore broadcast mode */
	gfx_v8_0_select_se_sh(adev, se, sh);
	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	/* merge the inactive-CU fields (upper 16 bits) of both registers */
	tmp &= 0xffff0000;

	tmp |= tmp1;
	tmp >>= 16;

	/* build a mask of max_cu_per_sh ones */
	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
		mask <<= 1;
		mask |= 1;
	}

	return (~tmp) & mask;
}
4962
/* Fill @cu_info with the active-CU count, the per-SE/SH active bitmaps and
 * the always-on CU mask (at most 2 CUs per SH are marked always-on).
 * Returns 0 on success or -EINVAL on NULL arguments. */
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
			 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	/* grbm_idx_mutex protects the SE/SH selection done by
	 * gfx_v8_0_get_cu_active_bitmap() */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					/* first two active CUs per SH are always-on */
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* pack each SH's 8-bit AO bitmap into the 32-bit mask */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	mutex_unlock(&adev->grbm_idx_mutex);
	return 0;
}
This page took 0.42123 seconds and 5 git commands to generate.