/* drm/amdgpu: update golden setting of carrizo */
/* deliverable/linux.git: drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c */
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
53 #define GFX8_NUM_GFX_RINGS 1
54 #define GFX8_NUM_COMPUTE_RINGS 8
55
56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
60
61 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
62 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
63 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
64 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
65 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
66 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
67 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
68 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
69 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
77
78 /* BPM SERDES CMD */
79 #define SET_BPM_SERDES_CMD 1
80 #define CLE_BPM_SERDES_CMD 0
81
82 /* BPM Register Address*/
83 enum {
84 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
85 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
86 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
87 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
88 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
89 BPM_REG_FGCG_MAX
90 };
91
92 #define RLC_FormatDirectRegListLength 14
93
94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
100
101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
119
120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
126
127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
133
134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
142 {
143 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
144 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
145 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
146 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
147 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
148 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
149 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
150 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
151 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
152 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
153 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
154 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
155 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
156 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
157 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
158 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
159 };
160
161 static const u32 golden_settings_tonga_a11[] =
162 {
163 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
164 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
165 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
166 mmGB_GPU_ID, 0x0000000f, 0x00000000,
167 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
168 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
169 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
170 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
171 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
172 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
173 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
174 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
175 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
176 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
177 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
178 };
179
180 static const u32 tonga_golden_common_all[] =
181 {
182 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
183 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
184 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
185 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
186 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
187 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
188 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
189 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
190 };
191
192 static const u32 tonga_mgcg_cgcg_init[] =
193 {
194 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
195 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
196 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
197 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
198 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
199 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
200 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
201 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
202 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
203 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
204 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
205 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
206 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
207 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
208 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
209 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
210 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
211 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
212 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
213 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
214 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
215 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
216 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
217 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
218 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
219 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
220 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
221 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
222 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
223 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
224 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
225 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
226 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
227 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
228 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
229 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
230 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
231 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
232 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
233 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
234 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
235 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
236 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
237 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
238 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
239 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
240 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
241 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
242 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
243 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
244 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
245 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
246 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
247 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
248 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
249 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
250 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
251 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
252 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
253 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
254 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
255 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
256 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
257 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
258 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
259 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
260 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
261 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
262 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
263 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
264 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
265 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
266 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
267 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
268 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
269 };
270
271 static const u32 golden_settings_polaris11_a11[] =
272 {
273 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
274 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
275 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
276 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
277 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
278 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
279 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
280 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
281 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
282 mmSQ_CONFIG, 0x07f80000, 0x07180000,
283 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
284 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
285 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
286 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
287 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
288 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
289 };
290
291 static const u32 polaris11_golden_common_all[] =
292 {
293 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
294 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
295 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
296 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
297 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
298 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
299 };
300
301 static const u32 golden_settings_polaris10_a11[] =
302 {
303 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
304 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
305 mmCB_HW_CONTROL_2, 0, 0x0f000000,
306 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
307 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
308 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
309 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
310 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
311 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
312 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
313 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
314 mmSQ_CONFIG, 0x07f80000, 0x07180000,
315 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
316 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
317 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
318 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
319 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
320 };
321
322 static const u32 polaris10_golden_common_all[] =
323 {
324 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
325 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
326 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
327 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
328 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
329 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
330 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
331 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
332 };
333
334 static const u32 fiji_golden_common_all[] =
335 {
336 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
337 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
338 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
339 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
340 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
341 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
342 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
343 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
344 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
345 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
346 };
347
348 static const u32 golden_settings_fiji_a10[] =
349 {
350 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
351 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
352 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
353 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
354 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
355 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
356 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
357 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
358 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
359 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
360 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
361 };
362
363 static const u32 fiji_mgcg_cgcg_init[] =
364 {
365 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
366 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
367 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
368 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
369 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
370 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
371 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
372 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
373 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
374 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
375 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
376 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
377 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
378 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
379 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
380 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
381 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
382 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
383 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
384 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
385 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
386 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
387 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
388 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
389 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
390 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
391 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
392 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
393 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
394 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
395 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
396 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
397 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
398 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
399 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
400 };
401
402 static const u32 golden_settings_iceland_a11[] =
403 {
404 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
405 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
406 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
407 mmGB_GPU_ID, 0x0000000f, 0x00000000,
408 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
409 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
410 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
411 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
412 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
413 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
414 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
415 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
416 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
417 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
418 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
419 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
420 };
421
422 static const u32 iceland_golden_common_all[] =
423 {
424 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
425 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
426 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
427 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
428 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
429 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
430 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
431 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
432 };
433
434 static const u32 iceland_mgcg_cgcg_init[] =
435 {
436 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
437 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
438 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
439 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
440 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
441 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
442 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
443 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
444 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
445 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
446 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
447 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
448 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
449 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
450 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
454 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
455 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
456 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
457 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
458 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
459 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
460 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
461 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
462 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
463 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
464 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
465 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
466 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
467 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
468 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
469 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
470 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
471 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
472 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
473 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
474 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
475 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
476 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
477 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
478 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
479 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
480 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
481 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
482 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
483 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
484 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
485 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
486 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
487 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
488 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
489 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
490 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
491 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
492 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
495 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
498 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
499 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
500 };
501
502 static const u32 cz_golden_settings_a11[] =
503 {
504 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
505 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
506 mmGB_GPU_ID, 0x0000000f, 0x00000000,
507 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
508 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
509 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
510 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
511 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
512 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
513 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
514 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
515 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
516 };
517
518 static const u32 cz_golden_common_all[] =
519 {
520 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
521 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
522 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
523 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
524 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
525 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
526 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
527 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
528 };
529
530 static const u32 cz_mgcg_cgcg_init[] =
531 {
532 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
533 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
534 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
535 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
536 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
537 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
538 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
539 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
540 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
541 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
542 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
543 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
544 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
545 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
546 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
547 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
548 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
549 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
550 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
551 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
552 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
553 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
554 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
555 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
556 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
557 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
558 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
559 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
560 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
561 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
562 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
563 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
564 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
565 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
566 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
567 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
568 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
569 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
570 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
571 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
572 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
573 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
574 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
575 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
576 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
577 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
578 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
579 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
580 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
581 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
582 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
583 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
584 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
585 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
586 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
587 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
588 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
591 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
596 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
601 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
604 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
605 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
606 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
607 };
608
609 static const u32 stoney_golden_settings_a11[] =
610 {
611 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
612 mmGB_GPU_ID, 0x0000000f, 0x00000000,
613 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
614 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
615 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
616 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
617 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
618 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
619 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
620 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
621 };
622
623 static const u32 stoney_golden_common_all[] =
624 {
625 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
626 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
627 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
628 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
629 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
630 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
631 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
632 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
633 };
634
635 static const u32 stoney_mgcg_cgcg_init[] =
636 {
637 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
638 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
639 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
640 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
641 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
642 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
643 };
644
645 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
646 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
647 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
648 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
649 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
650 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
651
652 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
653 {
654 switch (adev->asic_type) {
655 case CHIP_TOPAZ:
656 amdgpu_program_register_sequence(adev,
657 iceland_mgcg_cgcg_init,
658 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
659 amdgpu_program_register_sequence(adev,
660 golden_settings_iceland_a11,
661 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
662 amdgpu_program_register_sequence(adev,
663 iceland_golden_common_all,
664 (const u32)ARRAY_SIZE(iceland_golden_common_all));
665 break;
666 case CHIP_FIJI:
667 amdgpu_program_register_sequence(adev,
668 fiji_mgcg_cgcg_init,
669 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
670 amdgpu_program_register_sequence(adev,
671 golden_settings_fiji_a10,
672 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
673 amdgpu_program_register_sequence(adev,
674 fiji_golden_common_all,
675 (const u32)ARRAY_SIZE(fiji_golden_common_all));
676 break;
677
678 case CHIP_TONGA:
679 amdgpu_program_register_sequence(adev,
680 tonga_mgcg_cgcg_init,
681 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
682 amdgpu_program_register_sequence(adev,
683 golden_settings_tonga_a11,
684 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
685 amdgpu_program_register_sequence(adev,
686 tonga_golden_common_all,
687 (const u32)ARRAY_SIZE(tonga_golden_common_all));
688 break;
689 case CHIP_POLARIS11:
690 amdgpu_program_register_sequence(adev,
691 golden_settings_polaris11_a11,
692 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
693 amdgpu_program_register_sequence(adev,
694 polaris11_golden_common_all,
695 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
696 break;
697 case CHIP_POLARIS10:
698 amdgpu_program_register_sequence(adev,
699 golden_settings_polaris10_a11,
700 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
701 amdgpu_program_register_sequence(adev,
702 polaris10_golden_common_all,
703 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
704 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
705 if (adev->pdev->revision == 0xc7) {
706 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
707 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
708 }
709 break;
710 case CHIP_CARRIZO:
711 amdgpu_program_register_sequence(adev,
712 cz_mgcg_cgcg_init,
713 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
714 amdgpu_program_register_sequence(adev,
715 cz_golden_settings_a11,
716 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
717 amdgpu_program_register_sequence(adev,
718 cz_golden_common_all,
719 (const u32)ARRAY_SIZE(cz_golden_common_all));
720 break;
721 case CHIP_STONEY:
722 amdgpu_program_register_sequence(adev,
723 stoney_mgcg_cgcg_init,
724 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
725 amdgpu_program_register_sequence(adev,
726 stoney_golden_settings_a11,
727 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
728 amdgpu_program_register_sequence(adev,
729 stoney_golden_common_all,
730 (const u32)ARRAY_SIZE(stoney_golden_common_all));
731 break;
732 default:
733 break;
734 }
735 }
736
737 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
738 {
739 int i;
740
741 adev->gfx.scratch.num_reg = 7;
742 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
743 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
744 adev->gfx.scratch.free[i] = true;
745 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
746 }
747 }
748
749 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
750 {
751 struct amdgpu_device *adev = ring->adev;
752 uint32_t scratch;
753 uint32_t tmp = 0;
754 unsigned i;
755 int r;
756
757 r = amdgpu_gfx_scratch_get(adev, &scratch);
758 if (r) {
759 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
760 return r;
761 }
762 WREG32(scratch, 0xCAFEDEAD);
763 r = amdgpu_ring_alloc(ring, 3);
764 if (r) {
765 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
766 ring->idx, r);
767 amdgpu_gfx_scratch_free(adev, scratch);
768 return r;
769 }
770 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
771 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
772 amdgpu_ring_write(ring, 0xDEADBEEF);
773 amdgpu_ring_commit(ring);
774
775 for (i = 0; i < adev->usec_timeout; i++) {
776 tmp = RREG32(scratch);
777 if (tmp == 0xDEADBEEF)
778 break;
779 DRM_UDELAY(1);
780 }
781 if (i < adev->usec_timeout) {
782 DRM_INFO("ring test on %d succeeded in %d usecs\n",
783 ring->idx, i);
784 } else {
785 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
786 ring->idx, scratch, tmp);
787 r = -EINVAL;
788 }
789 amdgpu_gfx_scratch_free(adev, scratch);
790 return r;
791 }
792
/* Smoke-test indirect buffer execution on @ring: poison a scratch register,
 * submit a tiny IB that writes a magic value to it, wait for the fence and
 * verify the write landed.  Returns 0 on success, -ETIMEDOUT if the fence
 * never signals within @timeout, or a negative error code. */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* Poison value; the IB must overwrite it with 0xDEADBEEF. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* IB payload: one SET_UCONFIG_REG packet targeting the scratch reg. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	/* fence_wait_timeout() returns 0 on timeout, <0 on error,
	 * remaining jiffies on success. */
	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
848
849
850 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
851 release_firmware(adev->gfx.pfp_fw);
852 adev->gfx.pfp_fw = NULL;
853 release_firmware(adev->gfx.me_fw);
854 adev->gfx.me_fw = NULL;
855 release_firmware(adev->gfx.ce_fw);
856 adev->gfx.ce_fw = NULL;
857 release_firmware(adev->gfx.rlc_fw);
858 adev->gfx.rlc_fw = NULL;
859 release_firmware(adev->gfx.mec_fw);
860 adev->gfx.mec_fw = NULL;
861 if ((adev->asic_type != CHIP_STONEY) &&
862 (adev->asic_type != CHIP_TOPAZ))
863 release_firmware(adev->gfx.mec2_fw);
864 adev->gfx.mec2_fw = NULL;
865
866 kfree(adev->gfx.rlc.register_list_format);
867 }
868
869 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
870 {
871 const char *chip_name;
872 char fw_name[30];
873 int err;
874 struct amdgpu_firmware_info *info = NULL;
875 const struct common_firmware_header *header = NULL;
876 const struct gfx_firmware_header_v1_0 *cp_hdr;
877 const struct rlc_firmware_header_v2_0 *rlc_hdr;
878 unsigned int *tmp = NULL, i;
879
880 DRM_DEBUG("\n");
881
882 switch (adev->asic_type) {
883 case CHIP_TOPAZ:
884 chip_name = "topaz";
885 break;
886 case CHIP_TONGA:
887 chip_name = "tonga";
888 break;
889 case CHIP_CARRIZO:
890 chip_name = "carrizo";
891 break;
892 case CHIP_FIJI:
893 chip_name = "fiji";
894 break;
895 case CHIP_POLARIS11:
896 chip_name = "polaris11";
897 break;
898 case CHIP_POLARIS10:
899 chip_name = "polaris10";
900 break;
901 case CHIP_STONEY:
902 chip_name = "stoney";
903 break;
904 default:
905 BUG();
906 }
907
908 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
909 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
910 if (err)
911 goto out;
912 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
913 if (err)
914 goto out;
915 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
916 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
917 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
918
919 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
920 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
921 if (err)
922 goto out;
923 err = amdgpu_ucode_validate(adev->gfx.me_fw);
924 if (err)
925 goto out;
926 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
927 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
928 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
929
930 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
931 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
932 if (err)
933 goto out;
934 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
935 if (err)
936 goto out;
937 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
938 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
939 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
940
941 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
942 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
943 if (err)
944 goto out;
945 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
946 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
947 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
948 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
949
950 adev->gfx.rlc.save_and_restore_offset =
951 le32_to_cpu(rlc_hdr->save_and_restore_offset);
952 adev->gfx.rlc.clear_state_descriptor_offset =
953 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
954 adev->gfx.rlc.avail_scratch_ram_locations =
955 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
956 adev->gfx.rlc.reg_restore_list_size =
957 le32_to_cpu(rlc_hdr->reg_restore_list_size);
958 adev->gfx.rlc.reg_list_format_start =
959 le32_to_cpu(rlc_hdr->reg_list_format_start);
960 adev->gfx.rlc.reg_list_format_separate_start =
961 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
962 adev->gfx.rlc.starting_offsets_start =
963 le32_to_cpu(rlc_hdr->starting_offsets_start);
964 adev->gfx.rlc.reg_list_format_size_bytes =
965 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
966 adev->gfx.rlc.reg_list_size_bytes =
967 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
968
969 adev->gfx.rlc.register_list_format =
970 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
971 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
972
973 if (!adev->gfx.rlc.register_list_format) {
974 err = -ENOMEM;
975 goto out;
976 }
977
978 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
979 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
980 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
981 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
982
983 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
984
985 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
986 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
987 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
988 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
989
990 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
991 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
992 if (err)
993 goto out;
994 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
995 if (err)
996 goto out;
997 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
998 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
999 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1000
1001 if ((adev->asic_type != CHIP_STONEY) &&
1002 (adev->asic_type != CHIP_TOPAZ)) {
1003 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1004 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1005 if (!err) {
1006 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1007 if (err)
1008 goto out;
1009 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1010 adev->gfx.mec2_fw->data;
1011 adev->gfx.mec2_fw_version =
1012 le32_to_cpu(cp_hdr->header.ucode_version);
1013 adev->gfx.mec2_feature_version =
1014 le32_to_cpu(cp_hdr->ucode_feature_version);
1015 } else {
1016 err = 0;
1017 adev->gfx.mec2_fw = NULL;
1018 }
1019 }
1020
1021 if (adev->firmware.smu_load) {
1022 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1023 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1024 info->fw = adev->gfx.pfp_fw;
1025 header = (const struct common_firmware_header *)info->fw->data;
1026 adev->firmware.fw_size +=
1027 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1028
1029 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1030 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1031 info->fw = adev->gfx.me_fw;
1032 header = (const struct common_firmware_header *)info->fw->data;
1033 adev->firmware.fw_size +=
1034 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1035
1036 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1037 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1038 info->fw = adev->gfx.ce_fw;
1039 header = (const struct common_firmware_header *)info->fw->data;
1040 adev->firmware.fw_size +=
1041 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1042
1043 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1044 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1045 info->fw = adev->gfx.rlc_fw;
1046 header = (const struct common_firmware_header *)info->fw->data;
1047 adev->firmware.fw_size +=
1048 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1049
1050 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1051 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1052 info->fw = adev->gfx.mec_fw;
1053 header = (const struct common_firmware_header *)info->fw->data;
1054 adev->firmware.fw_size +=
1055 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1056
1057 if (adev->gfx.mec2_fw) {
1058 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1059 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1060 info->fw = adev->gfx.mec2_fw;
1061 header = (const struct common_firmware_header *)info->fw->data;
1062 adev->firmware.fw_size +=
1063 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1064 }
1065
1066 }
1067
1068 out:
1069 if (err) {
1070 dev_err(adev->dev,
1071 "gfx8: Failed to load firmware \"%s\"\n",
1072 fw_name);
1073 release_firmware(adev->gfx.pfp_fw);
1074 adev->gfx.pfp_fw = NULL;
1075 release_firmware(adev->gfx.me_fw);
1076 adev->gfx.me_fw = NULL;
1077 release_firmware(adev->gfx.ce_fw);
1078 adev->gfx.ce_fw = NULL;
1079 release_firmware(adev->gfx.rlc_fw);
1080 adev->gfx.rlc_fw = NULL;
1081 release_firmware(adev->gfx.mec_fw);
1082 adev->gfx.mec_fw = NULL;
1083 release_firmware(adev->gfx.mec2_fw);
1084 adev->gfx.mec2_fw = NULL;
1085 }
1086 return err;
1087 }
1088
/* Build the RLC clear-state indirect buffer in @buffer: preamble begin,
 * context control, the SECT_CONTEXT register extents from
 * adev->gfx.rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG pair, preamble
 * end, and a final CLEAR_STATE packet.  @buffer must be at least
 * gfx_v8_0_get_csb_size(adev) dwords; all writes are little-endian. */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* Emit one SET_CONTEXT_REG packet per register extent; only
	 * SECT_CONTEXT sections are expected — anything else aborts. */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* Per-ASIC raster config (PA_SC_RASTER_CONFIG / _CONFIG_1). */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1161
/* Copy the CP jump tables out of the CE/PFP/ME/MEC (and on Carrizo, MEC2)
 * microcode images into the pre-mapped RLC cp_table buffer
 * (adev->gfx.rlc.cp_table_ptr).  Tables are packed back-to-back in
 * engine order.  Caller must have reserved and kmapped cp_table_obj. */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Only Carrizo has MEC2 microcode here, hence a fifth table. */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		/* Each branch locates the jump table (jt_offset/jt_size,
		 * in dwords) inside the corresponding firmware image. */
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			/* NOTE(review): reached only when max_me == 5
			 * (Carrizo), so mec2_fw is expected non-NULL here
			 * — confirm against init_microcode error paths. */
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}
1226
1227 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1228 {
1229 int r;
1230
1231 /* clear state block */
1232 if (adev->gfx.rlc.clear_state_obj) {
1233 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1234 if (unlikely(r != 0))
1235 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1236 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1237 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1238
1239 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1240 adev->gfx.rlc.clear_state_obj = NULL;
1241 }
1242
1243 /* jump table block */
1244 if (adev->gfx.rlc.cp_table_obj) {
1245 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1246 if (unlikely(r != 0))
1247 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1248 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1249 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1250
1251 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1252 adev->gfx.rlc.cp_table_obj = NULL;
1253 }
1254 }
1255
/* Allocate, pin and fill the RLC buffers: the clear-state block (built by
 * gfx_v8_0_get_csb_buffer()) and, on Carrizo/Stoney, the CP jump-table
 * block (filled by cz_init_cp_jump_table()).  Returns 0 or a negative
 * error code; on clear-state failures gfx_v8_0_rlc_fini() is called to
 * unwind. */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			/* CPU-accessible VRAM BO sized in dwords. */
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		/* Reserve -> pin -> kmap, in that order; each failure
		 * unwinds via rlc_fini. */
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);

	}

	return 0;
}
1352
1353 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1354 {
1355 int r;
1356
1357 if (adev->gfx.mec.hpd_eop_obj) {
1358 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1359 if (unlikely(r != 0))
1360 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1361 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1362 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1363
1364 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1365 adev->gfx.mec.hpd_eop_obj = NULL;
1366 }
1367 }
1368
/* Per-queue HPD (hardware queue descriptor) EOP buffer size, in bytes. */
#define MEC_HPD_SIZE 2048

/* Allocate, pin, map and zero the GTT buffer holding the MEC HPD EOP
 * areas.  Only one MEC with one pipe is configured here; the rest are
 * left to the KFD.  Returns 0 or a negative error code (partial setup is
 * unwound via gfx_v8_0_mec_fini()). */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		/* NOTE(review): sized as num_mec * num_pipe * MEC_HPD_SIZE * 2
		 * even though 8 queues are configured — presumably the *2 is
		 * deliberate headroom; confirm before changing.  The message
		 * below says "HDP" where mec_fini says "HPD". */
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	/* Zero the whole EOP area before handing it to the MEC. */
	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}
1422
/* Pre-assembled compute shader that writes a spread of VGPRs; dispatched
 * by gfx_v8_0_do_edc_gpr_workarounds() to initialize VGPR state on CZ. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1459
/* Pre-assembled compute shader that writes a spread of SGPRs; dispatched
 * twice (SGPR1/SGPR2 register sets) by gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1484
/* Register/value pairs programmed (via SET_SH_REG) before dispatching the
 * VGPR init shader in gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1504
/* Register/value pairs for the first SGPR init dispatch (SE mask 0x0f). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1524
/* Register/value pairs for the second SGPR init dispatch (SE mask 0xf0). */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1544
/* EDC SEC/DED error counter registers; read back (and thereby cleared)
 * at the end of gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1573
/* Carrizo-only EDC workaround: build one IB that runs three compute
 * dispatches (VGPR init, then SGPR init on each SE half) to touch all
 * GPRs, then re-enable EDC modes and clear the SEC/DED counters by
 * reading them back.  Returns 0 (including when skipped) or a negative
 * error code. */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* Disable EDC while the init shaders run; old mode saved in tmp. */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* Per dispatch: 3 dwords per reg pair, +4 (PGM_LO/HI packet),
	 * +5 (DISPATCH_DIRECT), +2 (EVENT_WRITE) — in bytes (*4). */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* Restore EDC: double-error detect + fault propagation. */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}
1736
/*
 * gfx_v8_0_gpu_early_init - fill in the per-ASIC gfx configuration
 *
 * @adev: amdgpu device pointer
 *
 * Populates adev->gfx.config with the shader engine / CU / render-backend
 * topology, FIFO sizes and the golden GB_ADDR_CONFIG value for the
 * detected ASIC.  For Polaris parts the topology is queried from the
 * vbios (atombios) instead of being hard-coded.  Afterwards the memory
 * row size is derived (from the DIMM address-map fuses on APUs, from
 * MC_ARB_RAMCFG on dGPUs) and folded into the cached GB_ADDR_CONFIG.
 *
 * Returns 0 on success or a negative error code propagated from
 * amdgpu_atombios_get_gfx_info().
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* Polaris topology comes from the vbios, not hard-coded tables */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* Polaris topology comes from the vbios, not hard-coded tables */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* the CU count depends on the exact Carrizo SKU (pci revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* the CU count depends on the exact Stoney SKU (pci revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		/* unknown VI variant: fall back to conservative Tonga-like values */
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive the row size from the column count fuses */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}
1993
/*
 * gfx_v8_0_sw_init - software-side initialization of the gfx v8 IP block
 *
 * @handle: amdgpu device pointer (passed as void * by the IP block framework)
 *
 * Registers the CP interrupt sources, loads the gfx microcode, allocates
 * the RLC and MEC buffer objects, creates the gfx and compute rings,
 * reserves the GDS/GWS/OA buffers for gfx use and finally runs the
 * per-ASIC configuration in gfx_v8_0_gpu_early_init().
 *
 * Returns 0 on success or a negative error code on the first failure;
 * note that resources acquired before a failing step are released by
 * the framework via sw_fini, not here.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: ring i maps to pipe i/8, queue i%8 */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	/* constant engine ram size */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2113
2114 static int gfx_v8_0_sw_fini(void *handle)
2115 {
2116 int i;
2117 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2118
2119 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2120 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2121 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2122
2123 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2124 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2125 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2126 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2127
2128 gfx_v8_0_mec_fini(adev);
2129
2130 gfx_v8_0_rlc_fini(adev);
2131
2132 gfx_v8_0_free_microcode(adev);
2133
2134 return 0;
2135 }
2136
2137 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2138 {
2139 uint32_t *modearray, *mod2array;
2140 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2141 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2142 u32 reg_offset;
2143
2144 modearray = adev->gfx.config.tile_mode_array;
2145 mod2array = adev->gfx.config.macrotile_mode_array;
2146
2147 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2148 modearray[reg_offset] = 0;
2149
2150 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2151 mod2array[reg_offset] = 0;
2152
2153 switch (adev->asic_type) {
2154 case CHIP_TOPAZ:
2155 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2162 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2172 PIPE_CONFIG(ADDR_SURF_P2) |
2173 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2175 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176 PIPE_CONFIG(ADDR_SURF_P2) |
2177 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2178 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2179 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2180 PIPE_CONFIG(ADDR_SURF_P2) |
2181 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2183 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2184 PIPE_CONFIG(ADDR_SURF_P2));
2185 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2186 PIPE_CONFIG(ADDR_SURF_P2) |
2187 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2190 PIPE_CONFIG(ADDR_SURF_P2) |
2191 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2194 PIPE_CONFIG(ADDR_SURF_P2) |
2195 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2197 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2198 PIPE_CONFIG(ADDR_SURF_P2) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2202 PIPE_CONFIG(ADDR_SURF_P2) |
2203 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2206 PIPE_CONFIG(ADDR_SURF_P2) |
2207 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2209 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210 PIPE_CONFIG(ADDR_SURF_P2) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2213 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2214 PIPE_CONFIG(ADDR_SURF_P2) |
2215 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2218 PIPE_CONFIG(ADDR_SURF_P2) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2222 PIPE_CONFIG(ADDR_SURF_P2) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2226 PIPE_CONFIG(ADDR_SURF_P2) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2230 PIPE_CONFIG(ADDR_SURF_P2) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2234 PIPE_CONFIG(ADDR_SURF_P2) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2237 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2238 PIPE_CONFIG(ADDR_SURF_P2) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2241 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2242 PIPE_CONFIG(ADDR_SURF_P2) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2245 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P2) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2249 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2250 PIPE_CONFIG(ADDR_SURF_P2) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2253 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2254 PIPE_CONFIG(ADDR_SURF_P2) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2257
2258 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 NUM_BANKS(ADDR_SURF_8_BANK));
2262 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2265 NUM_BANKS(ADDR_SURF_8_BANK));
2266 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 NUM_BANKS(ADDR_SURF_8_BANK));
2270 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2273 NUM_BANKS(ADDR_SURF_8_BANK));
2274 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277 NUM_BANKS(ADDR_SURF_8_BANK));
2278 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2281 NUM_BANKS(ADDR_SURF_8_BANK));
2282 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2285 NUM_BANKS(ADDR_SURF_8_BANK));
2286 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289 NUM_BANKS(ADDR_SURF_16_BANK));
2290 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 NUM_BANKS(ADDR_SURF_16_BANK));
2294 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 NUM_BANKS(ADDR_SURF_16_BANK));
2298 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2299 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2300 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301 NUM_BANKS(ADDR_SURF_16_BANK));
2302 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2305 NUM_BANKS(ADDR_SURF_16_BANK));
2306 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2309 NUM_BANKS(ADDR_SURF_16_BANK));
2310 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2313 NUM_BANKS(ADDR_SURF_8_BANK));
2314
2315 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2316 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2317 reg_offset != 23)
2318 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2319
2320 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2321 if (reg_offset != 7)
2322 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2323
2324 break;
2325 case CHIP_FIJI:
2326 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2339 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2348 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2351 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2352 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2353 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2354 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2355 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2356 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2358 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2360 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2364 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2365 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2368 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2372 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2373 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2376 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2377 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2384 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2385 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2388 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2392 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2393 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2396 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2397 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2401 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2405 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2409 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2417 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2424 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2428 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2432 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2448
2449 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 NUM_BANKS(ADDR_SURF_8_BANK));
2453 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 NUM_BANKS(ADDR_SURF_8_BANK));
2457 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2460 NUM_BANKS(ADDR_SURF_8_BANK));
2461 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2464 NUM_BANKS(ADDR_SURF_8_BANK));
2465 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 NUM_BANKS(ADDR_SURF_8_BANK));
2469 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2472 NUM_BANKS(ADDR_SURF_8_BANK));
2473 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2476 NUM_BANKS(ADDR_SURF_8_BANK));
2477 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2480 NUM_BANKS(ADDR_SURF_8_BANK));
2481 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2484 NUM_BANKS(ADDR_SURF_8_BANK));
2485 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2488 NUM_BANKS(ADDR_SURF_8_BANK));
2489 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2492 NUM_BANKS(ADDR_SURF_8_BANK));
2493 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2496 NUM_BANKS(ADDR_SURF_8_BANK));
2497 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2500 NUM_BANKS(ADDR_SURF_8_BANK));
2501 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 NUM_BANKS(ADDR_SURF_4_BANK));
2505
2506 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2507 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2508
2509 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2510 if (reg_offset != 7)
2511 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2512
2513 break;
2514 case CHIP_TONGA:
2515 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2528 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2532 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2536 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2537 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2539 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2540 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2541 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2542 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2543 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2544 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2545 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2546 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2547 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2548 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2549 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2553 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2554 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2555 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2557 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2561 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2562 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2565 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2573 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2574 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2577 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2581 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2582 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2585 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2586 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2590 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2594 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2598 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2606 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2613 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2617 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2621 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2625 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2629 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2631 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2632 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2633 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2635 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2636 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2637
2638 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641 NUM_BANKS(ADDR_SURF_16_BANK));
2642 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645 NUM_BANKS(ADDR_SURF_16_BANK));
2646 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2649 NUM_BANKS(ADDR_SURF_16_BANK));
2650 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2653 NUM_BANKS(ADDR_SURF_16_BANK));
2654 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2656 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2657 NUM_BANKS(ADDR_SURF_16_BANK));
2658 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2661 NUM_BANKS(ADDR_SURF_16_BANK));
2662 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2665 NUM_BANKS(ADDR_SURF_16_BANK));
2666 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2668 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2669 NUM_BANKS(ADDR_SURF_16_BANK));
2670 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2673 NUM_BANKS(ADDR_SURF_16_BANK));
2674 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2677 NUM_BANKS(ADDR_SURF_16_BANK));
2678 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2681 NUM_BANKS(ADDR_SURF_16_BANK));
2682 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2685 NUM_BANKS(ADDR_SURF_8_BANK));
2686 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2689 NUM_BANKS(ADDR_SURF_4_BANK));
2690 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2693 NUM_BANKS(ADDR_SURF_4_BANK));
2694
2695 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2696 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2697
2698 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2699 if (reg_offset != 7)
2700 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2701
2702 break;
2703 case CHIP_POLARIS11:
2704 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2713 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2720 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2724 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2725 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2728 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2729 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2730 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2731 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2732 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2733 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2734 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2735 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2736 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2737 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2738 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2742 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2746 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2750 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2754 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2755 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2758 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2761 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2762 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2763 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2766 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2767 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2769 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2770 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2771 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2773 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2774 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2775 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2781 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2783 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2787 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2791 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2794 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2795 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2798 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2799 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2802 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2805 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2806 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2810 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2814 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2818 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2819 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2820 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2822 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2823 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2825 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2826
2827 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2828 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2829 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2830 NUM_BANKS(ADDR_SURF_16_BANK));
2831
2832 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 NUM_BANKS(ADDR_SURF_16_BANK));
2836
2837 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2840 NUM_BANKS(ADDR_SURF_16_BANK));
2841
2842 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2844 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2845 NUM_BANKS(ADDR_SURF_16_BANK));
2846
2847 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2849 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2850 NUM_BANKS(ADDR_SURF_16_BANK));
2851
2852 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2853 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2854 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2855 NUM_BANKS(ADDR_SURF_16_BANK));
2856
2857 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2860 NUM_BANKS(ADDR_SURF_16_BANK));
2861
2862 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2863 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2864 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2865 NUM_BANKS(ADDR_SURF_16_BANK));
2866
2867 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2870 NUM_BANKS(ADDR_SURF_16_BANK));
2871
2872 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2873 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2874 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2875 NUM_BANKS(ADDR_SURF_16_BANK));
2876
2877 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2878 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2879 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2880 NUM_BANKS(ADDR_SURF_16_BANK));
2881
2882 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2883 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2884 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2885 NUM_BANKS(ADDR_SURF_16_BANK));
2886
2887 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2889 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2890 NUM_BANKS(ADDR_SURF_8_BANK));
2891
2892 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2894 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2895 NUM_BANKS(ADDR_SURF_4_BANK));
2896
2897 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2898 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2899
2900 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2901 if (reg_offset != 7)
2902 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2903
2904 break;
2905 case CHIP_POLARIS10:
2906 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2914 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2915 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2918 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2919 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2922 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2923 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2927 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2928 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2930 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2931 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2932 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2934 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2935 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2936 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2937 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2938 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2939 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2940 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2948 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2949 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2952 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2953 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2956 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2961 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2964 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2965 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2968 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2972 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2973 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2975 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2976 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2977 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2979 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2981 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2987 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2988 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2989 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2990 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2991 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2992 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2993 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2996 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2997 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3000 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3001 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3004 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3008 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3012 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3016 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3020 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3022 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3024 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3025 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3026 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3028
3029 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3030 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3031 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3032 NUM_BANKS(ADDR_SURF_16_BANK));
3033
3034 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3035 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3036 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3037 NUM_BANKS(ADDR_SURF_16_BANK));
3038
3039 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3040 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3041 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3042 NUM_BANKS(ADDR_SURF_16_BANK));
3043
3044 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3047 NUM_BANKS(ADDR_SURF_16_BANK));
3048
3049 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3050 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3051 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3052 NUM_BANKS(ADDR_SURF_16_BANK));
3053
3054 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3057 NUM_BANKS(ADDR_SURF_16_BANK));
3058
3059 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3060 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3061 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3062 NUM_BANKS(ADDR_SURF_16_BANK));
3063
3064 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3067 NUM_BANKS(ADDR_SURF_16_BANK));
3068
3069 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3072 NUM_BANKS(ADDR_SURF_16_BANK));
3073
3074 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3077 NUM_BANKS(ADDR_SURF_16_BANK));
3078
3079 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3082 NUM_BANKS(ADDR_SURF_16_BANK));
3083
3084 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3087 NUM_BANKS(ADDR_SURF_8_BANK));
3088
3089 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3090 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3091 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3092 NUM_BANKS(ADDR_SURF_4_BANK));
3093
3094 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3095 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3096 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3097 NUM_BANKS(ADDR_SURF_4_BANK));
3098
3099 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3100 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3101
3102 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3103 if (reg_offset != 7)
3104 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3105
3106 break;
3107 case CHIP_STONEY:
3108 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109 PIPE_CONFIG(ADDR_SURF_P2) |
3110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3113 PIPE_CONFIG(ADDR_SURF_P2) |
3114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117 PIPE_CONFIG(ADDR_SURF_P2) |
3118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3120 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3121 PIPE_CONFIG(ADDR_SURF_P2) |
3122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3124 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3125 PIPE_CONFIG(ADDR_SURF_P2) |
3126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3128 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3129 PIPE_CONFIG(ADDR_SURF_P2) |
3130 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3131 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3132 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3133 PIPE_CONFIG(ADDR_SURF_P2) |
3134 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3135 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3136 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3137 PIPE_CONFIG(ADDR_SURF_P2));
3138 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3142 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3143 PIPE_CONFIG(ADDR_SURF_P2) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3146 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3147 PIPE_CONFIG(ADDR_SURF_P2) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3150 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3151 PIPE_CONFIG(ADDR_SURF_P2) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3154 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3155 PIPE_CONFIG(ADDR_SURF_P2) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3158 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3159 PIPE_CONFIG(ADDR_SURF_P2) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3162 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3163 PIPE_CONFIG(ADDR_SURF_P2) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3166 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3167 PIPE_CONFIG(ADDR_SURF_P2) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3171 PIPE_CONFIG(ADDR_SURF_P2) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3174 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3175 PIPE_CONFIG(ADDR_SURF_P2) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3179 PIPE_CONFIG(ADDR_SURF_P2) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3182 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3183 PIPE_CONFIG(ADDR_SURF_P2) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3186 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3187 PIPE_CONFIG(ADDR_SURF_P2) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3190 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3191 PIPE_CONFIG(ADDR_SURF_P2) |
3192 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3194 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3195 PIPE_CONFIG(ADDR_SURF_P2) |
3196 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3198 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3199 PIPE_CONFIG(ADDR_SURF_P2) |
3200 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3202 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3203 PIPE_CONFIG(ADDR_SURF_P2) |
3204 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3206 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3207 PIPE_CONFIG(ADDR_SURF_P2) |
3208 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3210
3211 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3214 NUM_BANKS(ADDR_SURF_8_BANK));
3215 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3218 NUM_BANKS(ADDR_SURF_8_BANK));
3219 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3221 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3222 NUM_BANKS(ADDR_SURF_8_BANK));
3223 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3226 NUM_BANKS(ADDR_SURF_8_BANK));
3227 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3230 NUM_BANKS(ADDR_SURF_8_BANK));
3231 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3234 NUM_BANKS(ADDR_SURF_8_BANK));
3235 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3238 NUM_BANKS(ADDR_SURF_8_BANK));
3239 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242 NUM_BANKS(ADDR_SURF_16_BANK));
3243 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3246 NUM_BANKS(ADDR_SURF_16_BANK));
3247 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3250 NUM_BANKS(ADDR_SURF_16_BANK));
3251 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3252 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3253 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254 NUM_BANKS(ADDR_SURF_16_BANK));
3255 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3258 NUM_BANKS(ADDR_SURF_16_BANK));
3259 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3262 NUM_BANKS(ADDR_SURF_16_BANK));
3263 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3264 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3265 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3266 NUM_BANKS(ADDR_SURF_8_BANK));
3267
3268 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3269 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3270 reg_offset != 23)
3271 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3272
3273 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3274 if (reg_offset != 7)
3275 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3276
3277 break;
3278 default:
3279 dev_warn(adev->dev,
3280 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3281 adev->asic_type);
3282
3283 case CHIP_CARRIZO:
3284 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3288 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3292 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3296 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3299 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3300 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3304 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3305 PIPE_CONFIG(ADDR_SURF_P2) |
3306 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3307 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3308 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3309 PIPE_CONFIG(ADDR_SURF_P2) |
3310 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3311 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3312 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3313 PIPE_CONFIG(ADDR_SURF_P2));
3314 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3315 PIPE_CONFIG(ADDR_SURF_P2) |
3316 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3318 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3319 PIPE_CONFIG(ADDR_SURF_P2) |
3320 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3322 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3323 PIPE_CONFIG(ADDR_SURF_P2) |
3324 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3326 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3327 PIPE_CONFIG(ADDR_SURF_P2) |
3328 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3330 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3331 PIPE_CONFIG(ADDR_SURF_P2) |
3332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3334 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3335 PIPE_CONFIG(ADDR_SURF_P2) |
3336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3338 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3339 PIPE_CONFIG(ADDR_SURF_P2) |
3340 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3342 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3343 PIPE_CONFIG(ADDR_SURF_P2) |
3344 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3347 PIPE_CONFIG(ADDR_SURF_P2) |
3348 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3350 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3351 PIPE_CONFIG(ADDR_SURF_P2) |
3352 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3354 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3355 PIPE_CONFIG(ADDR_SURF_P2) |
3356 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3358 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3359 PIPE_CONFIG(ADDR_SURF_P2) |
3360 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3362 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3363 PIPE_CONFIG(ADDR_SURF_P2) |
3364 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3366 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3367 PIPE_CONFIG(ADDR_SURF_P2) |
3368 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3370 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3371 PIPE_CONFIG(ADDR_SURF_P2) |
3372 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3374 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3375 PIPE_CONFIG(ADDR_SURF_P2) |
3376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3378 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3379 PIPE_CONFIG(ADDR_SURF_P2) |
3380 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3382 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3383 PIPE_CONFIG(ADDR_SURF_P2) |
3384 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3386
3387 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3390 NUM_BANKS(ADDR_SURF_8_BANK));
3391 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3394 NUM_BANKS(ADDR_SURF_8_BANK));
3395 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3398 NUM_BANKS(ADDR_SURF_8_BANK));
3399 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3402 NUM_BANKS(ADDR_SURF_8_BANK));
3403 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3406 NUM_BANKS(ADDR_SURF_8_BANK));
3407 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3410 NUM_BANKS(ADDR_SURF_8_BANK));
3411 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3414 NUM_BANKS(ADDR_SURF_8_BANK));
3415 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418 NUM_BANKS(ADDR_SURF_16_BANK));
3419 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3422 NUM_BANKS(ADDR_SURF_16_BANK));
3423 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3424 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3425 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3426 NUM_BANKS(ADDR_SURF_16_BANK));
3427 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3430 NUM_BANKS(ADDR_SURF_16_BANK));
3431 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3434 NUM_BANKS(ADDR_SURF_16_BANK));
3435 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3436 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3437 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3438 NUM_BANKS(ADDR_SURF_16_BANK));
3439 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3442 NUM_BANKS(ADDR_SURF_8_BANK));
3443
3444 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3445 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3446 reg_offset != 23)
3447 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3448
3449 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3450 if (reg_offset != 7)
3451 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3452
3453 break;
3454 }
3455 }
3456
3457 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3458 u32 se_num, u32 sh_num, u32 instance)
3459 {
3460 u32 data;
3461
3462 if (instance == 0xffffffff)
3463 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3464 else
3465 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3466
3467 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3468 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3469 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3470 } else if (se_num == 0xffffffff) {
3471 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3473 } else if (sh_num == 0xffffffff) {
3474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3475 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3476 } else {
3477 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3478 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3479 }
3480 WREG32(mmGRBM_GFX_INDEX, data);
3481 }
3482
3483 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3484 {
3485 return (u32)((1ULL << bit_width) - 1);
3486 }
3487
/*
 * gfx_v8_0_get_rb_active_bitmap - active render backends of the current SE/SH
 *
 * Reads both the fuse-level (CC) and user-level (GC_USER) RB disable
 * registers for whichever SE/SH is currently selected via GRBM_GFX_INDEX
 * and returns a bitmap where a set bit means the RB is usable.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* an RB is unusable if disabled in either register */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	/* mask from CC, shift from GC_USER — presumably both registers share
	 * the same BACKEND_DISABLE field layout; confirm against register
	 * headers */
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* limit to the RBs that belong to a single SH */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* invert: disable bits -> active bits */
	return (~data) & mask;
}
3503
3504 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3505 {
3506 int i, j;
3507 u32 data;
3508 u32 active_rbs = 0;
3509 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3510 adev->gfx.config.max_sh_per_se;
3511
3512 mutex_lock(&adev->grbm_idx_mutex);
3513 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3514 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3515 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3516 data = gfx_v8_0_get_rb_active_bitmap(adev);
3517 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3518 rb_bitmap_width_per_sh);
3519 }
3520 }
3521 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3522 mutex_unlock(&adev->grbm_idx_mutex);
3523
3524 adev->gfx.config.backend_enable_mask = active_rbs;
3525 adev->gfx.config.num_rbs = hweight32(active_rbs);
3526 }
3527
3528 /**
3529 * gfx_v8_0_init_compute_vmid - gart enable
3530 *
3531 * @rdev: amdgpu_device pointer
3532 *
3533 * Initialize compute vmid sh_mem registers
3534 *
3535 */
3536 #define DEFAULT_SH_MEM_BASES (0x6000)
3537 #define FIRST_COMPUTE_VMID (8)
3538 #define LAST_COMPUTE_VMID (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base for both the shared and private aperture fields */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private apertures routed through the ATC */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* NOTE(review): base=1 / limit=0 appears to leave the APE1
		 * aperture disabled (limit < base) — confirm against the
		 * SH_MEM_APE1 register spec */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3572
3573 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3574 {
3575 u32 tmp;
3576 int i;
3577
3578 tmp = RREG32(mmGRBM_CNTL);
3579 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3580 WREG32(mmGRBM_CNTL, tmp);
3581
3582 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3583 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3584 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3585
3586 gfx_v8_0_tiling_mode_table_init(adev);
3587
3588 gfx_v8_0_setup_rb(adev);
3589 gfx_v8_0_get_cu_info(adev);
3590
3591 /* XXX SH_MEM regs */
3592 /* where to put LDS, scratch, GPUVM in FSA64 space */
3593 mutex_lock(&adev->srbm_mutex);
3594 for (i = 0; i < 16; i++) {
3595 vi_srbm_select(adev, 0, 0, 0, i);
3596 /* CP and shaders */
3597 if (i == 0) {
3598 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3599 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3600 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3601 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3602 WREG32(mmSH_MEM_CONFIG, tmp);
3603 } else {
3604 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3605 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3606 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3607 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3608 WREG32(mmSH_MEM_CONFIG, tmp);
3609 }
3610
3611 WREG32(mmSH_MEM_APE1_BASE, 1);
3612 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3613 WREG32(mmSH_MEM_BASES, 0);
3614 }
3615 vi_srbm_select(adev, 0, 0, 0, 0);
3616 mutex_unlock(&adev->srbm_mutex);
3617
3618 gfx_v8_0_init_compute_vmid(adev);
3619
3620 mutex_lock(&adev->grbm_idx_mutex);
3621 /*
3622 * making sure that the following register writes will be broadcasted
3623 * to all the shaders
3624 */
3625 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3626
3627 WREG32(mmPA_SC_FIFO_SIZE,
3628 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3629 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3630 (adev->gfx.config.sc_prim_fifo_size_backend <<
3631 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3632 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3633 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3634 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3635 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3636 mutex_unlock(&adev->grbm_idx_mutex);
3637
3638 }
3639
3640 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3641 {
3642 u32 i, j, k;
3643 u32 mask;
3644
3645 mutex_lock(&adev->grbm_idx_mutex);
3646 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3647 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3648 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3649 for (k = 0; k < adev->usec_timeout; k++) {
3650 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3651 break;
3652 udelay(1);
3653 }
3654 }
3655 }
3656 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3657 mutex_unlock(&adev->grbm_idx_mutex);
3658
3659 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3660 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3661 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3662 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3663 for (k = 0; k < adev->usec_timeout; k++) {
3664 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3665 break;
3666 udelay(1);
3667 }
3668 }
3669
3670 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3671 bool enable)
3672 {
3673 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3674
3675 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3676 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3677 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3678 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3679
3680 WREG32(mmCP_INT_CNTL_RING0, tmp);
3681 }
3682
3683 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3684 {
3685 /* csib */
3686 WREG32(mmRLC_CSIB_ADDR_HI,
3687 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3688 WREG32(mmRLC_CSIB_ADDR_LO,
3689 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3690 WREG32(mmRLC_CSIB_LENGTH,
3691 adev->gfx.rlc.clear_state_size);
3692 }
3693
/*
 * gfx_v8_0_parse_ind_reg_list - compress an RLC indirect register list
 *
 * Walks @register_list_format from @ind_offset up to @list_size.  Entries
 * are terminated by 0xFFFFFFFF.  For each entry the start offset is
 * recorded in @ind_start_offsets (count in @offset_count, capped at
 * @max_offset).  The value two words into each record is collected into
 * the @unique_indices table (count in @indices_count, capped at
 * @max_indices) and replaced in place by its position in that table.
 *
 * BUG()s if either output table would overflow.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		/* remember where this entry begins */
		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current entry */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* skip two words so ind_offset lands on the index value;
		 * the loop's ++ then steps past it */
		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* first occurrence: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* rewrite the raw value as its table position */
		register_list_format[ind_offset] = indices;
	}
}
3743
3744 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3745 {
3746 int i, temp, data;
3747 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3748 int indices_count = 0;
3749 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3750 int offset_count = 0;
3751
3752 int list_size;
3753 unsigned int *register_list_format =
3754 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3755 if (register_list_format == NULL)
3756 return -ENOMEM;
3757 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3758 adev->gfx.rlc.reg_list_format_size_bytes);
3759
3760 gfx_v8_0_parse_ind_reg_list(register_list_format,
3761 RLC_FormatDirectRegListLength,
3762 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3763 unique_indices,
3764 &indices_count,
3765 sizeof(unique_indices) / sizeof(int),
3766 indirect_start_offsets,
3767 &offset_count,
3768 sizeof(indirect_start_offsets)/sizeof(int));
3769
3770 /* save and restore list */
3771 temp = RREG32(mmRLC_SRM_CNTL);
3772 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3773 WREG32(mmRLC_SRM_CNTL, temp);
3774
3775 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3776 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3777 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3778
3779 /* indirect list */
3780 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3781 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3782 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3783
3784 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3785 list_size = list_size >> 1;
3786 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3787 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3788
3789 /* starting offsets starts */
3790 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3791 adev->gfx.rlc.starting_offsets_start);
3792 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3793 WREG32(mmRLC_GPM_SCRATCH_DATA,
3794 indirect_start_offsets[i]);
3795
3796 /* unique indices */
3797 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3798 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3799 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3800 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3801 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3802 }
3803 kfree(register_list_format);
3804
3805 return 0;
3806 }
3807
3808 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3809 {
3810 uint32_t data;
3811
3812 data = RREG32(mmRLC_SRM_CNTL);
3813 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3814 WREG32(mmRLC_SRM_CNTL, data);
3815 }
3816
/*
 * gfx_v8_0_init_power_gating - program GFX powergating timing parameters
 *
 * Only acts when static, dynamic or small-static GFX powergating is
 * enabled in adev->pg_flags.  The numeric values are hardware tuning
 * constants — presumably from bring-up tables; confirm against the PG
 * programming guide before changing them.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		/* idle poll interval for RB write-pointer polling */
		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
		WREG32(mmCP_RB_WPTR_POLL_CNTL, data);

		/* identical 0x10 delay for all four PG stages */
		data = 0;
		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
		data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
		WREG32(mmRLC_PG_DELAY, data);

		/* SERDES command delay */
		data = RREG32(mmRLC_PG_DELAY_2);
		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
		data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
		WREG32(mmRLC_PG_DELAY_2, data);

		/* GFX-idle threshold before GRBM register save kicks in */
		data = RREG32(mmRLC_AUTO_PG_CTRL);
		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
		WREG32(mmRLC_AUTO_PG_CTRL, data);
	}
}
3847
3848 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3849 bool enable)
3850 {
3851 u32 data, orig;
3852
3853 orig = data = RREG32(mmRLC_PG_CNTL);
3854
3855 if (enable)
3856 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3857 else
3858 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3859
3860 if (orig != data)
3861 WREG32(mmRLC_PG_CNTL, data);
3862 }
3863
3864 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3865 bool enable)
3866 {
3867 u32 data, orig;
3868
3869 orig = data = RREG32(mmRLC_PG_CNTL);
3870
3871 if (enable)
3872 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3873 else
3874 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3875
3876 if (orig != data)
3877 WREG32(mmRLC_PG_CNTL, data);
3878 }
3879
3880 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3881 {
3882 u32 data, orig;
3883
3884 orig = data = RREG32(mmRLC_PG_CNTL);
3885
3886 if (enable)
3887 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3888 else
3889 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3890
3891 if (orig != data)
3892 WREG32(mmRLC_PG_CNTL, data);
3893 }
3894
/*
 * gfx_v8_0_init_pg - powergating init common to all VI parts
 *
 * Sets up the clear-state buffer and the RLC save/restore machine when any
 * powergating feature is enabled, plus the extra APU-only programming for
 * Carrizo/Stoney and the timing setup for Polaris11.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		/* NOTE(review): the -ENOMEM return of
		 * gfx_v8_0_init_save_restore_list() is ignored here —
		 * confirm whether best-effort is intended */
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			/* APUs also need the CP jump table address (256-byte
			 * aligned, hence >> 8) and the always-on CU mask */
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}
3928
3929 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3930 {
3931 u32 tmp = RREG32(mmRLC_CNTL);
3932
3933 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3934 WREG32(mmRLC_CNTL, tmp);
3935
3936 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3937
3938 gfx_v8_0_wait_for_rlc_serdes(adev);
3939 }
3940
3941 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3942 {
3943 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3944
3945 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3946 WREG32(mmGRBM_SOFT_RESET, tmp);
3947 udelay(50);
3948 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3949 WREG32(mmGRBM_SOFT_RESET, tmp);
3950 udelay(50);
3951 }
3952
3953 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3954 {
3955 u32 tmp = RREG32(mmRLC_CNTL);
3956
3957 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3958 WREG32(mmRLC_CNTL, tmp);
3959
3960 /* carrizo do enable cp interrupt after cp inited */
3961 if (!(adev->flags & AMD_IS_APU))
3962 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3963
3964 udelay(50);
3965 }
3966
/*
 * gfx_v8_0_rlc_load_microcode - upload RLC firmware via MMIO
 *
 * Streams the RLC GPM ucode words through the UCODE_ADDR/DATA register
 * pair and finishes by writing the firmware version into the address
 * register.
 *
 * Returns 0 on success, -EINVAL if no RLC firmware was loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	/* firmware must have been fetched earlier (sw init) */
	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload location/size come from the little-endian fw header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* the DATA port presumably auto-increments after ADDR is reset to 0
	 * — confirm against the RLC register spec */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
3990
/*
 * gfx_v8_0_rlc_resume - full RLC restart sequence
 *
 * Stops the RLC, disables clock and power gating, soft-resets the block,
 * reinitializes powergating state, (re)loads the RLC firmware — either
 * directly or by waiting on the SMU-managed load — and starts the RLC.
 *
 * Returns 0 on success or a negative error code if firmware loading fails.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
	/* Polaris parts have an extra 3D CGCG/CGLS control */
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10)
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	gfx_v8_0_init_pg(adev);

	/* with powerplay enabled, firmware loading is handled elsewhere */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
					AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4028
4029 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4030 {
4031 int i;
4032 u32 tmp = RREG32(mmCP_ME_CNTL);
4033
4034 if (enable) {
4035 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4036 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4037 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4038 } else {
4039 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4040 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4041 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4042 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4043 adev->gfx.gfx_ring[i].ready = false;
4044 }
4045 WREG32(mmCP_ME_CNTL, tmp);
4046 udelay(50);
4047 }
4048
4049 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4050 {
4051 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4052 const struct gfx_firmware_header_v1_0 *ce_hdr;
4053 const struct gfx_firmware_header_v1_0 *me_hdr;
4054 const __le32 *fw_data;
4055 unsigned i, fw_size;
4056
4057 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4058 return -EINVAL;
4059
4060 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4061 adev->gfx.pfp_fw->data;
4062 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4063 adev->gfx.ce_fw->data;
4064 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4065 adev->gfx.me_fw->data;
4066
4067 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4068 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4069 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4070
4071 gfx_v8_0_cp_gfx_enable(adev, false);
4072
4073 /* PFP */
4074 fw_data = (const __le32 *)
4075 (adev->gfx.pfp_fw->data +
4076 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4077 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4078 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4079 for (i = 0; i < fw_size; i++)
4080 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4081 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4082
4083 /* CE */
4084 fw_data = (const __le32 *)
4085 (adev->gfx.ce_fw->data +
4086 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4087 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4088 WREG32(mmCP_CE_UCODE_ADDR, 0);
4089 for (i = 0; i < fw_size; i++)
4090 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4091 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4092
4093 /* ME */
4094 fw_data = (const __le32 *)
4095 (adev->gfx.me_fw->data +
4096 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4097 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4098 WREG32(mmCP_ME_RAM_WADDR, 0);
4099 for (i = 0; i < fw_size; i++)
4100 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4101 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4102
4103 return 0;
4104 }
4105
4106 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4107 {
4108 u32 count = 0;
4109 const struct cs_section_def *sect = NULL;
4110 const struct cs_extent_def *ext = NULL;
4111
4112 /* begin clear state */
4113 count += 2;
4114 /* context control state */
4115 count += 3;
4116
4117 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4118 for (ext = sect->section; ext->extent != NULL; ++ext) {
4119 if (sect->id == SECT_CONTEXT)
4120 count += 2 + ext->reg_count;
4121 else
4122 return 0;
4123 }
4124 }
4125 /* pa_sc_raster_config/pa_sc_raster_config1 */
4126 count += 4;
4127 /* end clear state */
4128 count += 2;
4129 /* clear state */
4130 count += 2;
4131
4132 return count;
4133 }
4134
/*
 * One-time gfx CP startup: program the basic CP config registers,
 * un-halt the micro engines, and emit the clear-state preamble on
 * ring 0 - built from vi_cs_data plus per-ASIC PA_SC_RASTER_CONFIG
 * golden values - followed by the CE partition setup.  The packet
 * count must stay in sync with gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the trailing SET_BASE packet below */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* CONTEXT_CONTROL: load/shadow enable masks used by the VI
	 * clear-state setup */
	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* replay the golden context register values */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster config golden values (mmPA_SC_RASTER_CONFIG
	 * and mmPA_SC_RASTER_CONFIG_1) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* value depends on the number of render backends */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		/* every supported VI ASIC must be listed above */
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4226
4227 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4228 {
4229 struct amdgpu_ring *ring;
4230 u32 tmp;
4231 u32 rb_bufsz;
4232 u64 rb_addr, rptr_addr;
4233 int r;
4234
4235 /* Set the write pointer delay */
4236 WREG32(mmCP_RB_WPTR_DELAY, 0);
4237
4238 /* set the RB to use vmid 0 */
4239 WREG32(mmCP_RB_VMID, 0);
4240
4241 /* Set ring buffer size */
4242 ring = &adev->gfx.gfx_ring[0];
4243 rb_bufsz = order_base_2(ring->ring_size / 8);
4244 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4245 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4246 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4247 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4248 #ifdef __BIG_ENDIAN
4249 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4250 #endif
4251 WREG32(mmCP_RB0_CNTL, tmp);
4252
4253 /* Initialize the ring buffer's read and write pointers */
4254 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4255 ring->wptr = 0;
4256 WREG32(mmCP_RB0_WPTR, ring->wptr);
4257
4258 /* set the wb address wether it's enabled or not */
4259 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4260 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4261 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4262
4263 mdelay(1);
4264 WREG32(mmCP_RB0_CNTL, tmp);
4265
4266 rb_addr = ring->gpu_addr >> 8;
4267 WREG32(mmCP_RB0_BASE, rb_addr);
4268 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4269
4270 /* no gfx doorbells on iceland */
4271 if (adev->asic_type != CHIP_TOPAZ) {
4272 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4273 if (ring->use_doorbell) {
4274 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4275 DOORBELL_OFFSET, ring->doorbell_index);
4276 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4277 DOORBELL_HIT, 0);
4278 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4279 DOORBELL_EN, 1);
4280 } else {
4281 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4282 DOORBELL_EN, 0);
4283 }
4284 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4285
4286 if (adev->asic_type == CHIP_TONGA) {
4287 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4288 DOORBELL_RANGE_LOWER,
4289 AMDGPU_DOORBELL_GFX_RING0);
4290 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4291
4292 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4293 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4294 }
4295
4296 }
4297
4298 /* start the ring */
4299 gfx_v8_0_cp_gfx_start(adev);
4300 ring->ready = true;
4301 r = amdgpu_ring_test_ring(ring);
4302 if (r) {
4303 ring->ready = false;
4304 return r;
4305 }
4306
4307 return 0;
4308 }
4309
4310 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4311 {
4312 int i;
4313
4314 if (enable) {
4315 WREG32(mmCP_MEC_CNTL, 0);
4316 } else {
4317 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4318 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4319 adev->gfx.compute_ring[i].ready = false;
4320 }
4321 udelay(50);
4322 }
4323
/*
 * Load the MEC microcode with direct register writes (legacy,
 * non-SMU loading path).  The compute CP is halted first; MEC2
 * firmware is written only when a separate mec2 image was fetched.
 *
 * Returns 0 on success, -EINVAL if the MEC firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	/* leave the address register holding the expected fw version */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4369
/*
 * VI Memory Queue Descriptor (MQD).
 *
 * CPU-side mirror of the descriptor the CP micro engine reads and
 * writes to (de)schedule a compute hardware queue (HQD) - see
 * gfx_v8_0_cp_compute_resume(), which fills one per compute ring.
 * Field order matches the hardware ordinal layout exactly and must
 * not be changed; the "ordinalN" comments give each field's dword
 * index in the descriptor.  The cp_hqd_* fields shadow the
 * identically-named mmCP_HQD_* registers.
 */
struct vi_mqd {
	uint32_t header;  /* ordinal0 */
	uint32_t compute_dispatch_initiator;  /* ordinal1 */
	uint32_t compute_dim_x;  /* ordinal2 */
	uint32_t compute_dim_y;  /* ordinal3 */
	uint32_t compute_dim_z;  /* ordinal4 */
	uint32_t compute_start_x;  /* ordinal5 */
	uint32_t compute_start_y;  /* ordinal6 */
	uint32_t compute_start_z;  /* ordinal7 */
	uint32_t compute_num_thread_x;  /* ordinal8 */
	uint32_t compute_num_thread_y;  /* ordinal9 */
	uint32_t compute_num_thread_z;  /* ordinal10 */
	uint32_t compute_pipelinestat_enable;  /* ordinal11 */
	uint32_t compute_perfcount_enable;  /* ordinal12 */
	uint32_t compute_pgm_lo;  /* ordinal13 */
	uint32_t compute_pgm_hi;  /* ordinal14 */
	uint32_t compute_tba_lo;  /* ordinal15 */
	uint32_t compute_tba_hi;  /* ordinal16 */
	uint32_t compute_tma_lo;  /* ordinal17 */
	uint32_t compute_tma_hi;  /* ordinal18 */
	uint32_t compute_pgm_rsrc1;  /* ordinal19 */
	uint32_t compute_pgm_rsrc2;  /* ordinal20 */
	uint32_t compute_vmid;  /* ordinal21 */
	uint32_t compute_resource_limits;  /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;  /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;  /* ordinal24 */
	uint32_t compute_tmpring_size;  /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;  /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;  /* ordinal27 */
	uint32_t compute_restart_x;  /* ordinal28 */
	uint32_t compute_restart_y;  /* ordinal29 */
	uint32_t compute_restart_z;  /* ordinal30 */
	uint32_t compute_thread_trace_enable;  /* ordinal31 */
	uint32_t compute_misc_reserved;  /* ordinal32 */
	uint32_t compute_dispatch_id;  /* ordinal33 */
	uint32_t compute_threadgroup_id;  /* ordinal34 */
	uint32_t compute_relaunch;  /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;  /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;  /* ordinal37 */
	uint32_t compute_wave_restore_control;  /* ordinal38 */
	uint32_t reserved9;  /* ordinal39 */
	uint32_t reserved10;  /* ordinal40 */
	uint32_t reserved11;  /* ordinal41 */
	uint32_t reserved12;  /* ordinal42 */
	uint32_t reserved13;  /* ordinal43 */
	uint32_t reserved14;  /* ordinal44 */
	uint32_t reserved15;  /* ordinal45 */
	uint32_t reserved16;  /* ordinal46 */
	uint32_t reserved17;  /* ordinal47 */
	uint32_t reserved18;  /* ordinal48 */
	uint32_t reserved19;  /* ordinal49 */
	uint32_t reserved20;  /* ordinal50 */
	uint32_t reserved21;  /* ordinal51 */
	uint32_t reserved22;  /* ordinal52 */
	uint32_t reserved23;  /* ordinal53 */
	uint32_t reserved24;  /* ordinal54 */
	uint32_t reserved25;  /* ordinal55 */
	uint32_t reserved26;  /* ordinal56 */
	uint32_t reserved27;  /* ordinal57 */
	uint32_t reserved28;  /* ordinal58 */
	uint32_t reserved29;  /* ordinal59 */
	uint32_t reserved30;  /* ordinal60 */
	uint32_t reserved31;  /* ordinal61 */
	uint32_t reserved32;  /* ordinal62 */
	uint32_t reserved33;  /* ordinal63 */
	uint32_t reserved34;  /* ordinal64 */
	uint32_t compute_user_data_0;  /* ordinal65 */
	uint32_t compute_user_data_1;  /* ordinal66 */
	uint32_t compute_user_data_2;  /* ordinal67 */
	uint32_t compute_user_data_3;  /* ordinal68 */
	uint32_t compute_user_data_4;  /* ordinal69 */
	uint32_t compute_user_data_5;  /* ordinal70 */
	uint32_t compute_user_data_6;  /* ordinal71 */
	uint32_t compute_user_data_7;  /* ordinal72 */
	uint32_t compute_user_data_8;  /* ordinal73 */
	uint32_t compute_user_data_9;  /* ordinal74 */
	uint32_t compute_user_data_10;  /* ordinal75 */
	uint32_t compute_user_data_11;  /* ordinal76 */
	uint32_t compute_user_data_12;  /* ordinal77 */
	uint32_t compute_user_data_13;  /* ordinal78 */
	uint32_t compute_user_data_14;  /* ordinal79 */
	uint32_t compute_user_data_15;  /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;  /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;  /* ordinal82 */
	uint32_t reserved35;  /* ordinal83 */
	uint32_t reserved36;  /* ordinal84 */
	uint32_t reserved37;  /* ordinal85 */
	uint32_t cp_mqd_query_time_lo;  /* ordinal86 */
	uint32_t cp_mqd_query_time_hi;  /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;  /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;  /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;  /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;  /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;  /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;  /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;  /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;  /* ordinal95 */
	uint32_t reserved38;  /* ordinal96 */
	uint32_t reserved39;  /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;  /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;  /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;  /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;  /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;  /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;  /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;  /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;  /* ordinal105 */
	uint32_t reserved40;  /* ordinal106 */
	uint32_t reserved41;  /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;  /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;  /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;  /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;  /* ordinal111 */
	uint32_t reserved42;  /* ordinal112 */
	uint32_t reserved43;  /* ordinal113 */
	uint32_t cp_pq_exe_status_lo;  /* ordinal114 */
	uint32_t cp_pq_exe_status_hi;  /* ordinal115 */
	uint32_t cp_packet_id_lo;  /* ordinal116 */
	uint32_t cp_packet_id_hi;  /* ordinal117 */
	uint32_t cp_packet_exe_status_lo;  /* ordinal118 */
	uint32_t cp_packet_exe_status_hi;  /* ordinal119 */
	uint32_t gds_save_base_addr_lo;  /* ordinal120 */
	uint32_t gds_save_base_addr_hi;  /* ordinal121 */
	uint32_t gds_save_mask_lo;  /* ordinal122 */
	uint32_t gds_save_mask_hi;  /* ordinal123 */
	uint32_t ctx_save_base_addr_lo;  /* ordinal124 */
	uint32_t ctx_save_base_addr_hi;  /* ordinal125 */
	uint32_t reserved44;  /* ordinal126 */
	uint32_t reserved45;  /* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;  /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;  /* ordinal129 */
	uint32_t cp_hqd_active;  /* ordinal130 */
	uint32_t cp_hqd_vmid;  /* ordinal131 */
	uint32_t cp_hqd_persistent_state;  /* ordinal132 */
	uint32_t cp_hqd_pipe_priority;  /* ordinal133 */
	uint32_t cp_hqd_queue_priority;  /* ordinal134 */
	uint32_t cp_hqd_quantum;  /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;  /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;  /* ordinal137 */
	uint32_t cp_hqd_pq_rptr;  /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;  /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;  /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;  /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;  /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;  /* ordinal143 */
	uint32_t cp_hqd_pq_wptr;  /* ordinal144 */
	uint32_t cp_hqd_pq_control;  /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;  /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;  /* ordinal147 */
	uint32_t cp_hqd_ib_rptr;  /* ordinal148 */
	uint32_t cp_hqd_ib_control;  /* ordinal149 */
	uint32_t cp_hqd_iq_timer;  /* ordinal150 */
	uint32_t cp_hqd_iq_rptr;  /* ordinal151 */
	uint32_t cp_hqd_dequeue_request;  /* ordinal152 */
	uint32_t cp_hqd_dma_offload;  /* ordinal153 */
	uint32_t cp_hqd_sema_cmd;  /* ordinal154 */
	uint32_t cp_hqd_msg_type;  /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;  /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;  /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;  /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;  /* ordinal159 */
	uint32_t cp_hqd_hq_status0;  /* ordinal160 */
	uint32_t cp_hqd_hq_control0;  /* ordinal161 */
	uint32_t cp_mqd_control;  /* ordinal162 */
	uint32_t cp_hqd_hq_status1;  /* ordinal163 */
	uint32_t cp_hqd_hq_control1;  /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;  /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;  /* ordinal166 */
	uint32_t cp_hqd_eop_control;  /* ordinal167 */
	uint32_t cp_hqd_eop_rptr;  /* ordinal168 */
	uint32_t cp_hqd_eop_wptr;  /* ordinal169 */
	uint32_t cp_hqd_eop_done_events;  /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;  /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;  /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;  /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;  /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;  /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;  /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;  /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;  /* ordinal178 */
	uint32_t cp_hqd_error;  /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;  /* ordinal180 */
	uint32_t cp_hqd_eop_dones;  /* ordinal181 */
	uint32_t reserved46;  /* ordinal182 */
	uint32_t reserved47;  /* ordinal183 */
	uint32_t reserved48;  /* ordinal184 */
	uint32_t reserved49;  /* ordinal185 */
	uint32_t reserved50;  /* ordinal186 */
	uint32_t reserved51;  /* ordinal187 */
	uint32_t reserved52;  /* ordinal188 */
	uint32_t reserved53;  /* ordinal189 */
	uint32_t reserved54;  /* ordinal190 */
	uint32_t reserved55;  /* ordinal191 */
	uint32_t iqtimer_pkt_header;  /* ordinal192 */
	uint32_t iqtimer_pkt_dw0;  /* ordinal193 */
	uint32_t iqtimer_pkt_dw1;  /* ordinal194 */
	uint32_t iqtimer_pkt_dw2;  /* ordinal195 */
	uint32_t iqtimer_pkt_dw3;  /* ordinal196 */
	uint32_t iqtimer_pkt_dw4;  /* ordinal197 */
	uint32_t iqtimer_pkt_dw5;  /* ordinal198 */
	uint32_t iqtimer_pkt_dw6;  /* ordinal199 */
	uint32_t iqtimer_pkt_dw7;  /* ordinal200 */
	uint32_t iqtimer_pkt_dw8;  /* ordinal201 */
	uint32_t iqtimer_pkt_dw9;  /* ordinal202 */
	uint32_t iqtimer_pkt_dw10;  /* ordinal203 */
	uint32_t iqtimer_pkt_dw11;  /* ordinal204 */
	uint32_t iqtimer_pkt_dw12;  /* ordinal205 */
	uint32_t iqtimer_pkt_dw13;  /* ordinal206 */
	uint32_t iqtimer_pkt_dw14;  /* ordinal207 */
	uint32_t iqtimer_pkt_dw15;  /* ordinal208 */
	uint32_t iqtimer_pkt_dw16;  /* ordinal209 */
	uint32_t iqtimer_pkt_dw17;  /* ordinal210 */
	uint32_t iqtimer_pkt_dw18;  /* ordinal211 */
	uint32_t iqtimer_pkt_dw19;  /* ordinal212 */
	uint32_t iqtimer_pkt_dw20;  /* ordinal213 */
	uint32_t iqtimer_pkt_dw21;  /* ordinal214 */
	uint32_t iqtimer_pkt_dw22;  /* ordinal215 */
	uint32_t iqtimer_pkt_dw23;  /* ordinal216 */
	uint32_t iqtimer_pkt_dw24;  /* ordinal217 */
	uint32_t iqtimer_pkt_dw25;  /* ordinal218 */
	uint32_t iqtimer_pkt_dw26;  /* ordinal219 */
	uint32_t iqtimer_pkt_dw27;  /* ordinal220 */
	uint32_t iqtimer_pkt_dw28;  /* ordinal221 */
	uint32_t iqtimer_pkt_dw29;  /* ordinal222 */
	uint32_t iqtimer_pkt_dw30;  /* ordinal223 */
	uint32_t iqtimer_pkt_dw31;  /* ordinal224 */
	uint32_t reserved56;  /* ordinal225 */
	uint32_t reserved57;  /* ordinal226 */
	uint32_t reserved58;  /* ordinal227 */
	uint32_t set_resources_header;  /* ordinal228 */
	uint32_t set_resources_dw1;  /* ordinal229 */
	uint32_t set_resources_dw2;  /* ordinal230 */
	uint32_t set_resources_dw3;  /* ordinal231 */
	uint32_t set_resources_dw4;  /* ordinal232 */
	uint32_t set_resources_dw5;  /* ordinal233 */
	uint32_t set_resources_dw6;  /* ordinal234 */
	uint32_t set_resources_dw7;  /* ordinal235 */
	uint32_t reserved59;  /* ordinal236 */
	uint32_t reserved60;  /* ordinal237 */
	uint32_t reserved61;  /* ordinal238 */
	uint32_t reserved62;  /* ordinal239 */
	uint32_t reserved63;  /* ordinal240 */
	uint32_t reserved64;  /* ordinal241 */
	uint32_t reserved65;  /* ordinal242 */
	uint32_t reserved66;  /* ordinal243 */
	uint32_t reserved67;  /* ordinal244 */
	uint32_t reserved68;  /* ordinal245 */
	uint32_t reserved69;  /* ordinal246 */
	uint32_t reserved70;  /* ordinal247 */
	uint32_t reserved71;  /* ordinal248 */
	uint32_t reserved72;  /* ordinal249 */
	uint32_t reserved73;  /* ordinal250 */
	uint32_t reserved74;  /* ordinal251 */
	uint32_t reserved75;  /* ordinal252 */
	uint32_t reserved76;  /* ordinal253 */
	uint32_t reserved77;  /* ordinal254 */
	uint32_t reserved78;  /* ordinal255 */

	uint32_t reserved_t[256];  /* Reserve 256 dword buffer used by ucode */
};
4630
4631 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4632 {
4633 int i, r;
4634
4635 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4636 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4637
4638 if (ring->mqd_obj) {
4639 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4640 if (unlikely(r != 0))
4641 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4642
4643 amdgpu_bo_unpin(ring->mqd_obj);
4644 amdgpu_bo_unreserve(ring->mqd_obj);
4645
4646 amdgpu_bo_unref(&ring->mqd_obj);
4647 ring->mqd_obj = NULL;
4648 }
4649 }
4650 }
4651
/*
 * Bring up the compute queues (MEC HQDs).
 *
 * Phase 1 (under srbm_mutex): for every pipe of every MEC, program the
 * EOP buffer address/size and clear the pipe's VMID.
 * Phase 2: for every compute ring, allocate/pin/map an MQD buffer
 * object, fill the descriptor while mirroring each value into the
 * SRBM-selected CP_HQD_* registers, and activate the queue.
 * Finally enable doorbells globally, un-halt the compute CP and
 * ring-test every compute ring.
 *
 * Returns 0 on success or a negative error code from BO setup
 * (partially-created MQDs are cleaned up via gfx_v8_0_cp_compute_fini).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 belong to MEC1, pipes 4-7 to MEC2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* EOP base is programmed in units of 256 bytes */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all register accesses below are indexed by the
		 * SRBM-selected me/pipe/queue of this ring */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the EOP setup done in phase 1 above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait (bounded by usec_timeout) for the HQD to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbell generation for the compute queues */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
4910
/*
 * gfx_v8_0_cp_resume - bring up the Command Processor (gfx + compute)
 *
 * Ensures the CP microcode is loaded (either directly by the driver or
 * verified as loaded by the SMU), then resumes the gfx and compute rings.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep the GUI idle interrupt masked on dGPUs while the CP restarts */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just verify each CP engine finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz has no SMU path for MEC; load it directly */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	/* CP is up; re-enable the GUI idle interrupt */
	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
4969
/* Enable/disable both CP micro-engines (gfx and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
4975
/*
 * gfx_v8_0_hw_init - hardware init for the GFX IP block
 *
 * Programs golden registers and static GPU config, then brings up the
 * RLC followed by the CP. Returns 0 on success, negative error otherwise.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	/* static configuration must be in place before RLC/CP start */
	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC has to be running before the CP rings come up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
4995
/*
 * gfx_v8_0_hw_fini - hardware teardown for the GFX IP block
 *
 * Releases the privileged fault interrupts, halts CP then RLC, frees the
 * compute MQD/EOP resources and ungates GFX power gating. Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	/* stop the CP before stopping the RLC */
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	/* leave GFX ungated across the teardown */
	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);

	return 0;
}
5011
/* Suspend is a full hardware teardown for the GFX block. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5018
/* Resume is a full hardware re-init of the GFX block. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5025
5026 static bool gfx_v8_0_is_idle(void *handle)
5027 {
5028 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5029
5030 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5031 return false;
5032 else
5033 return true;
5034 }
5035
5036 static int gfx_v8_0_wait_for_idle(void *handle)
5037 {
5038 unsigned i;
5039 u32 tmp;
5040 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5041
5042 for (i = 0; i < adev->usec_timeout; i++) {
5043 /* read MC_STATUS */
5044 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5045
5046 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5047 return 0;
5048 udelay(1);
5049 }
5050 return -ETIMEDOUT;
5051 }
5052
5053 static int gfx_v8_0_soft_reset(void *handle)
5054 {
5055 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5056 u32 tmp;
5057 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5058
5059 /* GRBM_STATUS */
5060 tmp = RREG32(mmGRBM_STATUS);
5061 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5062 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5063 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5064 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5065 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5066 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5067 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5068 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5069 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5070 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5071 }
5072
5073 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5074 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5075 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5076 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5077 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5078 }
5079
5080 /* GRBM_STATUS2 */
5081 tmp = RREG32(mmGRBM_STATUS2);
5082 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5083 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5084 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5085
5086 /* SRBM_STATUS */
5087 tmp = RREG32(mmSRBM_STATUS);
5088 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5089 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5090 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5091
5092 if (grbm_soft_reset || srbm_soft_reset) {
5093 /* stop the rlc */
5094 gfx_v8_0_rlc_stop(adev);
5095
5096 /* Disable GFX parsing/prefetching */
5097 gfx_v8_0_cp_gfx_enable(adev, false);
5098
5099 /* Disable MEC parsing/prefetching */
5100 gfx_v8_0_cp_compute_enable(adev, false);
5101
5102 if (grbm_soft_reset || srbm_soft_reset) {
5103 tmp = RREG32(mmGMCON_DEBUG);
5104 tmp = REG_SET_FIELD(tmp,
5105 GMCON_DEBUG, GFX_STALL, 1);
5106 tmp = REG_SET_FIELD(tmp,
5107 GMCON_DEBUG, GFX_CLEAR, 1);
5108 WREG32(mmGMCON_DEBUG, tmp);
5109
5110 udelay(50);
5111 }
5112
5113 if (grbm_soft_reset) {
5114 tmp = RREG32(mmGRBM_SOFT_RESET);
5115 tmp |= grbm_soft_reset;
5116 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5117 WREG32(mmGRBM_SOFT_RESET, tmp);
5118 tmp = RREG32(mmGRBM_SOFT_RESET);
5119
5120 udelay(50);
5121
5122 tmp &= ~grbm_soft_reset;
5123 WREG32(mmGRBM_SOFT_RESET, tmp);
5124 tmp = RREG32(mmGRBM_SOFT_RESET);
5125 }
5126
5127 if (srbm_soft_reset) {
5128 tmp = RREG32(mmSRBM_SOFT_RESET);
5129 tmp |= srbm_soft_reset;
5130 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5131 WREG32(mmSRBM_SOFT_RESET, tmp);
5132 tmp = RREG32(mmSRBM_SOFT_RESET);
5133
5134 udelay(50);
5135
5136 tmp &= ~srbm_soft_reset;
5137 WREG32(mmSRBM_SOFT_RESET, tmp);
5138 tmp = RREG32(mmSRBM_SOFT_RESET);
5139 }
5140
5141 if (grbm_soft_reset || srbm_soft_reset) {
5142 tmp = RREG32(mmGMCON_DEBUG);
5143 tmp = REG_SET_FIELD(tmp,
5144 GMCON_DEBUG, GFX_STALL, 0);
5145 tmp = REG_SET_FIELD(tmp,
5146 GMCON_DEBUG, GFX_CLEAR, 0);
5147 WREG32(mmGMCON_DEBUG, tmp);
5148 }
5149
5150 /* Wait a little for things to settle down */
5151 udelay(50);
5152 }
5153 return 0;
5154 }
5155
5156 /**
5157 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5158 *
5159 * @adev: amdgpu_device pointer
5160 *
5161 * Fetches a GPU clock counter snapshot.
5162 * Returns the 64 bit clock counter snapshot.
5163 */
5164 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5165 {
5166 uint64_t clock;
5167
5168 mutex_lock(&adev->gfx.gpu_clock_mutex);
5169 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5170 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5171 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5172 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5173 return clock;
5174 }
5175
/*
 * gfx_v8_0_ring_emit_gds_switch - program per-VMID GDS/GWS/OA apertures
 *
 * Emits one CP WRITE_DATA packet per register (GDS base, GDS size, GWS,
 * OA) to switch the global data share apertures for @vmid.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* scale the byte values down to the register granularity */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous mask of oa_size bits starting at bit oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5223
/* Common GFX helpers exposed to the rest of the driver. */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};
5228
/*
 * gfx_v8_0_early_init - early IP init: ring counts and function tables
 *
 * Runs before sw/hw init; only fills in static configuration and hooks
 * up the per-ASIC callback tables. Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5243
/*
 * gfx_v8_0_late_init - late IP init: interrupts, EDC workaround, PG
 *
 * Enables the privileged register/instruction fault interrupts, runs the
 * EDC GPR workaround (needs the IB pool, hence late init) and gates GFX
 * power. Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5267
5268 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5269 bool enable)
5270 {
5271 uint32_t data, temp;
5272
5273 if (adev->asic_type == CHIP_POLARIS11)
5274 /* Send msg to SMU via Powerplay */
5275 amdgpu_set_powergating_state(adev,
5276 AMD_IP_BLOCK_TYPE_SMC,
5277 enable ?
5278 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5279
5280 temp = data = RREG32(mmRLC_PG_CNTL);
5281 /* Enable static MGPG */
5282 if (enable)
5283 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5284 else
5285 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5286
5287 if (temp != data)
5288 WREG32(mmRLC_PG_CNTL, data);
5289 }
5290
5291 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5292 bool enable)
5293 {
5294 uint32_t data, temp;
5295
5296 temp = data = RREG32(mmRLC_PG_CNTL);
5297 /* Enable dynamic MGPG */
5298 if (enable)
5299 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5300 else
5301 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5302
5303 if (temp != data)
5304 WREG32(mmRLC_PG_CNTL, data);
5305 }
5306
5307 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5308 bool enable)
5309 {
5310 uint32_t data, temp;
5311
5312 temp = data = RREG32(mmRLC_PG_CNTL);
5313 /* Enable quick PG */
5314 if (enable)
5315 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5316 else
5317 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5318
5319 if (temp != data)
5320 WREG32(mmRLC_PG_CNTL, data);
5321 }
5322
5323 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5324 bool enable)
5325 {
5326 u32 data, orig;
5327
5328 orig = data = RREG32(mmRLC_PG_CNTL);
5329
5330 if (enable)
5331 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5332 else
5333 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5334
5335 if (orig != data)
5336 WREG32(mmRLC_PG_CNTL, data);
5337 }
5338
5339 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5340 bool enable)
5341 {
5342 u32 data, orig;
5343
5344 orig = data = RREG32(mmRLC_PG_CNTL);
5345
5346 if (enable)
5347 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5348 else
5349 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5350
5351 if (orig != data)
5352 WREG32(mmRLC_PG_CNTL, data);
5353
5354 /* Read any GFX register to wake up GFX. */
5355 if (!enable)
5356 data = RREG32(mmDB_RENDER_CONTROL);
5357 }
5358
/*
 * cz_update_gfx_cg_power_gating - apply CG power gating policy (CZ/ST)
 *
 * Note the asymmetry: on enable, pipeline PG is only touched when the
 * PIPELINE flag is set; on disable, both are unconditionally turned off.
 */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}
5371
5372 static int gfx_v8_0_set_powergating_state(void *handle,
5373 enum amd_powergating_state state)
5374 {
5375 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5376 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5377
5378 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5379 return 0;
5380
5381 switch (adev->asic_type) {
5382 case CHIP_CARRIZO:
5383 case CHIP_STONEY:
5384 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5385 cz_update_gfx_cg_power_gating(adev, enable);
5386
5387 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5388 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5389 else
5390 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5391
5392 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5393 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5394 else
5395 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5396 break;
5397 case CHIP_POLARIS11:
5398 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5399 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5400 else
5401 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5402
5403 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5404 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5405 else
5406 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5407
5408 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5409 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5410 else
5411 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5412 break;
5413 default:
5414 break;
5415 }
5416
5417 return 0;
5418 }
5419
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM serdes command to all CUs
 *
 * Broadcasts to every SE/SH, selects all CU/non-CU serdes masters and
 * programs RLC_SERDES_WR_CTRL with @cmd for BPM register @reg_addr.
 * Note Stoney keeps its existing BPM_DATA/REG_ADDR bits (its clear mask
 * omits them) while other ASICs clear them first.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to all SEs/SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5460
/* RLC safe-mode handshake messages sent through the RLC_GPR_REG2 mailbox
 * on Carrizo/Stoney: bit 0 is the request flag, bits 4:1 carry the message.
 */
#define MSG_ENTER_RLC_SAFE_MODE      1
#define MSG_EXIT_RLC_SAFE_MODE       0

#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5467
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode (Carrizo/Stoney)
 *
 * Sends MSG_ENTER_RLC_SAFE_MODE through the RLC_GPR_REG2 mailbox, waits
 * for GFX clock/power to report on and for the RLC to clear the REQ bit,
 * then records the state in adev->gfx.rlc.in_safe_mode.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do if the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	/* only needed when some CG/PG feature is actually enabled */
	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for both GFX clock and power status bits to be set */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing REQ */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5503
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode (Carrizo/Stoney)
 *
 * Sends MSG_EXIT_RLC_SAFE_MODE through the RLC_GPR_REG2 mailbox and
 * polls for the REQ bit to clear.
 *
 * NOTE(review): the ack poll sits outside the feature-flag guard, so it
 * also runs (harmlessly reading REQ) when no message was sent — confirm
 * this asymmetry with enter_safe_mode is intentional.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to acknowledge by clearing REQ */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
5529
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE
 *
 * Writes CMD + message 1 to RLC_SAFE_MODE, waits for GFX clock/power to
 * report on and for the RLC to clear CMD, then records the state.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	/* nothing to do if the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for both GFX clock and power status bits to be set */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5563
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode via RLC_SAFE_MODE
 *
 * Writes CMD with message 0 to RLC_SAFE_MODE (only if safe mode was
 * actually entered) and polls for the CMD bit to clear.
 *
 * NOTE(review): the ack poll sits outside the guards, so it also runs
 * when nothing was written — harmless but asymmetric; confirm intended.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	/* nothing to do if the RLC F32 core is not running */
	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to acknowledge by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
5588
/* No safe-mode handshake needed on this ASIC; just track the state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
5593
/* No safe-mode handshake needed on this ASIC; just track the state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
5598
/* RLC safe-mode callbacks for Carrizo/Stoney (RLC_GPR_REG2 mailbox). */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};
5603
/* RLC safe-mode callbacks using the RLC_SAFE_MODE register. */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5608
/* RLC safe-mode callbacks for ASICs that need no handshake at all. */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
5613
/*
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 *
 * Runs the numbered hardware sequence under RLC safe mode.  The step
 * order (override registers, serdes idle waits, serdes commands) is
 * mandated by the hardware and must not be reordered.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE (APUs keep the GRBM override set) */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* light sleep only when both MGLS and CGTS_LS are enabled */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5726
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - toggle CGCG/CGLS
 *
 * Runs the numbered hardware sequence under RLC safe mode.  Override
 * register writes, serdes idle waits and serdes commands must stay in
 * this exact order.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		/* drop the CGCG override so the RLC can gate */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
5817 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5818 bool enable)
5819 {
5820 if (enable) {
5821 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5822 * === MGCG + MGLS + TS(CG/LS) ===
5823 */
5824 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5825 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5826 } else {
5827 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5828 * === CGCG + CGLS ===
5829 */
5830 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5831 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5832 }
5833 return 0;
5834 }
5835
5836 static int gfx_v8_0_set_clockgating_state(void *handle,
5837 enum amd_clockgating_state state)
5838 {
5839 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5840
5841 switch (adev->asic_type) {
5842 case CHIP_FIJI:
5843 case CHIP_CARRIZO:
5844 case CHIP_STONEY:
5845 gfx_v8_0_update_gfx_clock_gating(adev,
5846 state == AMD_CG_STATE_GATE ? true : false);
5847 break;
5848 default:
5849 break;
5850 }
5851 return 0;
5852 }
5853
5854 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5855 {
5856 u32 rptr;
5857
5858 rptr = ring->adev->wb.wb[ring->rptr_offs];
5859
5860 return rptr;
5861 }
5862
5863 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5864 {
5865 struct amdgpu_device *adev = ring->adev;
5866 u32 wptr;
5867
5868 if (ring->use_doorbell)
5869 /* XXX check if swapping is necessary on BE */
5870 wptr = ring->adev->wb.wb[ring->wptr_offs];
5871 else
5872 wptr = RREG32(mmCP_RB0_WPTR);
5873
5874 return wptr;
5875 }
5876
5877 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5878 {
5879 struct amdgpu_device *adev = ring->adev;
5880
5881 if (ring->use_doorbell) {
5882 /* XXX check if swapping is necessary on BE */
5883 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5884 WDOORBELL32(ring->doorbell_index, ring->wptr);
5885 } else {
5886 WREG32(mmCP_RB0_WPTR, ring->wptr);
5887 (void)RREG32(mmCP_RB0_WPTR);
5888 }
5889 }
5890
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and waits
 * for the matching bit in GPU_HDP_FLUSH_DONE.  The done bit depends on
 * which CP engine/pipe owns the ring.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		/* each MEC/pipe has its own DONE bit, offset by the pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
5922
/* Emit an HDP invalidate: a confirmed CP write of 1 to HDP_DEBUG0. */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
5934
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the gfx ring
 *
 * Optionally emits a SWITCH_BUFFER for a context switch, then the
 * INDIRECT_BUFFER (or _CONST for constant-engine IBs) packet pointing
 * at the IB's GPU address, with the dword count and VMID in the
 * control word.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* dword count in the low bits, VMID in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5963
/* Emit an indirect buffer on a compute ring.
 *
 * Unlike the gfx variant there is no CE path and no frame switch; the
 * control dword additionally carries INDIRECT_BUFFER_VALID alongside
 * the length and VMID (bits 31:24).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |	/* byte-swap enable on big endian */
#endif
			  (ib->gpu_addr & 0xFFFFFFFC)); /* IB base, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
5979
/* Emit a fence on the gfx ring via EVENT_WRITE_EOP: flush TCL1/TC
 * caches and write seq to addr once prior work has drained.
 *
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write
 *         (DATA_SEL 2) instead of 32-bit (DATA_SEL 1);
 *         AMDGPU_FENCE_FLAG_INT additionally requests an EOP interrupt
 *         (INT_SEL 2).
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence address, dword aligned */
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6000
/* Make the ring wait until its own fence memory reaches sync_seq,
 * i.e. all previously submitted work on this ring has signalled.
 * Gfx rings wait on the PFP; compute rings wait on the ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);        /* reference value */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before the
		 * context switch is done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
6025
6026 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6027 unsigned vm_id, uint64_t pd_addr)
6028 {
6029 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6030
6031 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6032 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6033 WRITE_DATA_DST_SEL(0)) |
6034 WR_CONFIRM);
6035 if (vm_id < 8) {
6036 amdgpu_ring_write(ring,
6037 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6038 } else {
6039 amdgpu_ring_write(ring,
6040 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6041 }
6042 amdgpu_ring_write(ring, 0);
6043 amdgpu_ring_write(ring, pd_addr >> 12);
6044
6045 /* bits 0-15 are the VM contexts0-15 */
6046 /* invalidate the cache */
6047 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6048 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6049 WRITE_DATA_DST_SEL(0)));
6050 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6051 amdgpu_ring_write(ring, 0);
6052 amdgpu_ring_write(ring, 1 << vm_id);
6053
6054 /* wait for the invalidate to complete */
6055 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6056 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6057 WAIT_REG_MEM_FUNCTION(0) | /* always */
6058 WAIT_REG_MEM_ENGINE(0))); /* me */
6059 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6060 amdgpu_ring_write(ring, 0);
6061 amdgpu_ring_write(ring, 0); /* ref */
6062 amdgpu_ring_write(ring, 0); /* mask */
6063 amdgpu_ring_write(ring, 0x20); /* poll interval */
6064
6065 /* compute doesn't have PFP */
6066 if (usepfp) {
6067 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6068 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6069 amdgpu_ring_write(ring, 0x0);
6070 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6071 amdgpu_ring_write(ring, 0);
6072 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6073 amdgpu_ring_write(ring, 0);
6074 }
6075 }
6076
6077 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6078 {
6079 return ring->adev->wb.wb[ring->rptr_offs];
6080 }
6081
6082 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6083 {
6084 return ring->adev->wb.wb[ring->wptr_offs];
6085 }
6086
6087 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6088 {
6089 struct amdgpu_device *adev = ring->adev;
6090
6091 /* XXX check if swapping is necessary on BE */
6092 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6093 WDOORBELL32(ring->doorbell_index, ring->wptr);
6094 }
6095
/* Emit a fence on a compute ring via RELEASE_MEM: flush TCL1/TC
 * caches and write seq to addr once prior work has drained.
 *
 * @flags: AMDGPU_FENCE_FLAG_64BIT selects a 64-bit data write
 *         (DATA_SEL 2) instead of 32-bit (DATA_SEL 1);
 *         AMDGPU_FENCE_FLAG_INT additionally requests an interrupt
 *         (INT_SEL 2).
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc); /* fence address, dword aligned */
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6116
6117 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6118 enum amdgpu_interrupt_state state)
6119 {
6120 u32 cp_int_cntl;
6121
6122 switch (state) {
6123 case AMDGPU_IRQ_STATE_DISABLE:
6124 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6125 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6126 TIME_STAMP_INT_ENABLE, 0);
6127 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6128 break;
6129 case AMDGPU_IRQ_STATE_ENABLE:
6130 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6131 cp_int_cntl =
6132 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6133 TIME_STAMP_INT_ENABLE, 1);
6134 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6135 break;
6136 default:
6137 break;
6138 }
6139 }
6140
6141 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6142 int me, int pipe,
6143 enum amdgpu_interrupt_state state)
6144 {
6145 u32 mec_int_cntl, mec_int_cntl_reg;
6146
6147 /*
6148 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6149 * handles the setting of interrupts for this specific pipe. All other
6150 * pipes' interrupts are set by amdkfd.
6151 */
6152
6153 if (me == 1) {
6154 switch (pipe) {
6155 case 0:
6156 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6157 break;
6158 default:
6159 DRM_DEBUG("invalid pipe %d\n", pipe);
6160 return;
6161 }
6162 } else {
6163 DRM_DEBUG("invalid me %d\n", me);
6164 return;
6165 }
6166
6167 switch (state) {
6168 case AMDGPU_IRQ_STATE_DISABLE:
6169 mec_int_cntl = RREG32(mec_int_cntl_reg);
6170 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6171 TIME_STAMP_INT_ENABLE, 0);
6172 WREG32(mec_int_cntl_reg, mec_int_cntl);
6173 break;
6174 case AMDGPU_IRQ_STATE_ENABLE:
6175 mec_int_cntl = RREG32(mec_int_cntl_reg);
6176 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6177 TIME_STAMP_INT_ENABLE, 1);
6178 WREG32(mec_int_cntl_reg, mec_int_cntl);
6179 break;
6180 default:
6181 break;
6182 }
6183 }
6184
6185 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6186 struct amdgpu_irq_src *source,
6187 unsigned type,
6188 enum amdgpu_interrupt_state state)
6189 {
6190 u32 cp_int_cntl;
6191
6192 switch (state) {
6193 case AMDGPU_IRQ_STATE_DISABLE:
6194 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6195 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6196 PRIV_REG_INT_ENABLE, 0);
6197 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6198 break;
6199 case AMDGPU_IRQ_STATE_ENABLE:
6200 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6201 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6202 PRIV_REG_INT_ENABLE, 1);
6203 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6204 break;
6205 default:
6206 break;
6207 }
6208
6209 return 0;
6210 }
6211
6212 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6213 struct amdgpu_irq_src *source,
6214 unsigned type,
6215 enum amdgpu_interrupt_state state)
6216 {
6217 u32 cp_int_cntl;
6218
6219 switch (state) {
6220 case AMDGPU_IRQ_STATE_DISABLE:
6221 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6222 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6223 PRIV_INSTR_INT_ENABLE, 0);
6224 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6225 break;
6226 case AMDGPU_IRQ_STATE_ENABLE:
6227 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6228 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6229 PRIV_INSTR_INT_ENABLE, 1);
6230 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6231 break;
6232 default:
6233 break;
6234 }
6235
6236 return 0;
6237 }
6238
/* Dispatch an EOP interrupt enable/disable request to the right
 * target: the gfx ring or a specific (MEC, pipe) pair.  Unknown types
 * are silently ignored.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6277
/* EOP interrupt handler: decode me/pipe/queue from the IH ring_id and
 * run fence processing on the ring(s) that match.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME0 = gfx; only one gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6309
6310 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6311 struct amdgpu_irq_src *source,
6312 struct amdgpu_iv_entry *entry)
6313 {
6314 DRM_ERROR("Illegal register access in command stream\n");
6315 schedule_work(&adev->reset_work);
6316 return 0;
6317 }
6318
6319 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6320 struct amdgpu_irq_src *source,
6321 struct amdgpu_iv_entry *entry)
6322 {
6323 DRM_ERROR("Illegal instruction in command stream\n");
6324 schedule_work(&adev->reset_work);
6325 return 0;
6326 }
6327
/* IP-block lifecycle hooks for the GFX v8 block (init/fini, suspend/
 * resume, idle checks, reset, clock/power gating).
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6344
/* Ring callbacks for the gfx ring (EVENT_WRITE_EOP fences, CE/DE IB
 * emission); in-kernel submissions need no CS parsing.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6362
/* Ring callbacks for the compute (MEC) rings; differ from gfx in the
 * wptr path (doorbell + writeback only), IB emission and RELEASE_MEM
 * fences.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6380
6381 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6382 {
6383 int i;
6384
6385 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6386 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6387
6388 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6389 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6390 }
6391
/* IRQ source tables: one state-setter/handler pair each for EOP,
 * privileged-register faults and privileged-instruction faults.
 */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
6406
6407 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6408 {
6409 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6410 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6411
6412 adev->gfx.priv_reg_irq.num_types = 1;
6413 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6414
6415 adev->gfx.priv_inst_irq.num_types = 1;
6416 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6417 }
6418
6419 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6420 {
6421 switch (adev->asic_type) {
6422 case CHIP_TOPAZ:
6423 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6424 break;
6425 case CHIP_STONEY:
6426 case CHIP_CARRIZO:
6427 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6428 break;
6429 default:
6430 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6431 break;
6432 }
6433 }
6434
6435 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6436 {
6437 /* init asci gds info */
6438 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6439 adev->gds.gws.total_size = 64;
6440 adev->gds.oa.total_size = 16;
6441
6442 if (adev->gds.mem.total_size == 64 * 1024) {
6443 adev->gds.mem.gfx_partition_size = 4096;
6444 adev->gds.mem.cs_partition_size = 4096;
6445
6446 adev->gds.gws.gfx_partition_size = 4;
6447 adev->gds.gws.cs_partition_size = 4;
6448
6449 adev->gds.oa.gfx_partition_size = 4;
6450 adev->gds.oa.cs_partition_size = 1;
6451 } else {
6452 adev->gds.mem.gfx_partition_size = 1024;
6453 adev->gds.mem.cs_partition_size = 1024;
6454
6455 adev->gds.gws.gfx_partition_size = 16;
6456 adev->gds.gws.cs_partition_size = 16;
6457
6458 adev->gds.oa.gfx_partition_size = 4;
6459 adev->gds.oa.cs_partition_size = 4;
6460 }
6461 }
6462
6463 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6464 u32 bitmap)
6465 {
6466 u32 data;
6467
6468 if (!bitmap)
6469 return;
6470
6471 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6472 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6473
6474 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6475 }
6476
6477 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6478 {
6479 u32 data, mask;
6480
6481 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6482 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6483
6484 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6485 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6486
6487 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6488
6489 return (~data) & mask;
6490 }
6491
/* Populate adev->gfx.cu_info: the per-SE/SH active-CU bitmaps, the
 * total active CU count, and the always-on CU mask.  Takes
 * grbm_idx_mutex while steering per-instance register reads.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* module-parameter CU disable masks, one per (se, sh), up to 4x2 */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			/* steer register accesses at this SE/SH instance */
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first two per SH form the
			 * always-on bitmap */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast (all SE/SH) mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
This page took 0.245371 seconds and 5 git commands to generate.