drm/amdgpu/gfx: clean up harvest configuration (v2)
[deliverable/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
CommitLineData
aaa36a97
AD
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
24#include "drmP.h"
25#include "amdgpu.h"
26#include "amdgpu_gfx.h"
27#include "vi.h"
28#include "vid.h"
29#include "amdgpu_ucode.h"
30#include "clearstate_vi.h"
31
32#include "gmc/gmc_8_2_d.h"
33#include "gmc/gmc_8_2_sh_mask.h"
34
35#include "oss/oss_3_0_d.h"
36#include "oss/oss_3_0_sh_mask.h"
37
38#include "bif/bif_5_0_d.h"
39#include "bif/bif_5_0_sh_mask.h"
40
41#include "gca/gfx_8_0_d.h"
42#include "gca/gfx_8_0_enum.h"
43#include "gca/gfx_8_0_sh_mask.h"
44#include "gca/gfx_8_0_enum.h"
45
46#include "uvd/uvd_5_0_d.h"
47#include "uvd/uvd_5_0_sh_mask.h"
48
49#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h"
51
/* GFX8 exposes one graphics ring and eight MEC compute rings. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/* Known-good GB_ADDR_CONFIG values programmed per ASIC. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that shift a tiling-mode field value into its bit position
 * within the GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layout.
 */
#define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68
6e378858
EH
/* Per-feature override bits in RLC_CGTT_MGCG_OVERRIDE (clock-gating). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};
89
c65444fe
JZ
/* Firmware images required per ASIC: CE/PFP/ME (graphics CP), MEC
 * (compute), and RLC microcode.  Stoney and Topaz have no MEC2.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
122
aaa36a97
AD
/* Per-VMID GDS register offsets: {base, size, GWS, OA}, indexed by VMID
 * 0..15.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
142
/* Tonga A11 golden register settings.  Entries are consumed in triplets
 * of {register offset, mask, value} by amdgpu_program_register_sequence().
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
161
/* Tonga common golden registers (raster config, GB_ADDR_CONFIG, SPI CU
 * reservations).  {register offset, mask, value} triplets.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
173
/* Tonga medium-grain / coarse-grain clock-gating init sequence.
 * {register offset, mask, value} triplets; includes per-CU CGTS setup
 * for CU0-CU7.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
252
af15a2d5
DZ
/* Fiji common golden registers.  {register offset, mask, value} triplets. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
266
/* Fiji A10 golden register settings.  {register offset, mask, value}
 * triplets.
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
281
/* Fiji clock-gating init sequence ({register offset, mask, value}
 * triplets).  Unlike Tonga/CZ/Iceland this list has no per-CU CGTS
 * entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
320
aaa36a97
AD
/* Iceland (Topaz) A11 golden register settings.  {register offset,
 * mask, value} triplets.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
339
/* Iceland common golden registers.  {register offset, mask, value}
 * triplets.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
351
/* Iceland clock-gating init sequence ({register offset, mask, value}
 * triplets).  Per-CU CGTS entries cover CU0-CU5 only.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
419
/* Carrizo A11 golden register settings.  {register offset, mask, value}
 * triplets.
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
433
/* Carrizo common golden registers.  {register offset, mask, value}
 * triplets.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
445
/* Carrizo clock-gating init sequence ({register offset, mask, value}
 * triplets); per-CU CGTS setup for CU0-CU7.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
524
e3c7656c
SL
/* Stoney A11 golden register settings.  {register offset, mask, value}
 * triplets.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
538
/* Stoney common golden registers.  {register offset, mask, value}
 * triplets.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
550
/* Stoney clock-gating init sequence.  {register offset, mask, value}
 * triplets.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
560
aaa36a97
AD
561static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
562static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
563static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
564
565static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
566{
567 switch (adev->asic_type) {
568 case CHIP_TOPAZ:
569 amdgpu_program_register_sequence(adev,
570 iceland_mgcg_cgcg_init,
571 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
572 amdgpu_program_register_sequence(adev,
573 golden_settings_iceland_a11,
574 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
575 amdgpu_program_register_sequence(adev,
576 iceland_golden_common_all,
577 (const u32)ARRAY_SIZE(iceland_golden_common_all));
578 break;
af15a2d5
DZ
579 case CHIP_FIJI:
580 amdgpu_program_register_sequence(adev,
581 fiji_mgcg_cgcg_init,
582 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
583 amdgpu_program_register_sequence(adev,
584 golden_settings_fiji_a10,
585 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
586 amdgpu_program_register_sequence(adev,
587 fiji_golden_common_all,
588 (const u32)ARRAY_SIZE(fiji_golden_common_all));
589 break;
590
aaa36a97
AD
591 case CHIP_TONGA:
592 amdgpu_program_register_sequence(adev,
593 tonga_mgcg_cgcg_init,
594 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
595 amdgpu_program_register_sequence(adev,
596 golden_settings_tonga_a11,
597 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
598 amdgpu_program_register_sequence(adev,
599 tonga_golden_common_all,
600 (const u32)ARRAY_SIZE(tonga_golden_common_all));
601 break;
602 case CHIP_CARRIZO:
603 amdgpu_program_register_sequence(adev,
604 cz_mgcg_cgcg_init,
605 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
606 amdgpu_program_register_sequence(adev,
607 cz_golden_settings_a11,
608 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
609 amdgpu_program_register_sequence(adev,
610 cz_golden_common_all,
611 (const u32)ARRAY_SIZE(cz_golden_common_all));
612 break;
e3c7656c
SL
613 case CHIP_STONEY:
614 amdgpu_program_register_sequence(adev,
615 stoney_mgcg_cgcg_init,
616 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
617 amdgpu_program_register_sequence(adev,
618 stoney_golden_settings_a11,
619 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
620 amdgpu_program_register_sequence(adev,
621 stoney_golden_common_all,
622 (const u32)ARRAY_SIZE(stoney_golden_common_all));
623 break;
aaa36a97
AD
624 default:
625 break;
626 }
627}
628
629static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
630{
631 int i;
632
633 adev->gfx.scratch.num_reg = 7;
634 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
635 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
636 adev->gfx.scratch.free[i] = true;
637 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
638 }
639}
640
/**
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 * @ring: the ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a three-dword
 * SET_UCONFIG_REG packet that makes the CP write 0xDEADBEEF to the
 * same register, and polls (up to adev->usec_timeout microseconds)
 * until the value lands.
 *
 * Returns 0 on success, a negative error from scratch/ring allocation,
 * or -EINVAL if the write never appears.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known sentinel so a stale 0xDEADBEEF cannot fake a pass. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* CP writes 0xDEADBEEF to the scratch register. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Busy-poll for the CP write, 1 us per iteration. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
684
/**
 * gfx_v8_0_ring_test_ib - sanity test for indirect buffer submission
 * @ring: the ring to exercise
 *
 * Same idea as the ring test, but the SET_UCONFIG_REG packet is placed
 * in an IB that is scheduled on @ring; we wait on the IB's fence and
 * then poll the scratch register for 0xDEADBEEF.
 *
 * Returns 0 on success, a negative error from scratch/IB allocation,
 * scheduling or fence wait, or -EINVAL if the write never appears.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known sentinel so a stale 0xDEADBEEF cannot fake a pass. */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	/* IB payload: one SET_UCONFIG_REG write of 0xDEADBEEF. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
			       NULL, &f);
	if (r)
		goto err2;

	/* Wait for the IB's fence before polling the register. */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err2;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	/* goto-based unwind: release resources in reverse acquisition order. */
err2:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
744
745static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
746{
747 const char *chip_name;
748 char fw_name[30];
749 int err;
750 struct amdgpu_firmware_info *info = NULL;
751 const struct common_firmware_header *header = NULL;
595fd013 752 const struct gfx_firmware_header_v1_0 *cp_hdr;
aaa36a97
AD
753
754 DRM_DEBUG("\n");
755
756 switch (adev->asic_type) {
757 case CHIP_TOPAZ:
758 chip_name = "topaz";
759 break;
760 case CHIP_TONGA:
761 chip_name = "tonga";
762 break;
763 case CHIP_CARRIZO:
764 chip_name = "carrizo";
765 break;
af15a2d5
DZ
766 case CHIP_FIJI:
767 chip_name = "fiji";
768 break;
e3c7656c
SL
769 case CHIP_STONEY:
770 chip_name = "stoney";
771 break;
aaa36a97
AD
772 default:
773 BUG();
774 }
775
c65444fe 776 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
aaa36a97
AD
777 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
778 if (err)
779 goto out;
780 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
781 if (err)
782 goto out;
595fd013
JZ
783 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
784 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
785 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 786
c65444fe 787 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
aaa36a97
AD
788 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
789 if (err)
790 goto out;
791 err = amdgpu_ucode_validate(adev->gfx.me_fw);
792 if (err)
793 goto out;
595fd013
JZ
794 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
795 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
796 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 797
c65444fe 798 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
aaa36a97
AD
799 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
800 if (err)
801 goto out;
802 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
803 if (err)
804 goto out;
595fd013
JZ
805 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
806 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
807 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 808
c65444fe 809 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
aaa36a97
AD
810 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
811 if (err)
812 goto out;
813 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
595fd013
JZ
814 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
815 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
816 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 817
c65444fe 818 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
aaa36a97
AD
819 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
820 if (err)
821 goto out;
822 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
823 if (err)
824 goto out;
595fd013
JZ
825 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
826 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
827 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 828
97dde76a
AD
829 if ((adev->asic_type != CHIP_STONEY) &&
830 (adev->asic_type != CHIP_TOPAZ)) {
e3c7656c
SL
831 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
832 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
833 if (!err) {
834 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
835 if (err)
836 goto out;
837 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
838 adev->gfx.mec2_fw->data;
839 adev->gfx.mec2_fw_version =
840 le32_to_cpu(cp_hdr->header.ucode_version);
841 adev->gfx.mec2_feature_version =
842 le32_to_cpu(cp_hdr->ucode_feature_version);
843 } else {
844 err = 0;
845 adev->gfx.mec2_fw = NULL;
846 }
aaa36a97
AD
847 }
848
849 if (adev->firmware.smu_load) {
850 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
851 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
852 info->fw = adev->gfx.pfp_fw;
853 header = (const struct common_firmware_header *)info->fw->data;
854 adev->firmware.fw_size +=
855 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
856
857 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
858 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
859 info->fw = adev->gfx.me_fw;
860 header = (const struct common_firmware_header *)info->fw->data;
861 adev->firmware.fw_size +=
862 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
863
864 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
865 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
866 info->fw = adev->gfx.ce_fw;
867 header = (const struct common_firmware_header *)info->fw->data;
868 adev->firmware.fw_size +=
869 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
870
871 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
872 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
873 info->fw = adev->gfx.rlc_fw;
874 header = (const struct common_firmware_header *)info->fw->data;
875 adev->firmware.fw_size +=
876 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
877
878 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
879 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
880 info->fw = adev->gfx.mec_fw;
881 header = (const struct common_firmware_header *)info->fw->data;
882 adev->firmware.fw_size +=
883 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
884
885 if (adev->gfx.mec2_fw) {
886 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
887 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
888 info->fw = adev->gfx.mec2_fw;
889 header = (const struct common_firmware_header *)info->fw->data;
890 adev->firmware.fw_size +=
891 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
892 }
893
894 }
895
896out:
897 if (err) {
898 dev_err(adev->dev,
899 "gfx8: Failed to load firmware \"%s\"\n",
900 fw_name);
901 release_firmware(adev->gfx.pfp_fw);
902 adev->gfx.pfp_fw = NULL;
903 release_firmware(adev->gfx.me_fw);
904 adev->gfx.me_fw = NULL;
905 release_firmware(adev->gfx.ce_fw);
906 adev->gfx.ce_fw = NULL;
907 release_firmware(adev->gfx.rlc_fw);
908 adev->gfx.rlc_fw = NULL;
909 release_firmware(adev->gfx.mec_fw);
910 adev->gfx.mec_fw = NULL;
911 release_firmware(adev->gfx.mec2_fw);
912 adev->gfx.mec2_fw = NULL;
913 }
914 return err;
915}
916
917static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
918{
919 int r;
920
921 if (adev->gfx.mec.hpd_eop_obj) {
922 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
923 if (unlikely(r != 0))
924 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
925 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
926 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
927
928 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
929 adev->gfx.mec.hpd_eop_obj = NULL;
930 }
931}
932
933#define MEC_HPD_SIZE 2048
934
935static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
936{
937 int r;
938 u32 *hpd;
939
940 /*
941 * we assign only 1 pipe because all other pipes will
942 * be handled by KFD
943 */
944 adev->gfx.mec.num_mec = 1;
945 adev->gfx.mec.num_pipe = 1;
946 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
947
948 if (adev->gfx.mec.hpd_eop_obj == NULL) {
949 r = amdgpu_bo_create(adev,
950 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
951 PAGE_SIZE, true,
72d7668b 952 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
aaa36a97
AD
953 &adev->gfx.mec.hpd_eop_obj);
954 if (r) {
955 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
956 return r;
957 }
958 }
959
960 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
961 if (unlikely(r != 0)) {
962 gfx_v8_0_mec_fini(adev);
963 return r;
964 }
965 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
966 &adev->gfx.mec.hpd_eop_gpu_addr);
967 if (r) {
968 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
969 gfx_v8_0_mec_fini(adev);
970 return r;
971 }
972 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
973 if (r) {
974 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
975 gfx_v8_0_mec_fini(adev);
976 return r;
977 }
978
979 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
980
981 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
982 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
983
984 return 0;
985}
986
ccba7691
AD
/*
 * Raw shader dwords copied verbatim into the workaround IB (at vgpr_offset)
 * and executed via DISPATCH_DIRECT by gfx_v8_0_do_edc_gpr_workarounds().
 * NOTE(review): the name indicates it initializes the VGPRs as part of the
 * Carrizo EDC setup — instruction encoding not decoded here; confirm against
 * the GCN3 ISA if modifying.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1023
/*
 * Raw shader dwords copied verbatim into the workaround IB (at sgpr_offset)
 * and dispatched twice (SGPR1 and SGPR2 passes) by
 * gfx_v8_0_do_edc_gpr_workarounds().
 * NOTE(review): the name indicates it initializes the SGPRs — encoding not
 * decoded here; confirm against the GCN3 ISA if modifying.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1048
/*
 * (register, value) pairs consumed two at a time: each pair is emitted as a
 * PACKET3_SET_SH_REG write before the VGPR-init shader dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1068
/*
 * (register, value) pairs for the first SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds().  Differs from sgpr2_init_regs only in
 * the COMPUTE_STATIC_THREAD_MGMT_SE0 mask (0x0f vs 0xf0) — presumably
 * selecting complementary CU subsets; confirm against hardware docs.
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1088
/*
 * (register, value) pairs for the second SGPR-init dispatch in
 * gfx_v8_0_do_edc_gpr_workarounds(); identical to sgpr1_init_regs except
 * for the COMPUTE_STATIC_THREAD_MGMT_SE0 mask (0xf0 here).
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1108
/*
 * EDC SEC/DED error-counter registers read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear their counts (per the
 * "read back registers to clear the counters" step there).
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1137
/*
 * gfx_v8_0_do_edc_gpr_workarounds - EDC GPR initialization (Carrizo only)
 *
 * Builds one indirect buffer that performs three compute dispatches
 * (VGPR init, then two SGPR init passes), runs it on compute ring 0 and
 * waits for completion, then programs GB_EDC_MODE / CC_GC_EDC_CONFIG and
 * reads back the SEC/DED counter registers to clear them.
 *
 * Returns 0 on success (or when skipped: non-Carrizo ASIC or the compute
 * ring is not ready), a negative error code on failure.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save GB_EDC_MODE and disable EDC while the IB runs */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per reg pair + 4 (PGM_LO/HI write)
	 * + 5 (DISPATCH_DIRECT) + 2 (CS partial flush), in bytes */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
			       NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);

	return r;
}
1301
0bde3a95
AD
/*
 * gfx_v8_0_gpu_early_init - populate adev->gfx.config for the detected ASIC
 *
 * Fills the static gfx configuration (shader engine / pipe / CU / backend
 * counts, fifo sizes, GB_ADDR_CONFIG golden value) per asic_type — and for
 * Carrizo/Stoney per PCI revision id — then derives the memory row size
 * from the MC fuse registers (APU) or MC_ARB_RAMCFG (dGPU) and patches
 * the ROW_SIZE field of gb_addr_config accordingly.
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision id) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1527
5fc3aeeb 1528static int gfx_v8_0_sw_init(void *handle)
aaa36a97
AD
1529{
1530 int i, r;
1531 struct amdgpu_ring *ring;
5fc3aeeb 1532 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
1533
1534 /* EOP Event */
1535 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1536 if (r)
1537 return r;
1538
1539 /* Privileged reg */
1540 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1541 if (r)
1542 return r;
1543
1544 /* Privileged inst */
1545 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1546 if (r)
1547 return r;
1548
1549 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1550
1551 gfx_v8_0_scratch_init(adev);
1552
1553 r = gfx_v8_0_init_microcode(adev);
1554 if (r) {
1555 DRM_ERROR("Failed to load gfx firmware!\n");
1556 return r;
1557 }
1558
1559 r = gfx_v8_0_mec_init(adev);
1560 if (r) {
1561 DRM_ERROR("Failed to init MEC BOs!\n");
1562 return r;
1563 }
1564
aaa36a97
AD
1565 /* set up the gfx ring */
1566 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1567 ring = &adev->gfx.gfx_ring[i];
1568 ring->ring_obj = NULL;
1569 sprintf(ring->name, "gfx");
1570 /* no gfx doorbells on iceland */
1571 if (adev->asic_type != CHIP_TOPAZ) {
1572 ring->use_doorbell = true;
1573 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1574 }
1575
1576 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1577 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1578 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1579 AMDGPU_RING_TYPE_GFX);
1580 if (r)
1581 return r;
1582 }
1583
1584 /* set up the compute queues */
1585 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1586 unsigned irq_type;
1587
1588 /* max 32 queues per MEC */
1589 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1590 DRM_ERROR("Too many (%d) compute rings!\n", i);
1591 break;
1592 }
1593 ring = &adev->gfx.compute_ring[i];
1594 ring->ring_obj = NULL;
1595 ring->use_doorbell = true;
1596 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1597 ring->me = 1; /* first MEC */
1598 ring->pipe = i / 8;
1599 ring->queue = i % 8;
1600 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1601 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1602 /* type-2 packets are deprecated on MEC, use type-3 instead */
1603 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1604 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1605 &adev->gfx.eop_irq, irq_type,
1606 AMDGPU_RING_TYPE_COMPUTE);
1607 if (r)
1608 return r;
1609 }
1610
1611 /* reserve GDS, GWS and OA resource for gfx */
1612 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1613 PAGE_SIZE, true,
72d7668b 1614 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
aaa36a97
AD
1615 NULL, &adev->gds.gds_gfx_bo);
1616 if (r)
1617 return r;
1618
1619 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1620 PAGE_SIZE, true,
72d7668b 1621 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
aaa36a97
AD
1622 NULL, &adev->gds.gws_gfx_bo);
1623 if (r)
1624 return r;
1625
1626 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1627 PAGE_SIZE, true,
72d7668b 1628 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
aaa36a97
AD
1629 NULL, &adev->gds.oa_gfx_bo);
1630 if (r)
1631 return r;
1632
a101a899
KW
1633 adev->gfx.ce_ram_size = 0x8000;
1634
0bde3a95
AD
1635 gfx_v8_0_gpu_early_init(adev);
1636
aaa36a97
AD
1637 return 0;
1638}
1639
5fc3aeeb 1640static int gfx_v8_0_sw_fini(void *handle)
aaa36a97
AD
1641{
1642 int i;
5fc3aeeb 1643 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
1644
1645 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1646 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1647 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1648
1649 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1650 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1651 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1652 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1653
aaa36a97
AD
1654 gfx_v8_0_mec_fini(adev);
1655
1656 return 0;
1657}
1658
1659static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1660{
90bea0ab 1661 uint32_t *modearray, *mod2array;
eb64526f
TSD
1662 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1663 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 1664 u32 reg_offset;
aaa36a97 1665
90bea0ab
TSD
1666 modearray = adev->gfx.config.tile_mode_array;
1667 mod2array = adev->gfx.config.macrotile_mode_array;
1668
1669 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1670 modearray[reg_offset] = 0;
1671
1672 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1673 mod2array[reg_offset] = 0;
aaa36a97
AD
1674
1675 switch (adev->asic_type) {
1676 case CHIP_TOPAZ:
90bea0ab
TSD
1677 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1678 PIPE_CONFIG(ADDR_SURF_P2) |
1679 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1680 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1681 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1682 PIPE_CONFIG(ADDR_SURF_P2) |
1683 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1684 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1685 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1686 PIPE_CONFIG(ADDR_SURF_P2) |
1687 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1688 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1689 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1690 PIPE_CONFIG(ADDR_SURF_P2) |
1691 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1692 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1693 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1694 PIPE_CONFIG(ADDR_SURF_P2) |
1695 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1696 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1697 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1698 PIPE_CONFIG(ADDR_SURF_P2) |
1699 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1700 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1701 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1702 PIPE_CONFIG(ADDR_SURF_P2) |
1703 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1704 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1705 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1706 PIPE_CONFIG(ADDR_SURF_P2));
1707 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1708 PIPE_CONFIG(ADDR_SURF_P2) |
1709 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1711 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1712 PIPE_CONFIG(ADDR_SURF_P2) |
1713 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1715 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1716 PIPE_CONFIG(ADDR_SURF_P2) |
1717 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1719 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1720 PIPE_CONFIG(ADDR_SURF_P2) |
1721 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1723 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1724 PIPE_CONFIG(ADDR_SURF_P2) |
1725 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1727 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1728 PIPE_CONFIG(ADDR_SURF_P2) |
1729 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1730 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1731 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1732 PIPE_CONFIG(ADDR_SURF_P2) |
1733 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1734 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1735 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1736 PIPE_CONFIG(ADDR_SURF_P2) |
1737 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1738 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1739 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1740 PIPE_CONFIG(ADDR_SURF_P2) |
1741 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1742 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1743 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1744 PIPE_CONFIG(ADDR_SURF_P2) |
1745 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1747 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1748 PIPE_CONFIG(ADDR_SURF_P2) |
1749 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1750 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1751 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1752 PIPE_CONFIG(ADDR_SURF_P2) |
1753 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1754 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1755 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1756 PIPE_CONFIG(ADDR_SURF_P2) |
1757 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1759 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1760 PIPE_CONFIG(ADDR_SURF_P2) |
1761 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1762 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1763 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1764 PIPE_CONFIG(ADDR_SURF_P2) |
1765 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1766 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1767 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1768 PIPE_CONFIG(ADDR_SURF_P2) |
1769 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1770 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1771 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1772 PIPE_CONFIG(ADDR_SURF_P2) |
1773 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1774 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1775 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1776 PIPE_CONFIG(ADDR_SURF_P2) |
1777 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1778 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1779
1780 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1783 NUM_BANKS(ADDR_SURF_8_BANK));
1784 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1785 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1786 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1787 NUM_BANKS(ADDR_SURF_8_BANK));
1788 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1789 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1790 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1791 NUM_BANKS(ADDR_SURF_8_BANK));
1792 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1793 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1794 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1795 NUM_BANKS(ADDR_SURF_8_BANK));
1796 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1799 NUM_BANKS(ADDR_SURF_8_BANK));
1800 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1801 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1802 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1803 NUM_BANKS(ADDR_SURF_8_BANK));
1804 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1805 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1806 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1807 NUM_BANKS(ADDR_SURF_8_BANK));
1808 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1809 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1810 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1811 NUM_BANKS(ADDR_SURF_16_BANK));
1812 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1813 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1814 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1815 NUM_BANKS(ADDR_SURF_16_BANK));
1816 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1819 NUM_BANKS(ADDR_SURF_16_BANK));
1820 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1821 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1822 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1823 NUM_BANKS(ADDR_SURF_16_BANK));
1824 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1827 NUM_BANKS(ADDR_SURF_16_BANK));
1828 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1829 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1830 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1831 NUM_BANKS(ADDR_SURF_16_BANK));
1832 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1835 NUM_BANKS(ADDR_SURF_8_BANK));
1836
1837 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1838 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1839 reg_offset != 23)
1840 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1841
1842 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1843 if (reg_offset != 7)
1844 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1845
8cdacf44 1846 break;
af15a2d5 1847 case CHIP_FIJI:
90bea0ab
TSD
1848 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1849 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1850 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1851 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1852 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1853 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1854 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1855 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1856 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1857 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1858 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1859 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1860 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1861 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1862 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1863 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1864 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1865 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1866 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1867 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1868 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1869 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1870 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1871 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1872 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1873 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1874 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1875 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1876 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1877 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1878 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1879 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1880 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1881 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1882 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1883 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1884 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1886 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1887 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1888 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1890 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1891 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1892 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1894 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1895 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1896 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1897 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1898 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1899 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1900 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1902 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1903 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1904 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1905 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1906 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1907 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1908 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1909 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1910 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1911 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1912 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1913 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1914 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1915 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1916 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1917 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1918 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1919 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1920 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1921 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1922 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1923 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1924 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1925 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1926 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1927 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1928 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1929 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1930 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1931 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1932 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1933 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1934 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1935 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1936 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1938 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1939 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1940 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1942 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1943 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1944 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1946 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1947 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1948 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1949 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1950 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1951 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1952 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1954 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1955 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1956 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1957 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1958 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1959 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1960 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1961 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1962 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1963 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1964 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1966 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1967 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1968 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1969 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1970
1971 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1972 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1973 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1974 NUM_BANKS(ADDR_SURF_8_BANK));
1975 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1976 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1977 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1978 NUM_BANKS(ADDR_SURF_8_BANK));
1979 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1980 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1981 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1982 NUM_BANKS(ADDR_SURF_8_BANK));
1983 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1986 NUM_BANKS(ADDR_SURF_8_BANK));
1987 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1990 NUM_BANKS(ADDR_SURF_8_BANK));
1991 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1992 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1993 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1994 NUM_BANKS(ADDR_SURF_8_BANK));
1995 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1996 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1997 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1998 NUM_BANKS(ADDR_SURF_8_BANK));
1999 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2000 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2001 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2002 NUM_BANKS(ADDR_SURF_8_BANK));
2003 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2004 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2005 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2006 NUM_BANKS(ADDR_SURF_8_BANK));
2007 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2008 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2009 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2010 NUM_BANKS(ADDR_SURF_8_BANK));
2011 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2012 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2013 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2014 NUM_BANKS(ADDR_SURF_8_BANK));
2015 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2016 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2017 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2018 NUM_BANKS(ADDR_SURF_8_BANK));
2019 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2020 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2021 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2022 NUM_BANKS(ADDR_SURF_8_BANK));
2023 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2024 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2025 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2026 NUM_BANKS(ADDR_SURF_4_BANK));
2027
2028 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2029 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2030
2031 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2032 if (reg_offset != 7)
2033 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2034
5f2e816b 2035 break;
aaa36a97 2036 case CHIP_TONGA:
90bea0ab
TSD
2037 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2038 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2041 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2042 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2045 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2049 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2053 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2057 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2058 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2061 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2063 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2064 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2065 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2066 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2067 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2068 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2069 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2071 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2073 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2075 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2076 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2077 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2080 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2081 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2083 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2085 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2087 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2088 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2091 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2092 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2095 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2096 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2097 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2099 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2100 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2101 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2103 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2104 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2105 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2107 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2109 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2111 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2112 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2113 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2115 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2116 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2117 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2119 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2120 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2123 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2124 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2125 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2127 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2128 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2129 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2131 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2132 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2133 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2135 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2136 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2137 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2139 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2140 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2141 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2143 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2144 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2145 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2147 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2149 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2151 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2152 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2153 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2155 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2156 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2157 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2159
2160 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2163 NUM_BANKS(ADDR_SURF_16_BANK));
2164 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2167 NUM_BANKS(ADDR_SURF_16_BANK));
2168 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2171 NUM_BANKS(ADDR_SURF_16_BANK));
2172 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2175 NUM_BANKS(ADDR_SURF_16_BANK));
2176 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2179 NUM_BANKS(ADDR_SURF_16_BANK));
2180 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2183 NUM_BANKS(ADDR_SURF_16_BANK));
2184 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2187 NUM_BANKS(ADDR_SURF_16_BANK));
2188 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2191 NUM_BANKS(ADDR_SURF_16_BANK));
2192 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2195 NUM_BANKS(ADDR_SURF_16_BANK));
2196 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2199 NUM_BANKS(ADDR_SURF_16_BANK));
2200 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2203 NUM_BANKS(ADDR_SURF_16_BANK));
2204 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2207 NUM_BANKS(ADDR_SURF_8_BANK));
2208 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2211 NUM_BANKS(ADDR_SURF_4_BANK));
2212 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2215 NUM_BANKS(ADDR_SURF_4_BANK));
2216
2217 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2218 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2219
2220 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2221 if (reg_offset != 7)
2222 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2223
aaa36a97 2224 break;
e3c7656c 2225 case CHIP_STONEY:
90bea0ab
TSD
2226 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2229 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2230 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 PIPE_CONFIG(ADDR_SURF_P2) |
2232 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2233 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2234 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2235 PIPE_CONFIG(ADDR_SURF_P2) |
2236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2238 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2239 PIPE_CONFIG(ADDR_SURF_P2) |
2240 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2241 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2242 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P2) |
2244 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2245 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2246 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2247 PIPE_CONFIG(ADDR_SURF_P2) |
2248 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251 PIPE_CONFIG(ADDR_SURF_P2) |
2252 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2253 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2254 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2255 PIPE_CONFIG(ADDR_SURF_P2));
2256 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2260 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2261 PIPE_CONFIG(ADDR_SURF_P2) |
2262 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2264 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2265 PIPE_CONFIG(ADDR_SURF_P2) |
2266 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2268 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 PIPE_CONFIG(ADDR_SURF_P2) |
2270 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2272 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 PIPE_CONFIG(ADDR_SURF_P2) |
2274 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2277 PIPE_CONFIG(ADDR_SURF_P2) |
2278 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2280 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2281 PIPE_CONFIG(ADDR_SURF_P2) |
2282 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2284 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2285 PIPE_CONFIG(ADDR_SURF_P2) |
2286 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2288 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2289 PIPE_CONFIG(ADDR_SURF_P2) |
2290 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2291 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2292 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2293 PIPE_CONFIG(ADDR_SURF_P2) |
2294 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2296 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2297 PIPE_CONFIG(ADDR_SURF_P2) |
2298 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2299 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2300 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2301 PIPE_CONFIG(ADDR_SURF_P2) |
2302 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2303 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2304 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2305 PIPE_CONFIG(ADDR_SURF_P2) |
2306 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2308 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2309 PIPE_CONFIG(ADDR_SURF_P2) |
2310 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2312 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2313 PIPE_CONFIG(ADDR_SURF_P2) |
2314 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2316 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2317 PIPE_CONFIG(ADDR_SURF_P2) |
2318 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2321 PIPE_CONFIG(ADDR_SURF_P2) |
2322 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2324 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2325 PIPE_CONFIG(ADDR_SURF_P2) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2328
2329 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2332 NUM_BANKS(ADDR_SURF_8_BANK));
2333 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2336 NUM_BANKS(ADDR_SURF_8_BANK));
2337 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2340 NUM_BANKS(ADDR_SURF_8_BANK));
2341 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2344 NUM_BANKS(ADDR_SURF_8_BANK));
2345 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2346 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2347 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2348 NUM_BANKS(ADDR_SURF_8_BANK));
2349 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2351 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 NUM_BANKS(ADDR_SURF_8_BANK));
2353 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2355 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2356 NUM_BANKS(ADDR_SURF_8_BANK));
2357 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2358 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2359 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2360 NUM_BANKS(ADDR_SURF_16_BANK));
2361 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2362 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2363 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2364 NUM_BANKS(ADDR_SURF_16_BANK));
2365 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2366 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2367 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2368 NUM_BANKS(ADDR_SURF_16_BANK));
2369 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2370 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2371 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2372 NUM_BANKS(ADDR_SURF_16_BANK));
2373 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2375 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2376 NUM_BANKS(ADDR_SURF_16_BANK));
2377 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2378 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2379 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2380 NUM_BANKS(ADDR_SURF_16_BANK));
2381 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2384 NUM_BANKS(ADDR_SURF_8_BANK));
2385
2386 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2387 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2388 reg_offset != 23)
2389 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2390
2391 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2392 if (reg_offset != 7)
2393 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2394
e3c7656c 2395 break;
aaa36a97 2396 default:
90bea0ab
TSD
2397 dev_warn(adev->dev,
2398 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2399 adev->asic_type);
2400
2401 case CHIP_CARRIZO:
2402 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 PIPE_CONFIG(ADDR_SURF_P2) |
2404 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2405 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2406 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2407 PIPE_CONFIG(ADDR_SURF_P2) |
2408 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2409 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2410 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2411 PIPE_CONFIG(ADDR_SURF_P2) |
2412 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2413 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2414 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2415 PIPE_CONFIG(ADDR_SURF_P2) |
2416 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2417 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2418 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 PIPE_CONFIG(ADDR_SURF_P2) |
2420 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2421 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2422 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2423 PIPE_CONFIG(ADDR_SURF_P2) |
2424 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2425 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2426 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2427 PIPE_CONFIG(ADDR_SURF_P2) |
2428 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2430 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2431 PIPE_CONFIG(ADDR_SURF_P2));
2432 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P2) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2436 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P2) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2441 PIPE_CONFIG(ADDR_SURF_P2) |
2442 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2444 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2445 PIPE_CONFIG(ADDR_SURF_P2) |
2446 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2448 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2449 PIPE_CONFIG(ADDR_SURF_P2) |
2450 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2452 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2453 PIPE_CONFIG(ADDR_SURF_P2) |
2454 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2456 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2457 PIPE_CONFIG(ADDR_SURF_P2) |
2458 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2460 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2461 PIPE_CONFIG(ADDR_SURF_P2) |
2462 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2464 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2465 PIPE_CONFIG(ADDR_SURF_P2) |
2466 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2468 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2469 PIPE_CONFIG(ADDR_SURF_P2) |
2470 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2472 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2473 PIPE_CONFIG(ADDR_SURF_P2) |
2474 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2476 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2477 PIPE_CONFIG(ADDR_SURF_P2) |
2478 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2480 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2481 PIPE_CONFIG(ADDR_SURF_P2) |
2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2484 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2485 PIPE_CONFIG(ADDR_SURF_P2) |
2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2488 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2489 PIPE_CONFIG(ADDR_SURF_P2) |
2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2492 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2493 PIPE_CONFIG(ADDR_SURF_P2) |
2494 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 PIPE_CONFIG(ADDR_SURF_P2) |
2498 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2500 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2501 PIPE_CONFIG(ADDR_SURF_P2) |
2502 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2504
2505 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2508 NUM_BANKS(ADDR_SURF_8_BANK));
2509 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2512 NUM_BANKS(ADDR_SURF_8_BANK));
2513 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2516 NUM_BANKS(ADDR_SURF_8_BANK));
2517 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2520 NUM_BANKS(ADDR_SURF_8_BANK));
2521 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2524 NUM_BANKS(ADDR_SURF_8_BANK));
2525 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2527 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2528 NUM_BANKS(ADDR_SURF_8_BANK));
2529 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2530 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2531 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2532 NUM_BANKS(ADDR_SURF_8_BANK));
2533 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2534 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2535 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2536 NUM_BANKS(ADDR_SURF_16_BANK));
2537 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2538 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2539 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2540 NUM_BANKS(ADDR_SURF_16_BANK));
2541 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2542 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2543 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2544 NUM_BANKS(ADDR_SURF_16_BANK));
2545 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2546 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2548 NUM_BANKS(ADDR_SURF_16_BANK));
2549 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2550 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2551 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2552 NUM_BANKS(ADDR_SURF_16_BANK));
2553 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2556 NUM_BANKS(ADDR_SURF_16_BANK));
2557 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2560 NUM_BANKS(ADDR_SURF_8_BANK));
2561
2562 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2563 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2564 reg_offset != 23)
2565 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2566
2567 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2568 if (reg_offset != 7)
2569 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2570
2571 break;
aaa36a97
AD
2572 }
2573}
2574
aaa36a97
AD
2575void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2576{
2577 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2578
2579 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2580 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2581 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2582 } else if (se_num == 0xffffffff) {
2583 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2584 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2585 } else if (sh_num == 0xffffffff) {
2586 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2587 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2588 } else {
2589 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2590 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2591 }
2592 WREG32(mmGRBM_GFX_INDEX, data);
2593}
2594
8f8e00c1
AD
/* Build a mask with the low @bit_width bits set (e.g. 4 -> 0xf). */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	/* shift in 64 bits so bit_width == 32 does not overflow */
	u64 mask = (1ULL << bit_width) - 1;

	return (u32)mask;
}
2599
/*
 * gfx_v8_0_get_rb_active_bitmap - bitmap of active render backends
 *
 * @adev: amdgpu device pointer
 *
 * Reads the fuse (CC) and user (GC_USER) RB disable registers for the
 * currently selected SE/SH (see gfx_v8_0_select_se_sh()) and returns the
 * complement as a bitmap of *enabled* RBs, limited to the number of
 * backends per shader array.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* an RB is unusable if disabled by either fuses or driver/user config */
	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	/* isolate the BACKEND_DISABLE field (same layout in both registers) */
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* one bit per backend in this shader array */
	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* invert: disabled bitmap -> active bitmap */
	return (~data) & mask;
}
2615
8f8e00c1 2616static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
aaa36a97
AD
2617{
2618 int i, j;
8f8e00c1
AD
2619 u32 data, tmp, num_rbs = 0;
2620 u32 active_rbs = 0;
aaa36a97
AD
2621
2622 mutex_lock(&adev->grbm_idx_mutex);
8f8e00c1
AD
2623 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2624 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
aaa36a97 2625 gfx_v8_0_select_se_sh(adev, i, j);
8f8e00c1
AD
2626 data = gfx_v8_0_get_rb_active_bitmap(adev);
2627 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2628 RB_BITMAP_WIDTH_PER_SH);
aaa36a97
AD
2629 }
2630 }
2631 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2632 mutex_unlock(&adev->grbm_idx_mutex);
2633
8f8e00c1
AD
2634 adev->gfx.config.backend_enable_mask = active_rbs;
2635 tmp = active_rbs;
2636 while (tmp >>= 1)
2637 num_rbs++;
2638 adev->gfx.config.num_rbs = num_rbs;
aaa36a97
AD
2639}
2640
/**
 * gfx_v8_0_init_compute_vmid - init compute vmid sh_mem registers
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize SH_MEM_* for the compute VMIDs (FIRST_COMPUTE_VMID up to but
 * not including LAST_COMPUTE_VMID) with the HSA64 aperture layout below.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	/* same base in the low (LDS) and high (scratch) halves of SH_MEM_BASES */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		/* route the following writes to this VMID's banked registers */
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2685
aaa36a97
AD
/*
 * gfx_v8_0_gpu_init - one-time GFX block initialization
 *
 * @adev: amdgpu device pointer
 *
 * Programs GRBM timeout, address-config mirrors for the other IP blocks
 * (HDP, DMIF, SDMA, UVD), the tiling tables, the RB configuration, the
 * per-VMID SH_MEM apertures and the PA_SC FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* allow GRBM reads a generous timeout before they are failed */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* mirror the memory-controller address config into every consumer */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	/* SDMA only needs the pipe-config related bits (mask 0x70) */
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/GFX): uncached default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* user VMIDs: non-coherently-cached default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* APE1 base > limit disables that aperture */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* compute VMIDs (8..15) get the HSA aperture layout instead */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2758
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes units to go idle
 *
 * @adev: amdgpu device pointer
 *
 * Polls the per-SE/SH CU-master busy register for every shader array, then
 * the non-CU master busy bits, each bounded by adev->usec_timeout (the
 * timeout is best-effort: a stuck unit is not reported, we just fall
 * through after the poll budget is spent).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* now wait for the non-CU masters (SE/GC/TC0/TC1) */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2788
2789static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2790 bool enable)
2791{
2792 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2793
0d07db7e
TSD
2794 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2795 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2796 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2797 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2798
aaa36a97
AD
2799 WREG32(mmCP_INT_CNTL_RING0, tmp);
2800}
2801
/*
 * gfx_v8_0_rlc_stop - halt the RLC microcontroller
 *
 * @adev: amdgpu device pointer
 *
 * Clears the RLC F32 enable bit, masks the GUI idle interrupts and waits
 * for the RLC serdes to drain before returning.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2813
/*
 * gfx_v8_0_rlc_reset - pulse the RLC soft reset
 *
 * @adev: amdgpu device pointer
 *
 * Asserts then deasserts SOFT_RESET_RLC in GRBM_SOFT_RESET, with a 50us
 * settle delay after each edge.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2825
/*
 * gfx_v8_0_rlc_start - start the RLC microcontroller
 *
 * @adev: amdgpu device pointer
 *
 * Sets the RLC F32 enable bit and, on dGPUs, re-enables the GUI idle
 * interrupts immediately.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
2839
/*
 * gfx_v8_0_rlc_load_microcode - upload RLC firmware word-by-word
 *
 * @adev: amdgpu device pointer
 *
 * Streams the GPM ucode payload into RLC_GPM_UCODE_DATA via the
 * auto-incrementing RLC_GPM_UCODE_ADDR register, then leaves the firmware
 * version in the address register as the hardware expects.
 *
 * Returns 0 on success, -EINVAL if the RLC firmware was never fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* payload location and size come from the firmware header */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2863
/*
 * gfx_v8_0_rlc_resume - stop, reset, (re)load and restart the RLC
 *
 * @adev: amdgpu device pointer
 *
 * Disables clock and power gating across the reset, then loads the RLC
 * firmware either directly (legacy path) or by checking that the SMU has
 * already loaded it.  When the powerplay component owns firmware loading
 * (adev->pp_enabled) no load is needed here at all.
 *
 * Returns 0 on success or a negative error code on firmware load failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; just verify it finished */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2896
2897static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2898{
2899 int i;
2900 u32 tmp = RREG32(mmCP_ME_CNTL);
2901
2902 if (enable) {
2903 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2904 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2905 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2906 } else {
2907 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2908 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2909 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2910 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2911 adev->gfx.gfx_ring[i].ready = false;
2912 }
2913 WREG32(mmCP_ME_CNTL, tmp);
2914 udelay(50);
2915}
2916
/*
 * gfx_v8_0_cp_gfx_load_microcode - upload PFP, CE and ME firmware
 *
 * @adev: amdgpu device pointer
 *
 * Halts the gfx CP, then streams each engine's ucode payload through its
 * auto-incrementing ADDR/DATA register pair, finishing each upload by
 * writing the firmware version into the address register.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* engines must be halted while their ucode is replaced */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
2973
/*
 * gfx_v8_0_get_csb_size - size (in dwords) of the clear-state PM4 stream
 *
 * @adev: amdgpu device pointer
 *
 * Walks vi_cs_data and sums the packet headers plus register payloads that
 * gfx_v8_0_cp_gfx_start() will emit; the two functions must stay in sync.
 *
 * Returns the dword count, or 0 if a non-SECT_CONTEXT section is found
 * (unexpected for VI clear state).
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	/* one SET_CONTEXT_REG packet (2 dwords header) per extent */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}
3002
/*
 * gfx_v8_0_cp_gfx_start - program the CP and emit the clear-state stream
 *
 * @adev: amdgpu device pointer
 *
 * Initializes basic CP registers, un-halts the gfx engines, then writes
 * the PM4 clear-state sequence (matching gfx_v8_0_get_csb_size()) plus the
 * per-ASIC raster config and CE partition setup onto gfx ring 0.
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE CE partition packet at the end */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden context register values from vi_cs_data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* raster config is per-ASIC (RB/SE layout dependent) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
3085
/*
 * gfx_v8_0_cp_gfx_resume - bring up gfx ring 0
 *
 * @adev: amdgpu device pointer
 *
 * Programs the CP ring-buffer registers (size, rptr writeback address,
 * base), configures the doorbell where supported, then starts the ring
 * via gfx_v8_0_cp_gfx_start() and runs a ring test.
 *
 * Returns 0 on success or the ring-test error (ring left not ready).
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);	/* in units of 8 dwords */
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	/* let the rptr-write-enable window settle before restoring CNTL */
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base is programmed in 256-byte units */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3166
/*
 * gfx_v8_0_cp_compute_enable - halt or release the compute micro engines
 *
 * @adev:   amdgpu device pointer
 * @enable: true to release MEC1/MEC2, false to halt them
 *
 * When halting, every compute ring is also marked not ready.  A 50us
 * delay lets the state settle either way.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
	}
	udelay(50);
}
3180
aaa36a97
AD
/*
 * gfx_v8_0_cp_compute_load_microcode - upload MEC firmware
 *
 * @adev: amdgpu device pointer
 *
 * Halts the compute engines, then streams the MEC1 ucode (and MEC2's, if a
 * separate image was fetched) through the auto-incrementing ADDR/DATA
 * register pairs, writing the firmware version into the address register
 * to finish each upload.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
3226
/*
 * Memory Queue Descriptor (MQD) for VI-family compute queues.
 *
 * This mirrors the hardware/ucode-defined 512-dword MQD layout: the first
 * 256 dwords are driver-visible state (dispatch setup, CP_HQD_*/CP_MQD_*
 * register shadows, IQ timer packet, SET_RESOURCES packet), followed by a
 * 256-dword area reserved for ucode use.  The /* ordinalN */ comments give
 * each field's dword index within the descriptor.  Do NOT reorder, insert
 * or resize fields — the layout is consumed by the CP microcode.
 */
struct vi_mqd {
	uint32_t header;			/* ordinal0 */
	uint32_t compute_dispatch_initiator;	/* ordinal1 */
	uint32_t compute_dim_x;			/* ordinal2 */
	uint32_t compute_dim_y;			/* ordinal3 */
	uint32_t compute_dim_z;			/* ordinal4 */
	uint32_t compute_start_x;		/* ordinal5 */
	uint32_t compute_start_y;		/* ordinal6 */
	uint32_t compute_start_z;		/* ordinal7 */
	uint32_t compute_num_thread_x;		/* ordinal8 */
	uint32_t compute_num_thread_y;		/* ordinal9 */
	uint32_t compute_num_thread_z;		/* ordinal10 */
	uint32_t compute_pipelinestat_enable;	/* ordinal11 */
	uint32_t compute_perfcount_enable;	/* ordinal12 */
	uint32_t compute_pgm_lo;		/* ordinal13 */
	uint32_t compute_pgm_hi;		/* ordinal14 */
	uint32_t compute_tba_lo;		/* ordinal15 */
	uint32_t compute_tba_hi;		/* ordinal16 */
	uint32_t compute_tma_lo;		/* ordinal17 */
	uint32_t compute_tma_hi;		/* ordinal18 */
	uint32_t compute_pgm_rsrc1;		/* ordinal19 */
	uint32_t compute_pgm_rsrc2;		/* ordinal20 */
	uint32_t compute_vmid;			/* ordinal21 */
	uint32_t compute_resource_limits;	/* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
	uint32_t compute_tmpring_size;		/* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
	uint32_t compute_restart_x;		/* ordinal28 */
	uint32_t compute_restart_y;		/* ordinal29 */
	uint32_t compute_restart_z;		/* ordinal30 */
	uint32_t compute_thread_trace_enable;	/* ordinal31 */
	uint32_t compute_misc_reserved;		/* ordinal32 */
	uint32_t compute_dispatch_id;		/* ordinal33 */
	uint32_t compute_threadgroup_id;	/* ordinal34 */
	uint32_t compute_relaunch;		/* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;	/* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;	/* ordinal37 */
	uint32_t compute_wave_restore_control;	/* ordinal38 */
	uint32_t reserved9;			/* ordinal39 */
	uint32_t reserved10;			/* ordinal40 */
	uint32_t reserved11;			/* ordinal41 */
	uint32_t reserved12;			/* ordinal42 */
	uint32_t reserved13;			/* ordinal43 */
	uint32_t reserved14;			/* ordinal44 */
	uint32_t reserved15;			/* ordinal45 */
	uint32_t reserved16;			/* ordinal46 */
	uint32_t reserved17;			/* ordinal47 */
	uint32_t reserved18;			/* ordinal48 */
	uint32_t reserved19;			/* ordinal49 */
	uint32_t reserved20;			/* ordinal50 */
	uint32_t reserved21;			/* ordinal51 */
	uint32_t reserved22;			/* ordinal52 */
	uint32_t reserved23;			/* ordinal53 */
	uint32_t reserved24;			/* ordinal54 */
	uint32_t reserved25;			/* ordinal55 */
	uint32_t reserved26;			/* ordinal56 */
	uint32_t reserved27;			/* ordinal57 */
	uint32_t reserved28;			/* ordinal58 */
	uint32_t reserved29;			/* ordinal59 */
	uint32_t reserved30;			/* ordinal60 */
	uint32_t reserved31;			/* ordinal61 */
	uint32_t reserved32;			/* ordinal62 */
	uint32_t reserved33;			/* ordinal63 */
	uint32_t reserved34;			/* ordinal64 */
	uint32_t compute_user_data_0;		/* ordinal65 */
	uint32_t compute_user_data_1;		/* ordinal66 */
	uint32_t compute_user_data_2;		/* ordinal67 */
	uint32_t compute_user_data_3;		/* ordinal68 */
	uint32_t compute_user_data_4;		/* ordinal69 */
	uint32_t compute_user_data_5;		/* ordinal70 */
	uint32_t compute_user_data_6;		/* ordinal71 */
	uint32_t compute_user_data_7;		/* ordinal72 */
	uint32_t compute_user_data_8;		/* ordinal73 */
	uint32_t compute_user_data_9;		/* ordinal74 */
	uint32_t compute_user_data_10;		/* ordinal75 */
	uint32_t compute_user_data_11;		/* ordinal76 */
	uint32_t compute_user_data_12;		/* ordinal77 */
	uint32_t compute_user_data_13;		/* ordinal78 */
	uint32_t compute_user_data_14;		/* ordinal79 */
	uint32_t compute_user_data_15;		/* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;	/* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;	/* ordinal82 */
	uint32_t reserved35;			/* ordinal83 */
	uint32_t reserved36;			/* ordinal84 */
	uint32_t reserved37;			/* ordinal85 */
	uint32_t cp_mqd_query_time_lo;		/* ordinal86 */
	uint32_t cp_mqd_query_time_hi;		/* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;	/* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;	/* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;	/* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;	/* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;	/* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;	/* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;	/* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;	/* ordinal95 */
	uint32_t reserved38;			/* ordinal96 */
	uint32_t reserved39;			/* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;	/* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;	/* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;	/* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;	/* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;	/* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;	/* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;	/* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;	/* ordinal105 */
	uint32_t reserved40;			/* ordinal106 */
	uint32_t reserved41;			/* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;		/* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;		/* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;		/* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;		/* ordinal111 */
	uint32_t reserved42;			/* ordinal112 */
	uint32_t reserved43;			/* ordinal113 */
	uint32_t cp_pq_exe_status_lo;		/* ordinal114 */
	uint32_t cp_pq_exe_status_hi;		/* ordinal115 */
	uint32_t cp_packet_id_lo;		/* ordinal116 */
	uint32_t cp_packet_id_hi;		/* ordinal117 */
	uint32_t cp_packet_exe_status_lo;	/* ordinal118 */
	uint32_t cp_packet_exe_status_hi;	/* ordinal119 */
	uint32_t gds_save_base_addr_lo;		/* ordinal120 */
	uint32_t gds_save_base_addr_hi;		/* ordinal121 */
	uint32_t gds_save_mask_lo;		/* ordinal122 */
	uint32_t gds_save_mask_hi;		/* ordinal123 */
	uint32_t ctx_save_base_addr_lo;		/* ordinal124 */
	uint32_t ctx_save_base_addr_hi;		/* ordinal125 */
	uint32_t reserved44;			/* ordinal126 */
	uint32_t reserved45;			/* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;		/* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;		/* ordinal129 */
	uint32_t cp_hqd_active;			/* ordinal130 */
	uint32_t cp_hqd_vmid;			/* ordinal131 */
	uint32_t cp_hqd_persistent_state;	/* ordinal132 */
	uint32_t cp_hqd_pipe_priority;		/* ordinal133 */
	uint32_t cp_hqd_queue_priority;		/* ordinal134 */
	uint32_t cp_hqd_quantum;		/* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;		/* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;		/* ordinal137 */
	uint32_t cp_hqd_pq_rptr;		/* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;	/* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;	/* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;	/* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;	/* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;	/* ordinal143 */
	uint32_t cp_hqd_pq_wptr;		/* ordinal144 */
	uint32_t cp_hqd_pq_control;		/* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;	/* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;	/* ordinal147 */
	uint32_t cp_hqd_ib_rptr;		/* ordinal148 */
	uint32_t cp_hqd_ib_control;		/* ordinal149 */
	uint32_t cp_hqd_iq_timer;		/* ordinal150 */
	uint32_t cp_hqd_iq_rptr;		/* ordinal151 */
	uint32_t cp_hqd_dequeue_request;	/* ordinal152 */
	uint32_t cp_hqd_dma_offload;		/* ordinal153 */
	uint32_t cp_hqd_sema_cmd;		/* ordinal154 */
	uint32_t cp_hqd_msg_type;		/* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;	/* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;	/* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;	/* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;	/* ordinal159 */
	uint32_t cp_hqd_hq_status0;		/* ordinal160 */
	uint32_t cp_hqd_hq_control0;		/* ordinal161 */
	uint32_t cp_mqd_control;		/* ordinal162 */
	uint32_t cp_hqd_hq_status1;		/* ordinal163 */
	uint32_t cp_hqd_hq_control1;		/* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;	/* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;	/* ordinal166 */
	uint32_t cp_hqd_eop_control;		/* ordinal167 */
	uint32_t cp_hqd_eop_rptr;		/* ordinal168 */
	uint32_t cp_hqd_eop_wptr;		/* ordinal169 */
	uint32_t cp_hqd_eop_done_events;	/* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;	/* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;	/* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;	/* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;	/* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;	/* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;	/* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;		/* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;	/* ordinal178 */
	uint32_t cp_hqd_error;			/* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;		/* ordinal180 */
	uint32_t cp_hqd_eop_dones;		/* ordinal181 */
	uint32_t reserved46;			/* ordinal182 */
	uint32_t reserved47;			/* ordinal183 */
	uint32_t reserved48;			/* ordinal184 */
	uint32_t reserved49;			/* ordinal185 */
	uint32_t reserved50;			/* ordinal186 */
	uint32_t reserved51;			/* ordinal187 */
	uint32_t reserved52;			/* ordinal188 */
	uint32_t reserved53;			/* ordinal189 */
	uint32_t reserved54;			/* ordinal190 */
	uint32_t reserved55;			/* ordinal191 */
	uint32_t iqtimer_pkt_header;		/* ordinal192 */
	uint32_t iqtimer_pkt_dw0;		/* ordinal193 */
	uint32_t iqtimer_pkt_dw1;		/* ordinal194 */
	uint32_t iqtimer_pkt_dw2;		/* ordinal195 */
	uint32_t iqtimer_pkt_dw3;		/* ordinal196 */
	uint32_t iqtimer_pkt_dw4;		/* ordinal197 */
	uint32_t iqtimer_pkt_dw5;		/* ordinal198 */
	uint32_t iqtimer_pkt_dw6;		/* ordinal199 */
	uint32_t iqtimer_pkt_dw7;		/* ordinal200 */
	uint32_t iqtimer_pkt_dw8;		/* ordinal201 */
	uint32_t iqtimer_pkt_dw9;		/* ordinal202 */
	uint32_t iqtimer_pkt_dw10;		/* ordinal203 */
	uint32_t iqtimer_pkt_dw11;		/* ordinal204 */
	uint32_t iqtimer_pkt_dw12;		/* ordinal205 */
	uint32_t iqtimer_pkt_dw13;		/* ordinal206 */
	uint32_t iqtimer_pkt_dw14;		/* ordinal207 */
	uint32_t iqtimer_pkt_dw15;		/* ordinal208 */
	uint32_t iqtimer_pkt_dw16;		/* ordinal209 */
	uint32_t iqtimer_pkt_dw17;		/* ordinal210 */
	uint32_t iqtimer_pkt_dw18;		/* ordinal211 */
	uint32_t iqtimer_pkt_dw19;		/* ordinal212 */
	uint32_t iqtimer_pkt_dw20;		/* ordinal213 */
	uint32_t iqtimer_pkt_dw21;		/* ordinal214 */
	uint32_t iqtimer_pkt_dw22;		/* ordinal215 */
	uint32_t iqtimer_pkt_dw23;		/* ordinal216 */
	uint32_t iqtimer_pkt_dw24;		/* ordinal217 */
	uint32_t iqtimer_pkt_dw25;		/* ordinal218 */
	uint32_t iqtimer_pkt_dw26;		/* ordinal219 */
	uint32_t iqtimer_pkt_dw27;		/* ordinal220 */
	uint32_t iqtimer_pkt_dw28;		/* ordinal221 */
	uint32_t iqtimer_pkt_dw29;		/* ordinal222 */
	uint32_t iqtimer_pkt_dw30;		/* ordinal223 */
	uint32_t iqtimer_pkt_dw31;		/* ordinal224 */
	uint32_t reserved56;			/* ordinal225 */
	uint32_t reserved57;			/* ordinal226 */
	uint32_t reserved58;			/* ordinal227 */
	uint32_t set_resources_header;		/* ordinal228 */
	uint32_t set_resources_dw1;		/* ordinal229 */
	uint32_t set_resources_dw2;		/* ordinal230 */
	uint32_t set_resources_dw3;		/* ordinal231 */
	uint32_t set_resources_dw4;		/* ordinal232 */
	uint32_t set_resources_dw5;		/* ordinal233 */
	uint32_t set_resources_dw6;		/* ordinal234 */
	uint32_t set_resources_dw7;		/* ordinal235 */
	uint32_t reserved59;			/* ordinal236 */
	uint32_t reserved60;			/* ordinal237 */
	uint32_t reserved61;			/* ordinal238 */
	uint32_t reserved62;			/* ordinal239 */
	uint32_t reserved63;			/* ordinal240 */
	uint32_t reserved64;			/* ordinal241 */
	uint32_t reserved65;			/* ordinal242 */
	uint32_t reserved66;			/* ordinal243 */
	uint32_t reserved67;			/* ordinal244 */
	uint32_t reserved68;			/* ordinal245 */
	uint32_t reserved69;			/* ordinal246 */
	uint32_t reserved70;			/* ordinal247 */
	uint32_t reserved71;			/* ordinal248 */
	uint32_t reserved72;			/* ordinal249 */
	uint32_t reserved73;			/* ordinal250 */
	uint32_t reserved74;			/* ordinal251 */
	uint32_t reserved75;			/* ordinal252 */
	uint32_t reserved76;			/* ordinal253 */
	uint32_t reserved77;			/* ordinal254 */
	uint32_t reserved78;			/* ordinal255 */

	uint32_t reserved_t[256];		/* Reserve 256 dword buffer used by ucode */
};
3487
3488static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3489{
3490 int i, r;
3491
3492 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3493 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3494
3495 if (ring->mqd_obj) {
3496 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3497 if (unlikely(r != 0))
3498 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3499
3500 amdgpu_bo_unpin(ring->mqd_obj);
3501 amdgpu_bo_unreserve(ring->mqd_obj);
3502
3503 amdgpu_bo_unref(&ring->mqd_obj);
3504 ring->mqd_obj = NULL;
3505 }
3506 }
3507}
3508
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues.
 *
 * Sequence (order matters for the hardware):
 *  1. For each MEC pipe, program the EOP buffer address/size and clear the
 *     pipe VMID under srbm_select.
 *  2. For each compute ring, allocate (if needed), pin and map an MQD BO,
 *     fill in the MQD and mirror its fields into the CP_HQD_*/CP_MQD_*
 *     registers, then activate the queue.
 *  3. Globally enable doorbells, start the MEC, and run a ring test on
 *     every compute ring.
 *
 * Returns 0 on success or a negative error code; on reserve/pin/map
 * failures the already-created MQD BOs are torn down via
 * gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;	/* always true here; kept for clarity of both paths */
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes: i in [0,3] -> ME1 pipes 0-3, i in [4,7] -> ME2 pipes 0-3 */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* hardware register takes the 256-byte-aligned address >> 8 */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* the MQD BO survives suspend/resume; only create it once */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on all SEs for this queue */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all CP_HQD_* accesses below target this ring's queue */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP address programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* busy-wait (up to usec_timeout) for the queue to drain */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address wether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* the MEC doorbell range is only programmed on APUs here;
			 * presumably dGPUs set it up elsewhere — confirm if touched */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every compute queue; a failure only marks that ring
	 * not-ready, it does not fail the whole resume */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3763
3764static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3765{
3766 int r;
3767
e3c7656c 3768 if (!(adev->flags & AMD_IS_APU))
aaa36a97
AD
3769 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3770
e61710c5 3771 if (!adev->pp_enabled) {
ba5c2a87
RZ
3772 if (!adev->firmware.smu_load) {
3773 /* legacy firmware loading */
3774 r = gfx_v8_0_cp_gfx_load_microcode(adev);
3775 if (r)
3776 return r;
aaa36a97 3777
ba5c2a87
RZ
3778 r = gfx_v8_0_cp_compute_load_microcode(adev);
3779 if (r)
3780 return r;
3781 } else {
3782 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3783 AMDGPU_UCODE_ID_CP_CE);
3784 if (r)
3785 return -EINVAL;
3786
3787 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3788 AMDGPU_UCODE_ID_CP_PFP);
3789 if (r)
3790 return -EINVAL;
3791
3792 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3793 AMDGPU_UCODE_ID_CP_ME);
3794 if (r)
3795 return -EINVAL;
3796
951e0962
AD
3797 if (adev->asic_type == CHIP_TOPAZ) {
3798 r = gfx_v8_0_cp_compute_load_microcode(adev);
3799 if (r)
3800 return r;
3801 } else {
3802 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3803 AMDGPU_UCODE_ID_CP_MEC1);
3804 if (r)
3805 return -EINVAL;
3806 }
ba5c2a87 3807 }
aaa36a97
AD
3808 }
3809
3810 r = gfx_v8_0_cp_gfx_resume(adev);
3811 if (r)
3812 return r;
3813
3814 r = gfx_v8_0_cp_compute_resume(adev);
3815 if (r)
3816 return r;
3817
3818 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3819
3820 return 0;
3821}
3822
/* Enable or halt both command-processor engines: GFX first, then compute. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3828
5fc3aeeb 3829static int gfx_v8_0_hw_init(void *handle)
aaa36a97
AD
3830{
3831 int r;
5fc3aeeb 3832 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3833
3834 gfx_v8_0_init_golden_registers(adev);
3835
3836 gfx_v8_0_gpu_init(adev);
3837
3838 r = gfx_v8_0_rlc_resume(adev);
3839 if (r)
3840 return r;
3841
3842 r = gfx_v8_0_cp_resume(adev);
3843 if (r)
3844 return r;
3845
3846 return r;
3847}
3848
5fc3aeeb 3849static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 3850{
5fc3aeeb 3851 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3852
1d22a454
AD
3853 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3854 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
aaa36a97
AD
3855 gfx_v8_0_cp_enable(adev, false);
3856 gfx_v8_0_rlc_stop(adev);
3857 gfx_v8_0_cp_compute_fini(adev);
3858
3859 return 0;
3860}
3861
5fc3aeeb 3862static int gfx_v8_0_suspend(void *handle)
aaa36a97 3863{
5fc3aeeb 3864 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3865
aaa36a97
AD
3866 return gfx_v8_0_hw_fini(adev);
3867}
3868
5fc3aeeb 3869static int gfx_v8_0_resume(void *handle)
aaa36a97 3870{
5fc3aeeb 3871 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3872
aaa36a97
AD
3873 return gfx_v8_0_hw_init(adev);
3874}
3875
5fc3aeeb 3876static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 3877{
5fc3aeeb 3878 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3879
aaa36a97
AD
3880 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3881 return false;
3882 else
3883 return true;
3884}
3885
5fc3aeeb 3886static int gfx_v8_0_wait_for_idle(void *handle)
aaa36a97
AD
3887{
3888 unsigned i;
3889 u32 tmp;
5fc3aeeb 3890 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3891
3892 for (i = 0; i < adev->usec_timeout; i++) {
3893 /* read MC_STATUS */
3894 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3895
3896 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3897 return 0;
3898 udelay(1);
3899 }
3900 return -ETIMEDOUT;
3901}
3902
5fc3aeeb 3903static void gfx_v8_0_print_status(void *handle)
aaa36a97
AD
3904{
3905 int i;
5fc3aeeb 3906 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3907
3908 dev_info(adev->dev, "GFX 8.x registers\n");
3909 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
3910 RREG32(mmGRBM_STATUS));
3911 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
3912 RREG32(mmGRBM_STATUS2));
3913 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3914 RREG32(mmGRBM_STATUS_SE0));
3915 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3916 RREG32(mmGRBM_STATUS_SE1));
3917 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3918 RREG32(mmGRBM_STATUS_SE2));
3919 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3920 RREG32(mmGRBM_STATUS_SE3));
3921 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3922 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3923 RREG32(mmCP_STALLED_STAT1));
3924 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3925 RREG32(mmCP_STALLED_STAT2));
3926 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3927 RREG32(mmCP_STALLED_STAT3));
3928 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3929 RREG32(mmCP_CPF_BUSY_STAT));
3930 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3931 RREG32(mmCP_CPF_STALLED_STAT1));
3932 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3933 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3934 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3935 RREG32(mmCP_CPC_STALLED_STAT1));
3936 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3937
3938 for (i = 0; i < 32; i++) {
3939 dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
3940 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3941 }
3942 for (i = 0; i < 16; i++) {
3943 dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
3944 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3945 }
3946 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3947 dev_info(adev->dev, " se: %d\n", i);
3948 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3949 dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
3950 RREG32(mmPA_SC_RASTER_CONFIG));
3951 dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
3952 RREG32(mmPA_SC_RASTER_CONFIG_1));
3953 }
3954 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3955
3956 dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
3957 RREG32(mmGB_ADDR_CONFIG));
3958 dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
3959 RREG32(mmHDP_ADDR_CONFIG));
3960 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
3961 RREG32(mmDMIF_ADDR_CALC));
3962 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
3963 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
3964 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
3965 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
3966 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
3967 RREG32(mmUVD_UDEC_ADDR_CONFIG));
3968 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
3969 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
3970 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
3971 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
3972
3973 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
3974 RREG32(mmCP_MEQ_THRESHOLDS));
3975 dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
3976 RREG32(mmSX_DEBUG_1));
3977 dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
3978 RREG32(mmTA_CNTL_AUX));
3979 dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
3980 RREG32(mmSPI_CONFIG_CNTL));
3981 dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
3982 RREG32(mmSQ_CONFIG));
3983 dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
3984 RREG32(mmDB_DEBUG));
3985 dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
3986 RREG32(mmDB_DEBUG2));
3987 dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
3988 RREG32(mmDB_DEBUG3));
3989 dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
3990 RREG32(mmCB_HW_CONTROL));
3991 dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
3992 RREG32(mmSPI_CONFIG_CNTL_1));
3993 dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
3994 RREG32(mmPA_SC_FIFO_SIZE));
3995 dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
3996 RREG32(mmVGT_NUM_INSTANCES));
3997 dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
3998 RREG32(mmCP_PERFMON_CNTL));
3999 dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4000 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4001 dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
4002 RREG32(mmVGT_CACHE_INVALIDATION));
4003 dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
4004 RREG32(mmVGT_GS_VERTEX_REUSE));
4005 dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4006 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4007 dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
4008 RREG32(mmPA_CL_ENHANCE));
4009 dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
4010 RREG32(mmPA_SC_ENHANCE));
4011
4012 dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
4013 RREG32(mmCP_ME_CNTL));
4014 dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
4015 RREG32(mmCP_MAX_CONTEXT));
4016 dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
4017 RREG32(mmCP_ENDIAN_SWAP));
4018 dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
4019 RREG32(mmCP_DEVICE_ID));
4020
4021 dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
4022 RREG32(mmCP_SEM_WAIT_TIMER));
4023
4024 dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
4025 RREG32(mmCP_RB_WPTR_DELAY));
4026 dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
4027 RREG32(mmCP_RB_VMID));
4028 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4029 RREG32(mmCP_RB0_CNTL));
4030 dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
4031 RREG32(mmCP_RB0_WPTR));
4032 dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
4033 RREG32(mmCP_RB0_RPTR_ADDR));
4034 dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4035 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4036 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4037 RREG32(mmCP_RB0_CNTL));
4038 dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
4039 RREG32(mmCP_RB0_BASE));
4040 dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
4041 RREG32(mmCP_RB0_BASE_HI));
4042 dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
4043 RREG32(mmCP_MEC_CNTL));
4044 dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
4045 RREG32(mmCP_CPF_DEBUG));
4046
4047 dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
4048 RREG32(mmSCRATCH_ADDR));
4049 dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
4050 RREG32(mmSCRATCH_UMSK));
4051
4052 dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
4053 RREG32(mmCP_INT_CNTL_RING0));
4054 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4055 RREG32(mmRLC_LB_CNTL));
4056 dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
4057 RREG32(mmRLC_CNTL));
4058 dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
4059 RREG32(mmRLC_CGCG_CGLS_CTRL));
4060 dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
4061 RREG32(mmRLC_LB_CNTR_INIT));
4062 dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
4063 RREG32(mmRLC_LB_CNTR_MAX));
4064 dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
4065 RREG32(mmRLC_LB_INIT_CU_MASK));
4066 dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
4067 RREG32(mmRLC_LB_PARAMS));
4068 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4069 RREG32(mmRLC_LB_CNTL));
4070 dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
4071 RREG32(mmRLC_MC_CNTL));
4072 dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
4073 RREG32(mmRLC_UCODE_CNTL));
4074
4075 mutex_lock(&adev->srbm_mutex);
4076 for (i = 0; i < 16; i++) {
4077 vi_srbm_select(adev, 0, 0, 0, i);
4078 dev_info(adev->dev, " VM %d:\n", i);
4079 dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
4080 RREG32(mmSH_MEM_CONFIG));
4081 dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
4082 RREG32(mmSH_MEM_APE1_BASE));
4083 dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
4084 RREG32(mmSH_MEM_APE1_LIMIT));
4085 dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
4086 RREG32(mmSH_MEM_BASES));
4087 }
4088 vi_srbm_select(adev, 0, 0, 0, 0);
4089 mutex_unlock(&adev->srbm_mutex);
4090}
4091
/**
 * gfx_v8_0_soft_reset - soft reset the GFX IP block if it appears hung
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS busy bits, and if any
 * GFX-related engine is busy, stops the RLC and CP, pulses the
 * accumulated GRBM/SRBM soft-reset bits, then lets things settle.
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy gfx engine means CP + GFX need a reset */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP additionally requires resetting GRBM via SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy -> reset the RLC too */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS: pending GRBM requests -> reset GRBM */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* dump state before the reset for debugging */
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* NOTE(review): this inner condition is always true here —
		 * the enclosing if already guarantees one of the two is set.
		 */
		if (grbm_soft_reset || srbm_soft_reset) {
			/* stall/clear GFX in the memory controller around the reset */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			/* assert, wait ~50us, then de-assert the GRBM reset bits;
			 * the read-backs post the writes
			 */
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			/* same assert/de-assert pulse for the SRBM reset bits */
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* NOTE(review): also always true inside the outer if */
		if (grbm_soft_reset || srbm_soft_reset) {
			/* release the memory-controller stall/clear */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4196
4197/**
4198 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4199 *
4200 * @adev: amdgpu_device pointer
4201 *
4202 * Fetches a GPU clock counter snapshot.
4203 * Returns the 64 bit clock counter snapshot.
4204 */
4205uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4206{
4207 uint64_t clock;
4208
4209 mutex_lock(&adev->gfx.gpu_clock_mutex);
4210 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4211 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4212 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4213 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4214 return clock;
4215}
4216
4217static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4218 uint32_t vmid,
4219 uint32_t gds_base, uint32_t gds_size,
4220 uint32_t gws_base, uint32_t gws_size,
4221 uint32_t oa_base, uint32_t oa_size)
4222{
4223 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
4224 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
4225
4226 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
4227 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
4228
4229 oa_base = oa_base >> AMDGPU_OA_SHIFT;
4230 oa_size = oa_size >> AMDGPU_OA_SHIFT;
4231
4232 /* GDS Base */
4233 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4234 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4235 WRITE_DATA_DST_SEL(0)));
4236 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
4237 amdgpu_ring_write(ring, 0);
4238 amdgpu_ring_write(ring, gds_base);
4239
4240 /* GDS Size */
4241 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4242 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4243 WRITE_DATA_DST_SEL(0)));
4244 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
4245 amdgpu_ring_write(ring, 0);
4246 amdgpu_ring_write(ring, gds_size);
4247
4248 /* GWS */
4249 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4250 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4251 WRITE_DATA_DST_SEL(0)));
4252 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
4253 amdgpu_ring_write(ring, 0);
4254 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4255
4256 /* OA */
4257 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4258 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4259 WRITE_DATA_DST_SEL(0)));
4260 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
4261 amdgpu_ring_write(ring, 0);
4262 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
4263}
4264
/* gfx_v8_0_early_init - early IP-block init: set ring counts and
 * install the ring/irq/gds function tables before sw/hw init run.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4277
ccba7691
AD
/* gfx_v8_0_late_init - enable the privileged register/instruction fault
 * interrupts and run the EDC GPR workaround (which needs the IB pool,
 * hence late init). Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	return 0;
}
4298
/* gfx_v8_0_set_powergating_state - powergating control (intentional no-op
 * for GFX8 in this revision; always reports success).
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4304
6e378858
EH
/* fiji_send_serdes_cmd - issue a BPM serdes command (@cmd) to register
 * @reg_addr on all SEs/SHs and all CU/non-CU masters via RLC_SERDES_WR_CTRL.
 * Caller is expected to hold whatever locking the CG path requires.
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
				 uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every shader engine / shader array */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear all command/select fields, then program BPM data + reg addr */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
		  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
		  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
		  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
		  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
		  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
		  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
		  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
		  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
		  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
		  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4334
/* fiji_update_medium_grain_clock_gating - enable/disable MGCG + MGLS +
 * CGTS on Fiji. The numbered steps must run in this order; serdes idle
 * waits separate the register programming from the BPM commands.
 * Only writes a register when its value actually changes.
 */
static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						  bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable) {
		/* 1 - RLC memory Light sleep */
		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmRLC_MEM_SLP_CNTL, data);

		/* 2 - CP memory Light sleep */
		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmCP_MEM_SLP_CNTL, data);

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* disable path: overrides first, then turn off LS and CGTS,
		 * finishing with the serdes SET command — reverse of enable
		 */
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}
4428
/* fiji_update_coarse_grain_clock_gating - enable/disable CGCG + CGLS on
 * Fiji. Follows the required sequence of override programming, serdes
 * idle waits and BPM commands; RLC_CGCG_CGLS_CTRL is written last.
 */
static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						  bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		/* enable cgls*/
		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}
}
/* fiji_update_gfx_clock_gating - toggle all GFX clockgating features.
 * Ordering matters: MGCG before CGCG when enabling, the reverse when
 * disabling. Always returns 0.
 */
static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
					bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		fiji_update_medium_grain_clock_gating(adev, enable);
		fiji_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		fiji_update_coarse_grain_clock_gating(adev, enable);
		fiji_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}
4527
5fc3aeeb 4528static int gfx_v8_0_set_clockgating_state(void *handle,
4529 enum amd_clockgating_state state)
aaa36a97 4530{
6e378858
EH
4531 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4532
4533 switch (adev->asic_type) {
4534 case CHIP_FIJI:
4535 fiji_update_gfx_clock_gating(adev,
4536 state == AMD_CG_STATE_GATE ? true : false);
4537 break;
4538 default:
4539 break;
4540 }
aaa36a97
AD
4541 return 0;
4542}
4543
4544static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4545{
4546 u32 rptr;
4547
4548 rptr = ring->adev->wb.wb[ring->rptr_offs];
4549
4550 return rptr;
4551}
4552
4553static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4554{
4555 struct amdgpu_device *adev = ring->adev;
4556 u32 wptr;
4557
4558 if (ring->use_doorbell)
4559 /* XXX check if swapping is necessary on BE */
4560 wptr = ring->adev->wb.wb[ring->wptr_offs];
4561 else
4562 wptr = RREG32(mmCP_RB0_WPTR);
4563
4564 return wptr;
4565}
4566
/* Commit the gfx ring's write pointer to the hardware: via doorbell
 * (after updating the writeback slot) or via CP_RB0_WPTR directly.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to post the write before returning */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
4580
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * the matching done bit. The ref/mask bit depends on which CP engine
 * (gfx PFP, or MEC1/MEC2 pipe) is issuing the request.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else is invalid */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4612
/* Emit an indirect buffer on the gfx ring: publishes the expected
 * next_rptr to the writeback slot, inserts a SWITCH_BUFFER on context
 * change, then emits the INDIRECT_BUFFER (or _CONST for CE) packet.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the extra SWITCH_BUFFER dwords emitted below */
	if (need_ctx_switch)
		next_rptr += 2;

	/* plus the 4 dwords of the IB packet itself */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length plus the VMID (bits 31:24) when a VM is attached */
	control |= ib->length_dw |
		   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4657
/* Emit an indirect buffer on a compute ring: publishes next_rptr to the
 * writeback slot, then emits the INDIRECT_BUFFER packet (no CE/preamble
 * handling on compute).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the 4 dwords of the IB packet below */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length plus the VMID (bits 31:24) when a VM is attached */
	control |= ib->length_dw |
		   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4687
/* Emit a gfx fence: EVENT_WRITE_EOP flushes caches, writes @seq to
 * @addr (32 or 64 bit per flags) and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4707
aaa36a97
AD
/* Emit a VM flush: waits for the last fence, writes the new page
 * directory base for @vm_id, invalidates the VM caches and waits for
 * completion. The gfx ring (PFP present) needs extra SWITCH_BUFFER and
 * PFP_SYNC_ME packets that compute rings must not emit.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the fence memory reaches the latest sync seq */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* write the new page table base; contexts 0-7 and 8-15 live in
	 * different register banks
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4777
aaa36a97
AD
4778static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4779{
4780 return ring->adev->wb.wb[ring->rptr_offs];
4781}
4782
4783static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4784{
4785 return ring->adev->wb.wb[ring->wptr_offs];
4786}
4787
/* Commit the compute ring's write pointer: update the writeback slot,
 * then ring the doorbell (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4796
/* Emit a compute fence: RELEASE_MEM flushes/writes-back caches, writes
 * @seq to @addr (32 or 64 bit per flags) and optionally raises an
 * interrupt. Compute uses RELEASE_MEM instead of the gfx EOP packet.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4817
4818static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4819 enum amdgpu_interrupt_state state)
4820{
4821 u32 cp_int_cntl;
4822
4823 switch (state) {
4824 case AMDGPU_IRQ_STATE_DISABLE:
4825 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4826 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4827 TIME_STAMP_INT_ENABLE, 0);
4828 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4829 break;
4830 case AMDGPU_IRQ_STATE_ENABLE:
4831 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4832 cp_int_cntl =
4833 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4834 TIME_STAMP_INT_ENABLE, 1);
4835 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4836 break;
4837 default:
4838 break;
4839 }
4840}
4841
/* Enable or disable the EOP (timestamp) interrupt for one MEC pipe.
 * Only MEC1 pipe 0 is driver-owned; all other me/pipe combinations are
 * rejected with a debug message.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
4885
4886static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4887 struct amdgpu_irq_src *source,
4888 unsigned type,
4889 enum amdgpu_interrupt_state state)
4890{
4891 u32 cp_int_cntl;
4892
4893 switch (state) {
4894 case AMDGPU_IRQ_STATE_DISABLE:
4895 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4896 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4897 PRIV_REG_INT_ENABLE, 0);
4898 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4899 break;
4900 case AMDGPU_IRQ_STATE_ENABLE:
4901 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4902 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4903 PRIV_REG_INT_ENABLE, 0);
4904 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4905 break;
4906 default:
4907 break;
4908 }
4909
4910 return 0;
4911}
4912
4913static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4914 struct amdgpu_irq_src *source,
4915 unsigned type,
4916 enum amdgpu_interrupt_state state)
4917{
4918 u32 cp_int_cntl;
4919
4920 switch (state) {
4921 case AMDGPU_IRQ_STATE_DISABLE:
4922 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4923 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4924 PRIV_INSTR_INT_ENABLE, 0);
4925 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4926 break;
4927 case AMDGPU_IRQ_STATE_ENABLE:
4928 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4929 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4930 PRIV_INSTR_INT_ENABLE, 1);
4931 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4932 break;
4933 default:
4934 break;
4935 }
4936
4937 return 0;
4938}
4939
/* Dispatch an EOP interrupt state change: the gfx ring goes to the
 * CP_INT_CNTL path, each compute type maps to its (me, pipe) pair.
 * Always returns 0; unknown types are silently ignored.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
4978
/* gfx_v8_0_eop_irq - CP end-of-pipe interrupt handler. Decodes me/pipe/
 * queue from the IV ring_id and runs fence processing on the matching
 * ring (the single gfx ring for ME0, the matching compute ring for
 * MEC1/MEC2). Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: [6:4] queue, [3:2] me, [1:0] pipe */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
5010
/* Interrupt handler for privileged register faults raised by the CP:
 * log the violation and schedule the device's reset work item to
 * recover the GPU.  Always reports the interrupt as handled.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5019
/* Interrupt handler for illegal CP instructions: log the violation and
 * schedule the device's reset work item to recover the GPU.  Always
 * reports the interrupt as handled.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5028
/* Common IP-block entry points for the GFX v8 engine: software/hardware
 * init and teardown, suspend/resume, idle and soft-reset handling, and
 * clock/power gating control.
 */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5045
/* Ring callbacks for the graphics (GFX) ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,	/* no command-stream parser hook on this ring */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,	/* generic helpers */
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5061
/* Ring callbacks for the compute (MEC) rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,	/* no command-stream parser hook on this ring */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,	/* generic helpers */
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5077
5078static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5079{
5080 int i;
5081
5082 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5083 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5084
5085 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5086 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5087}
5088
/* IRQ source callbacks: CP end-of-pipe (fence) interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
5093
/* IRQ source callbacks: privileged register fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
5098
/* IRQ source callbacks: illegal instruction interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5103
/* Register the GFX interrupt sources: one EOP type per CP ring/pipe,
 * plus a single type each for privileged-register and illegal-
 * instruction faults.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
5115
5116static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5117{
5118 /* init asci gds info */
5119 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5120 adev->gds.gws.total_size = 64;
5121 adev->gds.oa.total_size = 16;
5122
5123 if (adev->gds.mem.total_size == 64 * 1024) {
5124 adev->gds.mem.gfx_partition_size = 4096;
5125 adev->gds.mem.cs_partition_size = 4096;
5126
5127 adev->gds.gws.gfx_partition_size = 4;
5128 adev->gds.gws.cs_partition_size = 4;
5129
5130 adev->gds.oa.gfx_partition_size = 4;
5131 adev->gds.oa.cs_partition_size = 1;
5132 } else {
5133 adev->gds.mem.gfx_partition_size = 1024;
5134 adev->gds.mem.cs_partition_size = 1024;
5135
5136 adev->gds.gws.gfx_partition_size = 16;
5137 adev->gds.gws.cs_partition_size = 16;
5138
5139 adev->gds.oa.gfx_partition_size = 4;
5140 adev->gds.oa.cs_partition_size = 4;
5141 }
5142}
5143
8f8e00c1 5144static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 5145{
8f8e00c1 5146 u32 data, mask;
aaa36a97 5147
8f8e00c1
AD
5148 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5149 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 5150
8f8e00c1
AD
5151 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5152 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
aaa36a97 5153
8f8e00c1
AD
5154 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
5155 adev->gfx.config.max_sh_per_se);
aaa36a97 5156
8f8e00c1 5157 return (~data) & mask;
aaa36a97
AD
5158}
5159
5160int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
8f8e00c1 5161 struct amdgpu_cu_info *cu_info)
aaa36a97
AD
5162{
5163 int i, j, k, counter, active_cu_number = 0;
5164 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5165
5166 if (!adev || !cu_info)
5167 return -EINVAL;
5168
5169 mutex_lock(&adev->grbm_idx_mutex);
5170 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5171 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5172 mask = 1;
5173 ao_bitmap = 0;
5174 counter = 0;
8f8e00c1
AD
5175 gfx_v8_0_select_se_sh(adev, i, j);
5176 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
5177 cu_info->bitmap[i][j] = bitmap;
5178
8f8e00c1 5179 for (k = 0; k < 16; k ++) {
aaa36a97
AD
5180 if (bitmap & mask) {
5181 if (counter < 2)
5182 ao_bitmap |= mask;
5183 counter ++;
5184 }
5185 mask <<= 1;
5186 }
5187 active_cu_number += counter;
5188 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5189 }
5190 }
8f8e00c1
AD
5191 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5192 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
5193
5194 cu_info->number = active_cu_number;
5195 cu_info->ao_cu_mask = ao_cu_mask;
8f8e00c1 5196
aaa36a97
AD
5197 return 0;
5198}
This page took 0.291177 seconds and 5 git commands to generate.