i7core_edac: Fix refcount error at PCI devices
[deliverable/linux.git] / drivers / edac / i7core_edac.c
CommitLineData
52707f91
MCC
1/* Intel i7 core/Nehalem Memory Controller kernel module
2 *
3 * This driver supports the memory controllers found on the Intel
4 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6 * and Westmere-EP.
a0c36a1f
MCC
7 *
8 * This file may be distributed under the terms of the
9 * GNU General Public License version 2 only.
10 *
52707f91 11 * Copyright (c) 2009-2010 by:
a0c36a1f
MCC
12 * Mauro Carvalho Chehab <mchehab@redhat.com>
13 *
14 * Red Hat Inc. http://www.redhat.com
15 *
16 * Forked and adapted from the i5400_edac driver
17 *
18 * Based on the following public Intel datasheets:
19 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20 * Datasheet, Volume 2:
21 * http://download.intel.com/design/processor/datashts/320835.pdf
22 * Intel Xeon Processor 5500 Series Datasheet Volume 2
23 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24 * also available at:
25 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26 */
27
a0c36a1f
MCC
28#include <linux/module.h>
29#include <linux/init.h>
30#include <linux/pci.h>
31#include <linux/pci_ids.h>
32#include <linux/slab.h>
3b918c12 33#include <linux/delay.h>
a0c36a1f
MCC
34#include <linux/edac.h>
35#include <linux/mmzone.h>
d5381642 36#include <linux/edac_mce.h>
f4742949 37#include <linux/smp.h>
14d2c083 38#include <asm/processor.h>
a0c36a1f
MCC
39
40#include "edac_core.h"
41
18c29002
MCC
42/* Static vars */
43static LIST_HEAD(i7core_edac_list);
44static DEFINE_MUTEX(i7core_edac_lock);
45static int probed;
46
54a08ab1
MCC
47static int use_pci_fixup;
48module_param(use_pci_fixup, int, 0444);
49MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
f4742949
MCC
50/*
51 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52 * registers start at bus 255, and are not reported by BIOS.
53 * We currently find devices with only 2 sockets. In order to support more QPI
54 * Quick Path Interconnect, just increment this number.
55 */
56#define MAX_SOCKET_BUSES 2
57
58
a0c36a1f
MCC
59/*
60 * Alter this version for the module when modifications are made
61 */
62#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
63#define EDAC_MOD_STR "i7core_edac"
64
a0c36a1f
MCC
65/*
66 * Debug macros
67 */
68#define i7core_printk(level, fmt, arg...) \
69 edac_printk(level, "i7core", fmt, ##arg)
70
71#define i7core_mc_printk(mci, level, fmt, arg...) \
72 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
73
74/*
75 * i7core Memory Controller Registers
76 */
77
e9bd2e73
MCC
78 /* OFFSETS for Device 0 Function 0 */
79
80#define MC_CFG_CONTROL 0x90
81
a0c36a1f
MCC
82 /* OFFSETS for Device 3 Function 0 */
83
84#define MC_CONTROL 0x48
85#define MC_STATUS 0x4c
86#define MC_MAX_DOD 0x64
87
442305b1
MCC
88/*
89 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
90 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
91 */
92
93#define MC_TEST_ERR_RCV1 0x60
94 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
95
96#define MC_TEST_ERR_RCV0 0x64
97 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
98 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
99
b4e8f0b6
MCC
100/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
101#define MC_COR_ECC_CNT_0 0x80
102#define MC_COR_ECC_CNT_1 0x84
103#define MC_COR_ECC_CNT_2 0x88
104#define MC_COR_ECC_CNT_3 0x8c
105#define MC_COR_ECC_CNT_4 0x90
106#define MC_COR_ECC_CNT_5 0x94
107
108#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
109#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
110
111
a0c36a1f
MCC
112 /* OFFSETS for Devices 4,5 and 6 Function 0 */
113
0b2b7b7e
MCC
114#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115 #define THREE_DIMMS_PRESENT (1 << 24)
116 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
117 #define QUAD_RANK_PRESENT (1 << 22)
118 #define REGISTERED_DIMM (1 << 15)
119
f122a892
MCC
120#define MC_CHANNEL_MAPPER 0x60
121 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
122 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
123
0b2b7b7e
MCC
124#define MC_CHANNEL_RANK_PRESENT 0x7c
125 #define RANK_PRESENT_MASK 0xffff
126
a0c36a1f 127#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
128#define MC_CHANNEL_ERROR_MASK 0xf8
129#define MC_CHANNEL_ERROR_INJECT 0xfc
130 #define INJECT_ADDR_PARITY 0x10
131 #define INJECT_ECC 0x08
132 #define MASK_CACHELINE 0x06
133 #define MASK_FULL_CACHELINE 0x06
134 #define MASK_MSB32_CACHELINE 0x04
135 #define MASK_LSB32_CACHELINE 0x02
136 #define NO_MASK_CACHELINE 0x00
137 #define REPEAT_EN 0x01
a0c36a1f 138
0b2b7b7e 139 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 140
0b2b7b7e
MCC
141#define MC_DOD_CH_DIMM0 0x48
142#define MC_DOD_CH_DIMM1 0x4c
143#define MC_DOD_CH_DIMM2 0x50
144 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
145 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
146 #define DIMM_PRESENT_MASK (1 << 9)
147 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
148 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
149 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
150 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
151 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 152 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 153 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
154 #define MC_DOD_NUMCOL_MASK 3
155 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 156
f122a892
MCC
157#define MC_RANK_PRESENT 0x7c
158
0b2b7b7e
MCC
159#define MC_SAG_CH_0 0x80
160#define MC_SAG_CH_1 0x84
161#define MC_SAG_CH_2 0x88
162#define MC_SAG_CH_3 0x8c
163#define MC_SAG_CH_4 0x90
164#define MC_SAG_CH_5 0x94
165#define MC_SAG_CH_6 0x98
166#define MC_SAG_CH_7 0x9c
167
168#define MC_RIR_LIMIT_CH_0 0x40
169#define MC_RIR_LIMIT_CH_1 0x44
170#define MC_RIR_LIMIT_CH_2 0x48
171#define MC_RIR_LIMIT_CH_3 0x4C
172#define MC_RIR_LIMIT_CH_4 0x50
173#define MC_RIR_LIMIT_CH_5 0x54
174#define MC_RIR_LIMIT_CH_6 0x58
175#define MC_RIR_LIMIT_CH_7 0x5C
176#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
177
178#define MC_RIR_WAY_CH 0x80
179 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
180 #define MC_RIR_WAY_RANK_MASK 0x7
181
a0c36a1f
MCC
182/*
183 * i7core structs
184 */
185
186#define NUM_CHANS 3
442305b1
MCC
187#define MAX_DIMMS 3 /* Max DIMMS per channel */
188#define MAX_MCR_FUNC 4
189#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
190
191struct i7core_info {
192 u32 mc_control;
193 u32 mc_status;
194 u32 max_dod;
f122a892 195 u32 ch_map;
a0c36a1f
MCC
196};
197
194a40fe
MCC
198
199struct i7core_inject {
200 int enable;
201
202 u32 section;
203 u32 type;
204 u32 eccmask;
205
206 /* Error address mask */
207 int channel, dimm, rank, bank, page, col;
208};
209
0b2b7b7e 210struct i7core_channel {
442305b1
MCC
211 u32 ranks;
212 u32 dimms;
0b2b7b7e
MCC
213};
214
8f331907 215struct pci_id_descr {
66607706
MCC
216 int dev;
217 int func;
218 int dev_id;
de06eeef 219 int optional;
8f331907
MCC
220};
221
bd9e19ca 222struct pci_id_table {
1288c18f
MCC
223 const struct pci_id_descr *descr;
224 int n_devs;
bd9e19ca
VM
225};
226
f4742949
MCC
227struct i7core_dev {
228 struct list_head list;
229 u8 socket;
230 struct pci_dev **pdev;
de06eeef 231 int n_devs;
f4742949
MCC
232 struct mem_ctl_info *mci;
233};
234
a0c36a1f 235struct i7core_pvt {
f4742949
MCC
236 struct pci_dev *pci_noncore;
237 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
238 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
239
240 struct i7core_dev *i7core_dev;
67166af4 241
a0c36a1f 242 struct i7core_info info;
194a40fe 243 struct i7core_inject inject;
f4742949 244 struct i7core_channel channel[NUM_CHANS];
67166af4 245
f4742949
MCC
246 int ce_count_available;
247 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
248
249 /* ECC corrected errors counts per udimm */
f4742949
MCC
250 unsigned long udimm_ce_count[MAX_DIMMS];
251 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 252 /* ECC corrected errors counts per rdimm */
f4742949
MCC
253 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
254 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 255
f4742949 256 unsigned int is_registered;
14d2c083 257
d5381642
MCC
258 /* mcelog glue */
259 struct edac_mce edac_mce;
ca9c90ba
MCC
260
261 /* Fifo double buffers */
d5381642 262 struct mce mce_entry[MCE_LOG_LEN];
ca9c90ba
MCC
263 struct mce mce_outentry[MCE_LOG_LEN];
264
265 /* Fifo in/out counters */
266 unsigned mce_in, mce_out;
267
268 /* Count indicator to show errors not got */
269 unsigned mce_overrun;
939747bd
MCC
270
271 /* Struct to control EDAC polling */
272 struct edac_pci_ctl_info *i7core_pci;
a0c36a1f
MCC
273};
274
8f331907
MCC
275#define PCI_DESCR(device, function, device_id) \
276 .dev = (device), \
277 .func = (function), \
278 .dev_id = (device_id)
279
1288c18f 280static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
8f331907
MCC
281 /* Memory controller */
282 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
283 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
79daef20
MCC
284
285 /* Exists only for RDIMM */
de06eeef 286 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
8f331907
MCC
287 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
288
289 /* Channel 0 */
290 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
291 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
292 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
293 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
294
295 /* Channel 1 */
296 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
297 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
298 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
299 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
300
301 /* Channel 2 */
302 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
303 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
304 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
305 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
a0c36a1f 306};
8f331907 307
1288c18f 308static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
52a2e4fc
MCC
309 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR) },
310 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD) },
311 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST) },
312
313 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL) },
314 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR) },
315 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK) },
316 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC) },
317
508fa179
MCC
318 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL) },
319 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR) },
320 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK) },
321 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC) },
52a2e4fc
MCC
322};
323
1288c18f 324static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
bd9e19ca
VM
325 /* Memory controller */
326 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2) },
327 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2) },
328 /* Exists only for RDIMM */
329 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
330 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2) },
331
332 /* Channel 0 */
333 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2) },
334 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2) },
335 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2) },
336 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2) },
337
338 /* Channel 1 */
339 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2) },
340 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2) },
341 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2) },
342 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2) },
343
344 /* Channel 2 */
345 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2) },
346 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2) },
347 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2) },
348 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2) },
bd9e19ca
VM
349};
350
1288c18f
MCC
351#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
352static const struct pci_id_table pci_dev_table[] = {
bd9e19ca
VM
353 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
354 PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
355 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
356};
357
8f331907
MCC
358/*
359 * pci_device_id table for which devices we are looking for
8f331907
MCC
360 */
361static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 362 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
f05da2f7 363 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0)},
8f331907
MCC
364 {0,} /* 0 terminated list. */
365};
366
a0c36a1f
MCC
367/****************************************************************************
368 Anciliary status routines
369 ****************************************************************************/
370
371 /* MC_CONTROL bits */
ef708b53
MCC
372#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
373#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
374
375 /* MC_STATUS bits */
61053fde 376#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 377#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
378
379 /* MC_MAX_DOD read functions */
854d3349 380static inline int numdimms(u32 dimms)
a0c36a1f 381{
854d3349 382 return (dimms & 0x3) + 1;
a0c36a1f
MCC
383}
384
854d3349 385static inline int numrank(u32 rank)
a0c36a1f
MCC
386{
387 static int ranks[4] = { 1, 2, 4, -EINVAL };
388
854d3349 389 return ranks[rank & 0x3];
a0c36a1f
MCC
390}
391
854d3349 392static inline int numbank(u32 bank)
a0c36a1f
MCC
393{
394 static int banks[4] = { 4, 8, 16, -EINVAL };
395
854d3349 396 return banks[bank & 0x3];
a0c36a1f
MCC
397}
398
854d3349 399static inline int numrow(u32 row)
a0c36a1f
MCC
400{
401 static int rows[8] = {
402 1 << 12, 1 << 13, 1 << 14, 1 << 15,
403 1 << 16, -EINVAL, -EINVAL, -EINVAL,
404 };
405
854d3349 406 return rows[row & 0x7];
a0c36a1f
MCC
407}
408
854d3349 409static inline int numcol(u32 col)
a0c36a1f
MCC
410{
411 static int cols[8] = {
412 1 << 10, 1 << 11, 1 << 12, -EINVAL,
413 };
854d3349 414 return cols[col & 0x3];
a0c36a1f
MCC
415}
416
f4742949 417static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
418{
419 struct i7core_dev *i7core_dev;
420
421 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
422 if (i7core_dev->socket == socket)
423 return i7core_dev;
424 }
425
426 return NULL;
427}
428
848b2f7e
HS
429static struct i7core_dev *alloc_i7core_dev(u8 socket,
430 const struct pci_id_table *table)
431{
432 struct i7core_dev *i7core_dev;
433
434 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
435 if (!i7core_dev)
436 return NULL;
437
438 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
439 GFP_KERNEL);
440 if (!i7core_dev->pdev) {
441 kfree(i7core_dev);
442 return NULL;
443 }
444
445 i7core_dev->socket = socket;
446 i7core_dev->n_devs = table->n_devs;
447 list_add_tail(&i7core_dev->list, &i7core_edac_list);
448
449 return i7core_dev;
450}
451
2aa9be44
HS
452static void free_i7core_dev(struct i7core_dev *i7core_dev)
453{
454 list_del(&i7core_dev->list);
455 kfree(i7core_dev->pdev);
456 kfree(i7core_dev);
457}
458
a0c36a1f
MCC
459/****************************************************************************
460 Memory check routines
461 ****************************************************************************/
67166af4
MCC
462static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
463 unsigned func)
ef708b53 464{
66607706 465 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 466 int i;
ef708b53 467
66607706
MCC
468 if (!i7core_dev)
469 return NULL;
470
de06eeef 471 for (i = 0; i < i7core_dev->n_devs; i++) {
66607706 472 if (!i7core_dev->pdev[i])
ef708b53
MCC
473 continue;
474
66607706
MCC
475 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
476 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
477 return i7core_dev->pdev[i];
ef708b53
MCC
478 }
479 }
480
eb94fc40
MCC
481 return NULL;
482}
483
ec6df24c
MCC
484/**
485 * i7core_get_active_channels() - gets the number of channels and csrows
486 * @socket: Quick Path Interconnect socket
487 * @channels: Number of channels that will be returned
488 * @csrows: Number of csrows found
489 *
490 * Since EDAC core needs to know in advance the number of available channels
491 * and csrows, in order to allocate memory for csrows/channels, it is needed
492 * to run two similar steps. At the first step, implemented on this function,
493 * it checks the number of csrows/channels present at one socket.
494 * this is used in order to properly allocate the size of mci components.
495 *
496 * It should be noticed that none of the current available datasheets explain
497 * or even mention how csrows are seen by the memory controller. So, we need
498 * to add a fake description for csrows.
499 * So, this driver is attributing one DIMM memory for one csrow.
500 */
1288c18f 501static int i7core_get_active_channels(const u8 socket, unsigned *channels,
67166af4 502 unsigned *csrows)
eb94fc40
MCC
503{
504 struct pci_dev *pdev = NULL;
505 int i, j;
506 u32 status, control;
507
508 *channels = 0;
509 *csrows = 0;
510
67166af4 511 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 512 if (!pdev) {
67166af4
MCC
513 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
514 socket);
ef708b53 515 return -ENODEV;
b7c76151 516 }
ef708b53
MCC
517
518 /* Device 3 function 0 reads */
519 pci_read_config_dword(pdev, MC_STATUS, &status);
520 pci_read_config_dword(pdev, MC_CONTROL, &control);
521
522 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 523 u32 dimm_dod[3];
ef708b53
MCC
524 /* Check if the channel is active */
525 if (!(control & (1 << (8 + i))))
526 continue;
527
528 /* Check if the channel is disabled */
41fcb7fe 529 if (status & (1 << i))
ef708b53 530 continue;
ef708b53 531
67166af4 532 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 533 if (!pdev) {
67166af4
MCC
534 i7core_printk(KERN_ERR, "Couldn't find socket %d "
535 "fn %d.%d!!!\n",
536 socket, i + 4, 1);
eb94fc40
MCC
537 return -ENODEV;
538 }
539 /* Devices 4-6 function 1 */
540 pci_read_config_dword(pdev,
541 MC_DOD_CH_DIMM0, &dimm_dod[0]);
542 pci_read_config_dword(pdev,
543 MC_DOD_CH_DIMM1, &dimm_dod[1]);
544 pci_read_config_dword(pdev,
545 MC_DOD_CH_DIMM2, &dimm_dod[2]);
546
ef708b53 547 (*channels)++;
eb94fc40
MCC
548
549 for (j = 0; j < 3; j++) {
550 if (!DIMM_PRESENT(dimm_dod[j]))
551 continue;
552 (*csrows)++;
553 }
ef708b53
MCC
554 }
555
c77720b9 556 debugf0("Number of active channels on socket %d: %d\n",
67166af4 557 socket, *channels);
1c6fed80 558
ef708b53
MCC
559 return 0;
560}
561
2e5185f7 562static int get_dimm_config(const struct mem_ctl_info *mci)
a0c36a1f
MCC
563{
564 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 565 struct csrow_info *csr;
854d3349 566 struct pci_dev *pdev;
ba6c5c62 567 int i, j;
2e5185f7 568 int csrow = 0;
5566cb7c 569 unsigned long last_page = 0;
1c6fed80 570 enum edac_type mode;
854d3349 571 enum mem_type mtype;
a0c36a1f 572
854d3349 573 /* Get data from the MC register, function 0 */
f4742949 574 pdev = pvt->pci_mcr[0];
7dd6953c 575 if (!pdev)
8f331907
MCC
576 return -ENODEV;
577
f122a892 578 /* Device 3 function 0 reads */
7dd6953c
MCC
579 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
580 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
581 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
582 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 583
17cb7b0c 584 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
4af91889 585 pvt->i7core_dev->socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 586 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 587
1c6fed80 588 if (ECC_ENABLED(pvt)) {
41fcb7fe 589 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
590 if (ECCx8(pvt))
591 mode = EDAC_S8ECD8ED;
592 else
593 mode = EDAC_S4ECD4ED;
594 } else {
a0c36a1f 595 debugf0("ECC disabled\n");
1c6fed80
MCC
596 mode = EDAC_NONE;
597 }
a0c36a1f
MCC
598
599 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
600 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
601 "x%x x 0x%x\n",
854d3349
MCC
602 numdimms(pvt->info.max_dod),
603 numrank(pvt->info.max_dod >> 2),
276b824c 604 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
605 numrow(pvt->info.max_dod >> 6),
606 numcol(pvt->info.max_dod >> 9));
a0c36a1f 607
0b2b7b7e 608 for (i = 0; i < NUM_CHANS; i++) {
854d3349 609 u32 data, dimm_dod[3], value[8];
0b2b7b7e 610
52a2e4fc
MCC
611 if (!pvt->pci_ch[i][0])
612 continue;
613
0b2b7b7e
MCC
614 if (!CH_ACTIVE(pvt, i)) {
615 debugf0("Channel %i is not active\n", i);
616 continue;
617 }
618 if (CH_DISABLED(pvt, i)) {
619 debugf0("Channel %i is disabled\n", i);
620 continue;
621 }
622
f122a892 623 /* Devices 4-6 function 0 */
f4742949 624 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
625 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
626
f4742949 627 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 628 4 : 2;
0b2b7b7e 629
854d3349
MCC
630 if (data & REGISTERED_DIMM)
631 mtype = MEM_RDDR3;
14d2c083 632 else
854d3349
MCC
633 mtype = MEM_DDR3;
634#if 0
0b2b7b7e
MCC
635 if (data & THREE_DIMMS_PRESENT)
636 pvt->channel[i].dimms = 3;
637 else if (data & SINGLE_QUAD_RANK_PRESENT)
638 pvt->channel[i].dimms = 1;
639 else
640 pvt->channel[i].dimms = 2;
854d3349
MCC
641#endif
642
643 /* Devices 4-6 function 1 */
f4742949 644 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 645 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 646 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 647 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 648 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 649 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 650
1c6fed80 651 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 652 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
653 i,
654 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
655 data,
f4742949 656 pvt->channel[i].ranks,
41fcb7fe 657 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
658
659 for (j = 0; j < 3; j++) {
660 u32 banks, ranks, rows, cols;
5566cb7c 661 u32 size, npages;
854d3349
MCC
662
663 if (!DIMM_PRESENT(dimm_dod[j]))
664 continue;
665
666 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
667 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
668 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
669 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
670
5566cb7c
MCC
671 /* DDR3 has 8 I/O banks */
672 size = (rows * cols * banks * ranks) >> (20 - 3);
673
f4742949 674 pvt->channel[i].dimms++;
854d3349 675
17cb7b0c
MCC
676 debugf0("\tdimm %d %d Mb offset: %x, "
677 "bank: %d, rank: %d, row: %#x, col: %#x\n",
678 j, size,
854d3349
MCC
679 RANKOFFSET(dimm_dod[j]),
680 banks, ranks, rows, cols);
681
e9144601 682 npages = MiB_TO_PAGES(size);
5566cb7c 683
2e5185f7 684 csr = &mci->csrows[csrow];
5566cb7c
MCC
685 csr->first_page = last_page + 1;
686 last_page += npages;
687 csr->last_page = last_page;
688 csr->nr_pages = npages;
689
854d3349 690 csr->page_mask = 0;
eb94fc40 691 csr->grain = 8;
2e5185f7 692 csr->csrow_idx = csrow;
eb94fc40
MCC
693 csr->nr_channels = 1;
694
695 csr->channels[0].chan_idx = i;
696 csr->channels[0].ce_count = 0;
854d3349 697
2e5185f7 698 pvt->csrow_map[i][j] = csrow;
b4e8f0b6 699
854d3349
MCC
700 switch (banks) {
701 case 4:
702 csr->dtype = DEV_X4;
703 break;
704 case 8:
705 csr->dtype = DEV_X8;
706 break;
707 case 16:
708 csr->dtype = DEV_X16;
709 break;
710 default:
711 csr->dtype = DEV_UNKNOWN;
712 }
713
714 csr->edac_mode = mode;
715 csr->mtype = mtype;
716
2e5185f7 717 csrow++;
854d3349 718 }
1c6fed80 719
854d3349
MCC
720 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
721 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
722 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
723 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
724 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
725 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
726 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
727 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 728 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 729 for (j = 0; j < 8; j++)
17cb7b0c 730 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
731 (value[j] >> 27) & 0x1,
732 (value[j] >> 24) & 0x7,
733 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
734 }
735
a0c36a1f
MCC
736 return 0;
737}
738
194a40fe
MCC
739/****************************************************************************
740 Error insertion routines
741 ****************************************************************************/
742
743/* The i7core has independent error injection features per channel.
744 However, to have a simpler code, we don't allow enabling error injection
745 on more than one channel.
746 Also, since a change at an inject parameter will be applied only at enable,
747 we're disabling error injection on all write calls to the sysfs nodes that
748 controls the error code injection.
749 */
1288c18f 750static int disable_inject(const struct mem_ctl_info *mci)
194a40fe
MCC
751{
752 struct i7core_pvt *pvt = mci->pvt_info;
753
754 pvt->inject.enable = 0;
755
f4742949 756 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
757 return -ENODEV;
758
f4742949 759 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 760 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
761
762 return 0;
194a40fe
MCC
763}
764
765/*
766 * i7core inject inject.section
767 *
768 * accept and store error injection inject.section value
769 * bit 0 - refers to the lower 32-byte half cacheline
770 * bit 1 - refers to the upper 32-byte half cacheline
771 */
772static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
773 const char *data, size_t count)
774{
775 struct i7core_pvt *pvt = mci->pvt_info;
776 unsigned long value;
777 int rc;
778
779 if (pvt->inject.enable)
41fcb7fe 780 disable_inject(mci);
194a40fe
MCC
781
782 rc = strict_strtoul(data, 10, &value);
783 if ((rc < 0) || (value > 3))
2068def5 784 return -EIO;
194a40fe
MCC
785
786 pvt->inject.section = (u32) value;
787 return count;
788}
789
790static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
791 char *data)
792{
793 struct i7core_pvt *pvt = mci->pvt_info;
794 return sprintf(data, "0x%08x\n", pvt->inject.section);
795}
796
797/*
798 * i7core inject.type
799 *
800 * accept and store error injection inject.section value
801 * bit 0 - repeat enable - Enable error repetition
802 * bit 1 - inject ECC error
803 * bit 2 - inject parity error
804 */
805static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
806 const char *data, size_t count)
807{
808 struct i7core_pvt *pvt = mci->pvt_info;
809 unsigned long value;
810 int rc;
811
812 if (pvt->inject.enable)
41fcb7fe 813 disable_inject(mci);
194a40fe
MCC
814
815 rc = strict_strtoul(data, 10, &value);
816 if ((rc < 0) || (value > 7))
2068def5 817 return -EIO;
194a40fe
MCC
818
819 pvt->inject.type = (u32) value;
820 return count;
821}
822
823static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
824 char *data)
825{
826 struct i7core_pvt *pvt = mci->pvt_info;
827 return sprintf(data, "0x%08x\n", pvt->inject.type);
828}
829
830/*
831 * i7core_inject_inject.eccmask_store
832 *
833 * The type of error (UE/CE) will depend on the inject.eccmask value:
834 * Any bits set to a 1 will flip the corresponding ECC bit
835 * Correctable errors can be injected by flipping 1 bit or the bits within
836 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
837 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
838 * uncorrectable error to be injected.
839 */
840static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
841 const char *data, size_t count)
842{
843 struct i7core_pvt *pvt = mci->pvt_info;
844 unsigned long value;
845 int rc;
846
847 if (pvt->inject.enable)
41fcb7fe 848 disable_inject(mci);
194a40fe
MCC
849
850 rc = strict_strtoul(data, 10, &value);
851 if (rc < 0)
2068def5 852 return -EIO;
194a40fe
MCC
853
854 pvt->inject.eccmask = (u32) value;
855 return count;
856}
857
858static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
859 char *data)
860{
861 struct i7core_pvt *pvt = mci->pvt_info;
862 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
863}
864
865/*
866 * i7core_addrmatch
867 *
868 * The type of error (UE/CE) will depend on the inject.eccmask value:
869 * Any bits set to a 1 will flip the corresponding ECC bit
870 * Correctable errors can be injected by flipping 1 bit or the bits within
871 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
872 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
873 * uncorrectable error to be injected.
874 */
194a40fe 875
a5538e53
MCC
/*
 * Generates the sysfs store/show pair for one address-match injection
 * parameter.  Writing "any" (stored as -1) disables matching on that
 * field; otherwise the value must be below @limit.
 *
 * Fix: @value is a signed long (it legitimately holds -1), so it must
 * be parsed with strict_strtol(); strict_strtoul() expects an
 * unsigned long pointer and &value was an incompatible pointer type.
 */
#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
	struct mem_ctl_info *mci,				\
	const char *data, size_t count)				\
{								\
	struct i7core_pvt *pvt;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtol(data, 10, &value);		\
		if ((rc < 0) || (value >= limit))		\
			return -EIO;				\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
	struct mem_ctl_info *mci,				\
	char *data)						\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}
917
a5538e53
MCC
918#define ATTR_ADDR_MATCH(param) \
919 { \
920 .attr = { \
921 .name = #param, \
922 .mode = (S_IRUGO | S_IWUSR) \
923 }, \
924 .show = i7core_inject_show_##param, \
925 .store = i7core_inject_store_##param, \
926 }
194a40fe 927
a5538e53
MCC
928DECLARE_ADDR_MATCH(channel, 3);
929DECLARE_ADDR_MATCH(dimm, 3);
930DECLARE_ADDR_MATCH(rank, 4);
931DECLARE_ADDR_MATCH(bank, 32);
932DECLARE_ADDR_MATCH(page, 0x10000);
933DECLARE_ADDR_MATCH(col, 0x4000);
194a40fe 934
1288c18f 935static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
276b824c
MCC
936{
937 u32 read;
938 int count;
939
4157d9f5
MCC
940 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
941 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
942 where, val);
943
276b824c
MCC
944 for (count = 0; count < 10; count++) {
945 if (count)
b990538a 946 msleep(100);
276b824c
MCC
947 pci_write_config_dword(dev, where, val);
948 pci_read_config_dword(dev, where, &read);
949
950 if (read == val)
951 return 0;
952 }
953
4157d9f5
MCC
954 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
955 "write=%08x. Read=%08x\n",
956 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
957 where, val, read);
276b824c
MCC
958
959 return -EINVAL;
960}
961
/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU to ignore
 * that matching criteria for error injection.
 *
 * It should be noticed that the error will only happen after a write operation
 * on a memory that matches the condition. if REPEAT_EN is not enabled at
 * inject mask, then it will produce just one error. Otherwise, it will repeat
 * until the injectmask would be cleaned.
 *
 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
 *	  is reliable enough to check if the MC is using the
 *	  three channels. However, this is not clear at the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
				       const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	/* Nothing to program if the selected channel has no device bound */
	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtoul(data, 10, &enable);
	if ((rc < 0))
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		/* Writing 0 disables injection and returns immediately */
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1LL << 41;	/* "match any dimm" wildcard bit */
	else {
		/* Field position depends on 2-dimm vs 3-dimm channel layout */
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3LL) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1LL) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1LL << 40;	/* "match any rank" wildcard bit */
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1LL) << 34;
		else
			mask |= (pvt->inject.rank & 0x3LL) << 34;
	}

	/* Sets pvt->inject.bank mask */
	if (pvt->inject.bank < 0)
		mask |= 1LL << 39;	/* "match any bank" wildcard bit */
	else
		/*
		 * NOTE(review): 0x15 looks suspicious - the sysfs limit for
		 * bank is 32 (DECLARE_ADDR_MATCH(bank, 32)), which would need
		 * a 0x1f mask. Confirm against the MC_CHANNEL_ADDR_MATCH
		 * layout in the datasheet.
		 */
		mask |= (pvt->inject.bank & 0x15LL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1LL << 38;	/* "match any page" wildcard bit */
	else
		mask |= (pvt->inject.page & 0xffff) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1LL << 37;	/* "match any column" wildcard bit */
	else
		mask |= (pvt->inject.col & 0x3fff);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	/* Program the 64-bit address-match mask as two dwords */
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);


	return count;
}
/*
 * i7core_inject_enable_show - sysfs "inject_enable" read handler.
 *
 * Reads MC_CHANNEL_ERROR_INJECT back from the selected channel device and
 * reports the cached enable flag; bits 2-3 (INJECT_ECC/INJECT_ADDR_PARITY
 * per the layout documented in the store handler) being set means an
 * injection is armed, so the flag is refreshed from hardware.
 */
static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	/* NOTE(review): "%018x" width on a u32 looks odd - probably meant
	 * "%08x"; harmless, debug output only. */
	debugf0("Inject error read: 0x%018x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}
/*
 * DECLARE_COUNTER - generate the sysfs show handler for one udimm
 * corrected-error counter. The counters are only meaningful for
 * unregistered (udimm) memory and after at least one poll has run,
 * hence the ce_count_available / is_registered guard.
 */
#define DECLARE_COUNTER(param) \
static ssize_t i7core_show_counter_##param( \
	struct mem_ctl_info *mci, \
	char *data) \
{ \
	struct i7core_pvt *pvt = mci->pvt_info; \
	\
	debugf1("%s() \n", __func__); \
	if (!pvt->ce_count_available || (pvt->is_registered)) \
		return sprintf(data, "data unavailable\n"); \
	return sprintf(data, "%lu\n", \
			pvt->udimm_ce_count[param]); \
}

/*
 * ATTR_COUNTER - sysfs table entry named "udimm<param>" (read-only show
 * handler; no store).
 */
#define ATTR_COUNTER(param) \
	{ \
		.attr = { \
			.name = __stringify(udimm##param), \
			.mode = (S_IRUGO | S_IWUSR) \
		}, \
		.show = i7core_show_counter_##param \
	}

/* One counter per dimm slot (0..2) on a channel */
DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);
/*
 * Sysfs struct
 *
 * Two top-level attribute sets are built here: one for registered (rdimm)
 * memory and one for unregistered (udimm) memory. They share the injection
 * controls; the udimm set additionally exposes the per-dimm corrected-error
 * counters (rdimm counters are reported through the csrow machinery
 * instead).
 */

/* Address-match sub-attributes, grouped under "inject_addrmatch" */
static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ } /* End of list */
};

static const struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

/* udimm corrected-error counters, grouped under "all_channel_counts" */
static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	/* NOTE(review): other tables end with "{ }"; same effect, but the
	 * explicit form here is inconsistent. */
	{ .attr = { .name = NULL } }
};

static const struct mcidev_sysfs_group i7core_udimm_counters = {
	.name  = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};

/* Attribute set used when the sockets carry registered dimms */
static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ }	/* End of list */
};

/* Attribute set used for unregistered dimms: same injection controls plus
 * the all_channel_counts group. */
static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.grp = &i7core_udimm_counters,
	},
	{ }	/* End of list */
};
a0c36a1f
MCC
1233/****************************************************************************
1234 Device initialization routines: put/get, init/exit
1235 ****************************************************************************/
1236
1237/*
64c10f6e 1238 * i7core_put_all_devices 'put' all the devices that we have
a0c36a1f
MCC
1239 * reserved via 'get'
1240 */
13d6e9b6 1241static void i7core_put_devices(struct i7core_dev *i7core_dev)
a0c36a1f 1242{
13d6e9b6 1243 int i;
a0c36a1f 1244
22e6bcbd 1245 debugf0(__FILE__ ": %s()\n", __func__);
de06eeef 1246 for (i = 0; i < i7core_dev->n_devs; i++) {
22e6bcbd
MCC
1247 struct pci_dev *pdev = i7core_dev->pdev[i];
1248 if (!pdev)
1249 continue;
1250 debugf0("Removing dev %02x:%02x.%d\n",
1251 pdev->bus->number,
1252 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1253 pci_dev_put(pdev);
1254 }
13d6e9b6 1255}
66607706 1256
13d6e9b6
MCC
1257static void i7core_put_all_devices(void)
1258{
42538680 1259 struct i7core_dev *i7core_dev, *tmp;
13d6e9b6 1260
39300e71 1261 list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
13d6e9b6 1262 i7core_put_devices(i7core_dev);
2aa9be44 1263 free_i7core_dev(i7core_dev);
39300e71 1264 }
a0c36a1f
MCC
1265}
1266
1288c18f 1267static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
bc2d7245
KM
1268{
1269 struct pci_dev *pdev = NULL;
1270 int i;
54a08ab1 1271
bc2d7245
KM
1272 /*
1273 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1274 * aren't announced by acpi. So, we need to use a legacy scan probing
1275 * to detect them
1276 */
bd9e19ca
VM
1277 while (table && table->descr) {
1278 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1279 if (unlikely(!pdev)) {
1280 for (i = 0; i < MAX_SOCKET_BUSES; i++)
1281 pcibios_scan_specific_bus(255-i);
1282 }
bda14289 1283 pci_dev_put(pdev);
bd9e19ca 1284 table++;
bc2d7245
KM
1285 }
1286}
1287
bda14289
MCC
1288static unsigned i7core_pci_lastbus(void)
1289{
1290 int last_bus = 0, bus;
1291 struct pci_bus *b = NULL;
1292
1293 while ((b = pci_find_next_bus(b)) != NULL) {
1294 bus = b->number;
1295 debugf0("Found bus %d\n", bus);
1296 if (bus > last_bus)
1297 last_bus = bus;
1298 }
1299
1300 debugf0("Last bus %d\n", last_bus);
1301
1302 return last_bus;
1303}
1304
a0c36a1f 1305/*
64c10f6e 1306 * i7core_get_all_devices Find and perform 'get' operation on the MCH's
a0c36a1f
MCC
1307 * device/functions we want to reference for this driver
1308 *
1309 * Need to 'get' device 16 func 1 and func 2
1310 */
b197cba0
HS
1311static int i7core_get_onedevice(struct pci_dev **prev,
1312 const struct pci_id_table *table,
1313 const unsigned devno,
1314 const unsigned last_bus)
a0c36a1f 1315{
66607706 1316 struct i7core_dev *i7core_dev;
b197cba0 1317 const struct pci_id_descr *dev_descr = &table->descr[devno];
66607706 1318
8f331907 1319 struct pci_dev *pdev = NULL;
67166af4
MCC
1320 u8 bus = 0;
1321 u8 socket = 0;
a0c36a1f 1322
c77720b9 1323 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
de06eeef 1324 dev_descr->dev_id, *prev);
c77720b9 1325
c77720b9
MCC
1326 if (!pdev) {
1327 if (*prev) {
1328 *prev = pdev;
1329 return 0;
d1fd4fb6
MCC
1330 }
1331
de06eeef 1332 if (dev_descr->optional)
c77720b9 1333 return 0;
310cbb72 1334
bd9e19ca
VM
1335 if (devno == 0)
1336 return -ENODEV;
1337
ab089374 1338 i7core_printk(KERN_INFO,
c77720b9 1339 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1340 dev_descr->dev, dev_descr->func,
1341 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
67166af4 1342
c77720b9
MCC
1343 /* End of list, leave */
1344 return -ENODEV;
1345 }
1346 bus = pdev->bus->number;
67166af4 1347
bda14289 1348 socket = last_bus - bus;
c77720b9 1349
66607706
MCC
1350 i7core_dev = get_i7core_dev(socket);
1351 if (!i7core_dev) {
848b2f7e 1352 i7core_dev = alloc_i7core_dev(socket, table);
2896637b
HS
1353 if (!i7core_dev) {
1354 pci_dev_put(pdev);
66607706 1355 return -ENOMEM;
2896637b 1356 }
c77720b9 1357 }
67166af4 1358
66607706 1359 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1360 i7core_printk(KERN_ERR,
1361 "Duplicated device for "
1362 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1363 bus, dev_descr->dev, dev_descr->func,
1364 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1365 pci_dev_put(pdev);
1366 return -ENODEV;
1367 }
67166af4 1368
66607706 1369 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1370
1371 /* Sanity check */
de06eeef
MCC
1372 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1373 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
c77720b9
MCC
1374 i7core_printk(KERN_ERR,
1375 "Device PCI ID %04x:%04x "
1376 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
de06eeef 1377 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
c77720b9 1378 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
de06eeef 1379 bus, dev_descr->dev, dev_descr->func);
c77720b9
MCC
1380 return -ENODEV;
1381 }
ef708b53 1382
c77720b9
MCC
1383 /* Be sure that the device is enabled */
1384 if (unlikely(pci_enable_device(pdev) < 0)) {
1385 i7core_printk(KERN_ERR,
1386 "Couldn't enable "
1387 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1388 bus, dev_descr->dev, dev_descr->func,
1389 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
c77720b9
MCC
1390 return -ENODEV;
1391 }
ef708b53 1392
d4c27795 1393 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
de06eeef
MCC
1394 socket, bus, dev_descr->dev,
1395 dev_descr->func,
1396 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
8f331907 1397
c77720b9 1398 *prev = pdev;
ef708b53 1399
c77720b9
MCC
1400 return 0;
1401}
a0c36a1f 1402
64c10f6e 1403static int i7core_get_all_devices(void)
c77720b9 1404{
64c10f6e 1405 int i, j, rc, last_bus;
c77720b9 1406 struct pci_dev *pdev = NULL;
64c10f6e 1407 const struct pci_id_table *table;
bd9e19ca 1408
bda14289
MCC
1409 last_bus = i7core_pci_lastbus();
1410
64c10f6e
HS
1411 for (j = 0; j < ARRAY_SIZE(pci_dev_table); j++) {
1412 table = &pci_dev_table[j];
bd9e19ca
VM
1413 for (i = 0; i < table->n_devs; i++) {
1414 pdev = NULL;
1415 do {
b197cba0 1416 rc = i7core_get_onedevice(&pdev, table, i,
bda14289 1417 last_bus);
bd9e19ca
VM
1418 if (rc < 0) {
1419 if (i == 0) {
1420 i = table->n_devs;
1421 break;
1422 }
1423 i7core_put_all_devices();
1424 return -ENODEV;
1425 }
1426 } while (pdev);
1427 }
c77720b9 1428 }
66607706 1429
ef708b53 1430 return 0;
ef708b53
MCC
1431}
1432
/*
 * mci_bind_devs - sort the socket's acquired PCI devices into the private
 * data lookup arrays for fast access:
 *   slot 3            -> pvt->pci_mcr[func]       (memory controller regs)
 *   slots 4..4+NCHAN  -> pvt->pci_ch[chan][func]  (per-channel devices)
 *   slot 0 func 0     -> pvt->pci_noncore
 * Slot 3 function 2 being present marks registered (rdimm) memory.
 * Returns 0 on success, -EINVAL if a device is outside the expected range.
 */
static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		/* dev 3 func 2 only exists on registered memory */
		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}
442305b1
MCC
1479/****************************************************************************
1480 Error check routines
1481 ****************************************************************************/
f4742949 1482static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
1288c18f
MCC
1483 const int chan,
1484 const int dimm,
1485 const int add)
b4e8f0b6
MCC
1486{
1487 char *msg;
1488 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1489 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1490
1491 for (i = 0; i < add; i++) {
1492 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1493 "(Socket=%d channel=%d dimm=%d)",
1494 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1495
1496 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1497 kfree (msg);
1498 }
1499}
1500
/*
 * i7core_rdimm_update_ce_count - fold freshly-read per-dimm corrected
 * error counts for @chan into the running totals and forward the deltas
 * to the EDAC core.
 *
 * The hardware counters are 15 bits wide, so a delta that went negative
 * means the counter wrapped and is corrected by adding 0x7fff. On the
 * very first call there is no baseline, so only the snapshot is stored.
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 const int chan,
					 const int new0,
					 const int new1,
					 const int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		if (add2 < 0)
			add2 += 0x7fff;	/* 15-bit counter wrapped */
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* update the edac core with any new errors */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
f4742949 1546static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
b4e8f0b6
MCC
1547{
1548 struct i7core_pvt *pvt = mci->pvt_info;
1549 u32 rcv[3][2];
1550 int i, new0, new1, new2;
1551
1552 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1553 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1554 &rcv[0][0]);
f4742949 1555 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1556 &rcv[0][1]);
f4742949 1557 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1558 &rcv[1][0]);
f4742949 1559 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1560 &rcv[1][1]);
f4742949 1561 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1562 &rcv[2][0]);
f4742949 1563 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1564 &rcv[2][1]);
1565 for (i = 0 ; i < 3; i++) {
1566 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1567 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1568 /*if the channel has 3 dimms*/
f4742949 1569 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1570 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1571 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1572 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1573 } else {
1574 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1575 DIMM_BOT_COR_ERR(rcv[i][0]);
1576 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1577 DIMM_BOT_COR_ERR(rcv[i][1]);
1578 new2 = 0;
1579 }
1580
f4742949 1581 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1582 }
1583}
442305b1
MCC
1584
/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	/* Device 3 function 4 may be absent; nothing to poll then */
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		if (add2 < 0)
			add2 += 0x7fff;	/* 15-bit counter wrapped */
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
8a2f118e
MCC
1645/*
1646 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1647 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1648 * Nehalem are defined as family 0x06, model 0x1a
1649 *
1650 * The MCA registers used here are the following ones:
8a2f118e 1651 * struct mce field MCA Register
f237fcf2
MCC
1652 * m->status MSR_IA32_MC8_STATUS
1653 * m->addr MSR_IA32_MC8_ADDR
1654 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1655 * In the case of Nehalem, the error information is masked at .status and .misc
1656 * fields
1657 */
d5381642 1658static void i7core_mce_output_error(struct mem_ctl_info *mci,
1288c18f 1659 const struct mce *m)
d5381642 1660{
b4e8f0b6 1661 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1662 char *type, *optype, *err, *msg;
8a2f118e 1663 unsigned long error = m->status & 0x1ff0000l;
a639539f 1664 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1665 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1666 u32 dimm = (m->misc >> 16) & 0x3;
1667 u32 channel = (m->misc >> 18) & 0x3;
1668 u32 syndrome = m->misc >> 32;
1669 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1670 int csrow;
8a2f118e 1671
c5d34528
MCC
1672 if (m->mcgstatus & 1)
1673 type = "FATAL";
1674 else
1675 type = "NON_FATAL";
1676
a639539f 1677 switch (optypenum) {
b990538a
MCC
1678 case 0:
1679 optype = "generic undef request";
1680 break;
1681 case 1:
1682 optype = "read error";
1683 break;
1684 case 2:
1685 optype = "write error";
1686 break;
1687 case 3:
1688 optype = "addr/cmd error";
1689 break;
1690 case 4:
1691 optype = "scrubbing error";
1692 break;
1693 default:
1694 optype = "reserved";
1695 break;
a639539f
MCC
1696 }
1697
8a2f118e
MCC
1698 switch (errnum) {
1699 case 16:
1700 err = "read ECC error";
1701 break;
1702 case 17:
1703 err = "RAS ECC error";
1704 break;
1705 case 18:
1706 err = "write parity error";
1707 break;
1708 case 19:
1709 err = "redundacy loss";
1710 break;
1711 case 20:
1712 err = "reserved";
1713 break;
1714 case 21:
1715 err = "memory range error";
1716 break;
1717 case 22:
1718 err = "RTID out of range";
1719 break;
1720 case 23:
1721 err = "address parity error";
1722 break;
1723 case 24:
1724 err = "byte enable parity error";
1725 break;
1726 default:
1727 err = "unknown";
d5381642 1728 }
d5381642 1729
f237fcf2 1730 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1731 msg = kasprintf(GFP_ATOMIC,
f4742949 1732 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1733 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1734 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1735 syndrome, core_err_cnt, (long long)m->status,
1736 (long long)m->misc, optype, err);
8a2f118e
MCC
1737
1738 debugf0("%s", msg);
d5381642 1739
f4742949 1740 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1741
d5381642 1742 /* Call the helper to output message */
b4e8f0b6
MCC
1743 if (m->mcgstatus & 1)
1744 edac_mc_handle_fbd_ue(mci, csrow, 0,
1745 0 /* FIXME: should be channel here */, msg);
f4742949 1746 else if (!pvt->is_registered)
b4e8f0b6
MCC
1747 edac_mc_handle_fbd_ce(mci, csrow,
1748 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1749
1750 kfree(msg);
d5381642
MCC
1751}
1752
/*
 * i7core_check_error	Retrieve and process errors reported by the
 *			hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer
	 * We use a double buffering here, to reduce the risk of
	 * loosing an error.
	 *
	 * mce_entry[] is the ring filled by the NMI-time producer
	 * (i7core_mce_check_error); mce_in/mce_out are its consumer/
	 * producer indices. The memory barriers pair with the ones on
	 * the producer side.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
	if (!count)
		goto check_ce_error;

	m = pvt->mce_outentry;
	/* Ring wraps: copy the tail segment first, then restart at 0 */
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
check_ce_error:
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}
/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				return. The error itself should be handled later
 *				by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 *
 * Returns 1 when the event was consumed (memory-controller bank 8 error
 * for this socket), 0 to let mcelog handle it.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

#ifdef CONFIG_SMP
	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;
#endif

	/* Ring full? Count the loss instead of blocking in NMI context */
	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advice mcelog that the error were handled */
	return 1;
}
a3aa0a4a
HS
1863static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
1864{
1865 pvt->i7core_pci = edac_pci_create_generic_ctl(
1866 &pvt->i7core_dev->pdev[0]->dev,
1867 EDAC_MOD_STR);
1868 if (unlikely(!pvt->i7core_pci))
1869 pr_warn("Unable to setup PCI error report via EDAC\n");
1870}
1871
1872static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
1873{
1874 if (likely(pvt->i7core_pci))
1875 edac_pci_release_generic_ctl(pvt->i7core_pci);
1876 else
1877 i7core_printk(KERN_ERR,
1878 "Couldn't find mem_ctl_info for socket %d\n",
1879 pvt->i7core_dev->socket);
1880 pvt->i7core_pci = NULL;
1881}
1882
/*
 * i7core_unregister_mci - undo i7core_register_mci for one socket:
 * unhook the MCE NMI handler first (so no new events arrive), release
 * the EDAC PCI control, remove the sysfs nodes and free the mci.
 */
static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
{
	struct mem_ctl_info *mci = i7core_dev->mci;
	struct i7core_pvt *pvt;

	if (unlikely(!mci || !mci->pvt_info)) {
		debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
			__func__, &i7core_dev->pdev[0]->dev);

		i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
		return;
	}

	pvt = mci->pvt_info;

	debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
		__func__, mci, &i7core_dev->pdev[0]->dev);

	/* Disable MCE NMI handler */
	edac_mce_unregister(&pvt->edac_mce);

	/* Disable EDAC polling */
	i7core_pci_ctl_release(pvt);

	/* Remove MC sysfs nodes */
	edac_mc_del_mc(mci->dev);

	debugf1("%s: free mci struct\n", mci->ctl_name);
	kfree(mci->ctl_name);
	edac_mc_free(mci);
	i7core_dev->mci = NULL;
}
aace4283 1916static int i7core_register_mci(struct i7core_dev *i7core_dev)
a0c36a1f
MCC
1917{
1918 struct mem_ctl_info *mci;
1919 struct i7core_pvt *pvt;
aace4283
HS
1920 int rc, channels, csrows;
1921
1922 /* Check the number of active and not disabled channels */
1923 rc = i7core_get_active_channels(i7core_dev->socket, &channels, &csrows);
1924 if (unlikely(rc < 0))
1925 return rc;
a0c36a1f 1926
a0c36a1f 1927 /* allocate a new MC control structure */
aace4283 1928 mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, i7core_dev->socket);
f4742949
MCC
1929 if (unlikely(!mci))
1930 return -ENOMEM;
a0c36a1f 1931
3cfd0146
MCC
1932 debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1933 __func__, mci, &i7core_dev->pdev[0]->dev);
a0c36a1f 1934
a0c36a1f 1935 pvt = mci->pvt_info;
ef708b53 1936 memset(pvt, 0, sizeof(*pvt));
67166af4 1937
6d37d240
MCC
1938 /* Associates i7core_dev and mci for future usage */
1939 pvt->i7core_dev = i7core_dev;
1940 i7core_dev->mci = mci;
1941
41fcb7fe
MCC
1942 /*
1943 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1944 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1945 * memory channels
1946 */
1947 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1948 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1949 mci->edac_cap = EDAC_FLAG_NONE;
1950 mci->mod_name = "i7core_edac.c";
1951 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1952 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1953 i7core_dev->socket);
1954 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1955 mci->ctl_page_to_phys = NULL;
1288c18f 1956
ef708b53 1957 /* Store pci devices at mci for faster access */
f4742949 1958 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1959 if (unlikely(rc < 0))
628c5ddf 1960 goto fail0;
ef708b53 1961
5939813b
HS
1962 if (pvt->is_registered)
1963 mci->mc_driver_sysfs_attributes = i7core_sysfs_rdimm_attrs;
1964 else
1965 mci->mc_driver_sysfs_attributes = i7core_sysfs_udimm_attrs;
1966
ef708b53 1967 /* Get dimm basic config */
2e5185f7 1968 get_dimm_config(mci);
5939813b
HS
1969 /* record ptr to the generic device */
1970 mci->dev = &i7core_dev->pdev[0]->dev;
1971 /* Set the function pointer to an actual operation function */
1972 mci->edac_check = i7core_check_error;
ef708b53 1973
a0c36a1f 1974 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1975 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1976 debugf0("MC: " __FILE__
1977 ": %s(): failed edac_mc_add_mc()\n", __func__);
1978 /* FIXME: perhaps some code should go here that disables error
1979 * reporting if we just enabled it
1980 */
b7c76151
MCC
1981
1982 rc = -EINVAL;
628c5ddf 1983 goto fail0;
a0c36a1f
MCC
1984 }
1985
194a40fe 1986 /* Default error mask is any memory */
ef708b53 1987 pvt->inject.channel = 0;
194a40fe
MCC
1988 pvt->inject.dimm = -1;
1989 pvt->inject.rank = -1;
1990 pvt->inject.bank = -1;
1991 pvt->inject.page = -1;
1992 pvt->inject.col = -1;
1993
a3aa0a4a
HS
1994 /* allocating generic PCI control info */
1995 i7core_pci_ctl_create(pvt);
1996
d5381642 1997 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1998 pvt->edac_mce.priv = mci;
d5381642 1999 pvt->edac_mce.check_error = i7core_mce_check_error;
d5381642 2000 rc = edac_mce_register(&pvt->edac_mce);
b990538a 2001 if (unlikely(rc < 0)) {
d5381642
MCC
2002 debugf0("MC: " __FILE__
2003 ": %s(): failed edac_mce_register()\n", __func__);
628c5ddf 2004 goto fail1;
f4742949
MCC
2005 }
2006
628c5ddf
HS
2007 return 0;
2008
2009fail1:
2010 i7core_pci_ctl_release(pvt);
2011 edac_mc_del_mc(mci->dev);
2012fail0:
2013 kfree(mci->ctl_name);
2014 edac_mc_free(mci);
1c6edbbe 2015 i7core_dev->mci = NULL;
f4742949
MCC
2016 return rc;
2017}
2018
/*
 * i7core_probe	Probe for ONE instance of device to see if it is
 *		present.
 * return:
 *		0 for FOUND a device
 *		< 0 for error code
 *
 * Note: all sockets are discovered and registered on the first probe
 * call; subsequent calls are rejected via the 'probed' counter.
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int rc;
	struct i7core_dev *i7core_dev;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(probed >= 1)) {
		mutex_unlock(&i7core_edac_lock);
		return -EINVAL;
	}
	probed++;

	/* Collect every PCI device of every socket into i7core_edac_list */
	rc = i7core_get_all_devices();
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		rc = i7core_register_mci(i7core_dev);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

	/*
	 * fail1: some MCs may be registered; unregister them all
	 * (i7core_unregister_mci handles a not-yet-registered entry)
	 * before dropping the PCI device references.
	 */
fail1:
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}
2069
/*
 * i7core_remove	destructor for one instance of device
 *
 * Tears down every registered MC instance, not just the one matching
 * @pdev (see the comment inside for why).
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct i7core_dev *i7core_dev;

	debugf0(__FILE__ ": %s()\n", __func__);

	/*
	 * we have a trouble here: pdev value for removal will be wrong, since
	 * it will point to the X58 register used to detect that the machine
	 * is a Nehalem or upper design. However, due to the way several PCI
	 * devices are grouped together to provide MC functionality, we need
	 * to use a different method for releasing the devices
	 */

	mutex_lock(&i7core_edac_lock);

	/* Nothing to do if probe never completed (or remove already ran) */
	if (unlikely(!probed)) {
		mutex_unlock(&i7core_edac_lock);
		return;
	}

	/* Unregister every MC instance before releasing the PCI devices */
	list_for_each_entry(i7core_dev, &i7core_edac_list, list)
		i7core_unregister_mci(i7core_dev);

	/* Release PCI resources */
	i7core_put_all_devices();

	probed--;

	mutex_unlock(&i7core_edac_lock);
}
2105
a0c36a1f
MCC
/* Export the PCI ID table so the module autoloads on matching hardware */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 * i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
2118
2119/*
2120 * i7core_init Module entry function
2121 * Try to initialize this module for its devices
2122 */
2123static int __init i7core_init(void)
2124{
2125 int pci_rc;
2126
2127 debugf2("MC: " __FILE__ ": %s()\n", __func__);
2128
2129 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2130 opstate_init();
2131
54a08ab1
MCC
2132 if (use_pci_fixup)
2133 i7core_xeon_pci_fixup(pci_dev_table);
bc2d7245 2134
a0c36a1f
MCC
2135 pci_rc = pci_register_driver(&i7core_driver);
2136
3ef288a9
MCC
2137 if (pci_rc >= 0)
2138 return 0;
2139
2140 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2141 pci_rc);
2142
2143 return pci_rc;
a0c36a1f
MCC
2144}
2145
/*
 * i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	/* The PCI core invokes i7core_remove() for the bound device here */
	pci_unregister_driver(&i7core_driver);
}
2155
module_init(i7core_init);
module_exit(i7core_exit);

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Read-only module parameter: selects polling vs. NMI error reporting */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");