i7core_edac: sanity check: print a warning if a mcelog is ignored
[deliverable/linux.git] / drivers / edac / i7core_edac.c
CommitLineData
a0c36a1f
MCC
/* Intel 7 core Memory Controller kernel module (Nehalem)
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009 by:
 *	 Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *	http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
22
a0c36a1f
MCC
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/pci.h>
26#include <linux/pci_ids.h>
27#include <linux/slab.h>
28#include <linux/edac.h>
29#include <linux/mmzone.h>
d5381642
MCC
30#include <linux/edac_mce.h>
31#include <linux/spinlock.h>
f4742949 32#include <linux/smp.h>
14d2c083 33#include <asm/processor.h>
a0c36a1f
MCC
34
35#include "edac_core.h"
36
f4742949
MCC
37/*
38 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
39 * registers start at bus 255, and are not reported by BIOS.
40 * We currently find devices with only 2 sockets. In order to support more QPI
41 * Quick Path Interconnect, just increment this number.
42 */
43#define MAX_SOCKET_BUSES 2
44
45
a0c36a1f
MCC
46/*
47 * Alter this version for the module when modifications are made
48 */
49#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__
50#define EDAC_MOD_STR "i7core_edac"
51
a0c36a1f
MCC
52/*
53 * Debug macros
54 */
55#define i7core_printk(level, fmt, arg...) \
56 edac_printk(level, "i7core", fmt, ##arg)
57
58#define i7core_mc_printk(mci, level, fmt, arg...) \
59 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
60
61/*
62 * i7core Memory Controller Registers
63 */
64
e9bd2e73
MCC
65 /* OFFSETS for Device 0 Function 0 */
66
67#define MC_CFG_CONTROL 0x90
68
a0c36a1f
MCC
69 /* OFFSETS for Device 3 Function 0 */
70
71#define MC_CONTROL 0x48
72#define MC_STATUS 0x4c
73#define MC_MAX_DOD 0x64
74
442305b1
MCC
75/*
76 * OFFSETS for Device 3 Function 4, as inicated on Xeon 5500 datasheet:
77 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
78 */
79
80#define MC_TEST_ERR_RCV1 0x60
81 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
82
83#define MC_TEST_ERR_RCV0 0x64
84 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
85 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
86
b4e8f0b6
MCC
87/* OFFSETS for Device 3 Function 2, as inicated on Xeon 5500 datasheet */
88#define MC_COR_ECC_CNT_0 0x80
89#define MC_COR_ECC_CNT_1 0x84
90#define MC_COR_ECC_CNT_2 0x88
91#define MC_COR_ECC_CNT_3 0x8c
92#define MC_COR_ECC_CNT_4 0x90
93#define MC_COR_ECC_CNT_5 0x94
94
95#define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
96#define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
97
98
a0c36a1f
MCC
99 /* OFFSETS for Devices 4,5 and 6 Function 0 */
100
0b2b7b7e
MCC
101#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
102 #define THREE_DIMMS_PRESENT (1 << 24)
103 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
104 #define QUAD_RANK_PRESENT (1 << 22)
105 #define REGISTERED_DIMM (1 << 15)
106
f122a892
MCC
107#define MC_CHANNEL_MAPPER 0x60
108 #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
109 #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
110
0b2b7b7e
MCC
111#define MC_CHANNEL_RANK_PRESENT 0x7c
112 #define RANK_PRESENT_MASK 0xffff
113
a0c36a1f 114#define MC_CHANNEL_ADDR_MATCH 0xf0
194a40fe
MCC
115#define MC_CHANNEL_ERROR_MASK 0xf8
116#define MC_CHANNEL_ERROR_INJECT 0xfc
117 #define INJECT_ADDR_PARITY 0x10
118 #define INJECT_ECC 0x08
119 #define MASK_CACHELINE 0x06
120 #define MASK_FULL_CACHELINE 0x06
121 #define MASK_MSB32_CACHELINE 0x04
122 #define MASK_LSB32_CACHELINE 0x02
123 #define NO_MASK_CACHELINE 0x00
124 #define REPEAT_EN 0x01
a0c36a1f 125
0b2b7b7e 126 /* OFFSETS for Devices 4,5 and 6 Function 1 */
b990538a 127
0b2b7b7e
MCC
128#define MC_DOD_CH_DIMM0 0x48
129#define MC_DOD_CH_DIMM1 0x4c
130#define MC_DOD_CH_DIMM2 0x50
131 #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
132 #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
133 #define DIMM_PRESENT_MASK (1 << 9)
134 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
854d3349
MCC
135 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
136 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
137 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
138 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
41fcb7fe 139 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
5566cb7c 140 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
854d3349
MCC
141 #define MC_DOD_NUMCOL_MASK 3
142 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
0b2b7b7e 143
f122a892
MCC
144#define MC_RANK_PRESENT 0x7c
145
0b2b7b7e
MCC
146#define MC_SAG_CH_0 0x80
147#define MC_SAG_CH_1 0x84
148#define MC_SAG_CH_2 0x88
149#define MC_SAG_CH_3 0x8c
150#define MC_SAG_CH_4 0x90
151#define MC_SAG_CH_5 0x94
152#define MC_SAG_CH_6 0x98
153#define MC_SAG_CH_7 0x9c
154
155#define MC_RIR_LIMIT_CH_0 0x40
156#define MC_RIR_LIMIT_CH_1 0x44
157#define MC_RIR_LIMIT_CH_2 0x48
158#define MC_RIR_LIMIT_CH_3 0x4C
159#define MC_RIR_LIMIT_CH_4 0x50
160#define MC_RIR_LIMIT_CH_5 0x54
161#define MC_RIR_LIMIT_CH_6 0x58
162#define MC_RIR_LIMIT_CH_7 0x5C
163#define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
164
165#define MC_RIR_WAY_CH 0x80
166 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
167 #define MC_RIR_WAY_RANK_MASK 0x7
168
a0c36a1f
MCC
169/*
170 * i7core structs
171 */
172
173#define NUM_CHANS 3
442305b1
MCC
174#define MAX_DIMMS 3 /* Max DIMMS per channel */
175#define MAX_MCR_FUNC 4
176#define MAX_CHAN_FUNC 3
a0c36a1f
MCC
177
178struct i7core_info {
179 u32 mc_control;
180 u32 mc_status;
181 u32 max_dod;
f122a892 182 u32 ch_map;
a0c36a1f
MCC
183};
184
194a40fe
MCC
185
186struct i7core_inject {
187 int enable;
188
189 u32 section;
190 u32 type;
191 u32 eccmask;
192
193 /* Error address mask */
194 int channel, dimm, rank, bank, page, col;
195};
196
0b2b7b7e 197struct i7core_channel {
442305b1
MCC
198 u32 ranks;
199 u32 dimms;
0b2b7b7e
MCC
200};
201
/* Describes one PCI function the driver must attach to: slot/function/id. */
struct pci_id_descr {
	int	dev;	/* PCI device (slot) number */
	int	func;	/* PCI function number */
	int	dev_id;	/* expected PCI device id */
};
207
f4742949
MCC
208struct i7core_dev {
209 struct list_head list;
210 u8 socket;
211 struct pci_dev **pdev;
212 struct mem_ctl_info *mci;
213};
214
a0c36a1f 215struct i7core_pvt {
f4742949
MCC
216 struct pci_dev *pci_noncore;
217 struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
218 struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
219
220 struct i7core_dev *i7core_dev;
67166af4 221
a0c36a1f 222 struct i7core_info info;
194a40fe 223 struct i7core_inject inject;
f4742949 224 struct i7core_channel channel[NUM_CHANS];
67166af4 225
f4742949 226 int channels; /* Number of active channels */
442305b1 227
f4742949
MCC
228 int ce_count_available;
229 int csrow_map[NUM_CHANS][MAX_DIMMS];
b4e8f0b6
MCC
230
231 /* ECC corrected errors counts per udimm */
f4742949
MCC
232 unsigned long udimm_ce_count[MAX_DIMMS];
233 int udimm_last_ce_count[MAX_DIMMS];
b4e8f0b6 234 /* ECC corrected errors counts per rdimm */
f4742949
MCC
235 unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
236 int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
442305b1 237
f4742949 238 unsigned int is_registered;
14d2c083 239
d5381642
MCC
240 /* mcelog glue */
241 struct edac_mce edac_mce;
242 struct mce mce_entry[MCE_LOG_LEN];
243 unsigned mce_count;
244 spinlock_t mce_lock;
a0c36a1f
MCC
245};
246
66607706
MCC
247/* Static vars */
248static LIST_HEAD(i7core_edac_list);
249static DEFINE_MUTEX(i7core_edac_lock);
f4742949 250static u8 max_num_sockets;
a0c36a1f 251
8f331907
MCC
252#define PCI_DESCR(device, function, device_id) \
253 .dev = (device), \
254 .func = (function), \
255 .dev_id = (device_id)
256
66607706 257struct pci_id_descr pci_dev_descr[] = {
8f331907
MCC
258 /* Memory controller */
259 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR) },
260 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD) },
b990538a 261 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS) }, /* if RDIMM */
8f331907
MCC
262 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },
263
264 /* Channel 0 */
265 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
266 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
267 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
268 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC) },
269
270 /* Channel 1 */
271 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
272 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
273 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
274 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC) },
275
276 /* Channel 2 */
277 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
278 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
279 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
280 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC) },
310cbb72
MCC
281
282 /* Generic Non-core registers */
283 /*
284 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
285 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
286 * the probing code needs to test for the other address in case of
287 * failure of this one
288 */
289 { PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE) },
290
a0c36a1f 291};
66607706 292#define N_DEVS ARRAY_SIZE(pci_dev_descr)
8f331907
MCC
293
294/*
295 * pci_device_id table for which devices we are looking for
8f331907
MCC
296 */
297static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
d1fd4fb6 298 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
8f331907
MCC
299 {0,} /* 0 terminated list. */
300};
301
a0c36a1f
MCC
302static struct edac_pci_ctl_info *i7core_pci;
303
304/****************************************************************************
305 Anciliary status routines
306 ****************************************************************************/
307
308 /* MC_CONTROL bits */
ef708b53
MCC
309#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
310#define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
a0c36a1f
MCC
311
312 /* MC_STATUS bits */
61053fde 313#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
ef708b53 314#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
a0c36a1f
MCC
315
316 /* MC_MAX_DOD read functions */
854d3349 317static inline int numdimms(u32 dimms)
a0c36a1f 318{
854d3349 319 return (dimms & 0x3) + 1;
a0c36a1f
MCC
320}
321
854d3349 322static inline int numrank(u32 rank)
a0c36a1f
MCC
323{
324 static int ranks[4] = { 1, 2, 4, -EINVAL };
325
854d3349 326 return ranks[rank & 0x3];
a0c36a1f
MCC
327}
328
854d3349 329static inline int numbank(u32 bank)
a0c36a1f
MCC
330{
331 static int banks[4] = { 4, 8, 16, -EINVAL };
332
854d3349 333 return banks[bank & 0x3];
a0c36a1f
MCC
334}
335
854d3349 336static inline int numrow(u32 row)
a0c36a1f
MCC
337{
338 static int rows[8] = {
339 1 << 12, 1 << 13, 1 << 14, 1 << 15,
340 1 << 16, -EINVAL, -EINVAL, -EINVAL,
341 };
342
854d3349 343 return rows[row & 0x7];
a0c36a1f
MCC
344}
345
854d3349 346static inline int numcol(u32 col)
a0c36a1f
MCC
347{
348 static int cols[8] = {
349 1 << 10, 1 << 11, 1 << 12, -EINVAL,
350 };
854d3349 351 return cols[col & 0x3];
a0c36a1f
MCC
352}
353
f4742949 354static struct i7core_dev *get_i7core_dev(u8 socket)
66607706
MCC
355{
356 struct i7core_dev *i7core_dev;
357
358 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
359 if (i7core_dev->socket == socket)
360 return i7core_dev;
361 }
362
363 return NULL;
364}
365
a0c36a1f
MCC
366/****************************************************************************
367 Memory check routines
368 ****************************************************************************/
67166af4
MCC
369static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
370 unsigned func)
ef708b53 371{
66607706 372 struct i7core_dev *i7core_dev = get_i7core_dev(socket);
ef708b53 373 int i;
ef708b53 374
66607706
MCC
375 if (!i7core_dev)
376 return NULL;
377
ef708b53 378 for (i = 0; i < N_DEVS; i++) {
66607706 379 if (!i7core_dev->pdev[i])
ef708b53
MCC
380 continue;
381
66607706
MCC
382 if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
383 PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
384 return i7core_dev->pdev[i];
ef708b53
MCC
385 }
386 }
387
eb94fc40
MCC
388 return NULL;
389}
390
ec6df24c
MCC
391/**
392 * i7core_get_active_channels() - gets the number of channels and csrows
393 * @socket: Quick Path Interconnect socket
394 * @channels: Number of channels that will be returned
395 * @csrows: Number of csrows found
396 *
397 * Since EDAC core needs to know in advance the number of available channels
398 * and csrows, in order to allocate memory for csrows/channels, it is needed
399 * to run two similar steps. At the first step, implemented on this function,
400 * it checks the number of csrows/channels present at one socket.
401 * this is used in order to properly allocate the size of mci components.
402 *
403 * It should be noticed that none of the current available datasheets explain
404 * or even mention how csrows are seen by the memory controller. So, we need
405 * to add a fake description for csrows.
406 * So, this driver is attributing one DIMM memory for one csrow.
407 */
67166af4
MCC
408static int i7core_get_active_channels(u8 socket, unsigned *channels,
409 unsigned *csrows)
eb94fc40
MCC
410{
411 struct pci_dev *pdev = NULL;
412 int i, j;
413 u32 status, control;
414
415 *channels = 0;
416 *csrows = 0;
417
67166af4 418 pdev = get_pdev_slot_func(socket, 3, 0);
b7c76151 419 if (!pdev) {
67166af4
MCC
420 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
421 socket);
ef708b53 422 return -ENODEV;
b7c76151 423 }
ef708b53
MCC
424
425 /* Device 3 function 0 reads */
426 pci_read_config_dword(pdev, MC_STATUS, &status);
427 pci_read_config_dword(pdev, MC_CONTROL, &control);
428
429 for (i = 0; i < NUM_CHANS; i++) {
eb94fc40 430 u32 dimm_dod[3];
ef708b53
MCC
431 /* Check if the channel is active */
432 if (!(control & (1 << (8 + i))))
433 continue;
434
435 /* Check if the channel is disabled */
41fcb7fe 436 if (status & (1 << i))
ef708b53 437 continue;
ef708b53 438
67166af4 439 pdev = get_pdev_slot_func(socket, i + 4, 1);
eb94fc40 440 if (!pdev) {
67166af4
MCC
441 i7core_printk(KERN_ERR, "Couldn't find socket %d "
442 "fn %d.%d!!!\n",
443 socket, i + 4, 1);
eb94fc40
MCC
444 return -ENODEV;
445 }
446 /* Devices 4-6 function 1 */
447 pci_read_config_dword(pdev,
448 MC_DOD_CH_DIMM0, &dimm_dod[0]);
449 pci_read_config_dword(pdev,
450 MC_DOD_CH_DIMM1, &dimm_dod[1]);
451 pci_read_config_dword(pdev,
452 MC_DOD_CH_DIMM2, &dimm_dod[2]);
453
ef708b53 454 (*channels)++;
eb94fc40
MCC
455
456 for (j = 0; j < 3; j++) {
457 if (!DIMM_PRESENT(dimm_dod[j]))
458 continue;
459 (*csrows)++;
460 }
ef708b53
MCC
461 }
462
c77720b9 463 debugf0("Number of active channels on socket %d: %d\n",
67166af4 464 socket, *channels);
1c6fed80 465
ef708b53
MCC
466 return 0;
467}
468
f4742949 469static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
a0c36a1f
MCC
470{
471 struct i7core_pvt *pvt = mci->pvt_info;
1c6fed80 472 struct csrow_info *csr;
854d3349 473 struct pci_dev *pdev;
ba6c5c62 474 int i, j;
f4742949 475 u8 socket = pvt->i7core_dev->socket;
5566cb7c 476 unsigned long last_page = 0;
1c6fed80 477 enum edac_type mode;
854d3349 478 enum mem_type mtype;
a0c36a1f 479
854d3349 480 /* Get data from the MC register, function 0 */
f4742949 481 pdev = pvt->pci_mcr[0];
7dd6953c 482 if (!pdev)
8f331907
MCC
483 return -ENODEV;
484
f122a892 485 /* Device 3 function 0 reads */
7dd6953c
MCC
486 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
487 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
488 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
489 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
f122a892 490
17cb7b0c
MCC
491 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
492 socket, pvt->info.mc_control, pvt->info.mc_status,
f122a892 493 pvt->info.max_dod, pvt->info.ch_map);
a0c36a1f 494
1c6fed80 495 if (ECC_ENABLED(pvt)) {
41fcb7fe 496 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
1c6fed80
MCC
497 if (ECCx8(pvt))
498 mode = EDAC_S8ECD8ED;
499 else
500 mode = EDAC_S4ECD4ED;
501 } else {
a0c36a1f 502 debugf0("ECC disabled\n");
1c6fed80
MCC
503 mode = EDAC_NONE;
504 }
a0c36a1f
MCC
505
506 /* FIXME: need to handle the error codes */
17cb7b0c
MCC
507 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
508 "x%x x 0x%x\n",
854d3349
MCC
509 numdimms(pvt->info.max_dod),
510 numrank(pvt->info.max_dod >> 2),
276b824c 511 numbank(pvt->info.max_dod >> 4),
854d3349
MCC
512 numrow(pvt->info.max_dod >> 6),
513 numcol(pvt->info.max_dod >> 9));
a0c36a1f 514
0b2b7b7e 515 for (i = 0; i < NUM_CHANS; i++) {
854d3349 516 u32 data, dimm_dod[3], value[8];
0b2b7b7e
MCC
517
518 if (!CH_ACTIVE(pvt, i)) {
519 debugf0("Channel %i is not active\n", i);
520 continue;
521 }
522 if (CH_DISABLED(pvt, i)) {
523 debugf0("Channel %i is disabled\n", i);
524 continue;
525 }
526
f122a892 527 /* Devices 4-6 function 0 */
f4742949 528 pci_read_config_dword(pvt->pci_ch[i][0],
0b2b7b7e
MCC
529 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
530
f4742949 531 pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
67166af4 532 4 : 2;
0b2b7b7e 533
854d3349
MCC
534 if (data & REGISTERED_DIMM)
535 mtype = MEM_RDDR3;
14d2c083 536 else
854d3349
MCC
537 mtype = MEM_DDR3;
538#if 0
0b2b7b7e
MCC
539 if (data & THREE_DIMMS_PRESENT)
540 pvt->channel[i].dimms = 3;
541 else if (data & SINGLE_QUAD_RANK_PRESENT)
542 pvt->channel[i].dimms = 1;
543 else
544 pvt->channel[i].dimms = 2;
854d3349
MCC
545#endif
546
547 /* Devices 4-6 function 1 */
f4742949 548 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 549 MC_DOD_CH_DIMM0, &dimm_dod[0]);
f4742949 550 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 551 MC_DOD_CH_DIMM1, &dimm_dod[1]);
f4742949 552 pci_read_config_dword(pvt->pci_ch[i][1],
854d3349 553 MC_DOD_CH_DIMM2, &dimm_dod[2]);
0b2b7b7e 554
1c6fed80 555 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
854d3349 556 "%d ranks, %cDIMMs\n",
1c6fed80
MCC
557 i,
558 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
559 data,
f4742949 560 pvt->channel[i].ranks,
41fcb7fe 561 (data & REGISTERED_DIMM) ? 'R' : 'U');
854d3349
MCC
562
563 for (j = 0; j < 3; j++) {
564 u32 banks, ranks, rows, cols;
5566cb7c 565 u32 size, npages;
854d3349
MCC
566
567 if (!DIMM_PRESENT(dimm_dod[j]))
568 continue;
569
570 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
571 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
572 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
573 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
574
5566cb7c
MCC
575 /* DDR3 has 8 I/O banks */
576 size = (rows * cols * banks * ranks) >> (20 - 3);
577
f4742949 578 pvt->channel[i].dimms++;
854d3349 579
17cb7b0c
MCC
580 debugf0("\tdimm %d %d Mb offset: %x, "
581 "bank: %d, rank: %d, row: %#x, col: %#x\n",
582 j, size,
854d3349
MCC
583 RANKOFFSET(dimm_dod[j]),
584 banks, ranks, rows, cols);
585
eb94fc40
MCC
586#if PAGE_SHIFT > 20
587 npages = size >> (PAGE_SHIFT - 20);
588#else
589 npages = size << (20 - PAGE_SHIFT);
590#endif
5566cb7c 591
ba6c5c62 592 csr = &mci->csrows[*csrow];
5566cb7c
MCC
593 csr->first_page = last_page + 1;
594 last_page += npages;
595 csr->last_page = last_page;
596 csr->nr_pages = npages;
597
854d3349 598 csr->page_mask = 0;
eb94fc40 599 csr->grain = 8;
ba6c5c62 600 csr->csrow_idx = *csrow;
eb94fc40
MCC
601 csr->nr_channels = 1;
602
603 csr->channels[0].chan_idx = i;
604 csr->channels[0].ce_count = 0;
854d3349 605
f4742949 606 pvt->csrow_map[i][j] = *csrow;
b4e8f0b6 607
854d3349
MCC
608 switch (banks) {
609 case 4:
610 csr->dtype = DEV_X4;
611 break;
612 case 8:
613 csr->dtype = DEV_X8;
614 break;
615 case 16:
616 csr->dtype = DEV_X16;
617 break;
618 default:
619 csr->dtype = DEV_UNKNOWN;
620 }
621
622 csr->edac_mode = mode;
623 csr->mtype = mtype;
624
ba6c5c62 625 (*csrow)++;
854d3349 626 }
1c6fed80 627
854d3349
MCC
628 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
629 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
630 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
631 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
632 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
633 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
634 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
635 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
17cb7b0c 636 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
854d3349 637 for (j = 0; j < 8; j++)
17cb7b0c 638 debugf1("\t\t%#x\t%#x\t%#x\n",
854d3349
MCC
639 (value[j] >> 27) & 0x1,
640 (value[j] >> 24) & 0x7,
641 (value[j] && ((1 << 24) - 1)));
0b2b7b7e
MCC
642 }
643
a0c36a1f
MCC
644 return 0;
645}
646
194a40fe
MCC
647/****************************************************************************
648 Error insertion routines
649 ****************************************************************************/
650
651/* The i7core has independent error injection features per channel.
652 However, to have a simpler code, we don't allow enabling error injection
653 on more than one channel.
654 Also, since a change at an inject parameter will be applied only at enable,
655 we're disabling error injection on all write calls to the sysfs nodes that
656 controls the error code injection.
657 */
8f331907 658static int disable_inject(struct mem_ctl_info *mci)
194a40fe
MCC
659{
660 struct i7core_pvt *pvt = mci->pvt_info;
661
662 pvt->inject.enable = 0;
663
f4742949 664 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
665 return -ENODEV;
666
f4742949 667 pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 668 MC_CHANNEL_ERROR_INJECT, 0);
8f331907
MCC
669
670 return 0;
194a40fe
MCC
671}
672
673/*
674 * i7core inject inject.section
675 *
676 * accept and store error injection inject.section value
677 * bit 0 - refers to the lower 32-byte half cacheline
678 * bit 1 - refers to the upper 32-byte half cacheline
679 */
680static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
681 const char *data, size_t count)
682{
683 struct i7core_pvt *pvt = mci->pvt_info;
684 unsigned long value;
685 int rc;
686
687 if (pvt->inject.enable)
41fcb7fe 688 disable_inject(mci);
194a40fe
MCC
689
690 rc = strict_strtoul(data, 10, &value);
691 if ((rc < 0) || (value > 3))
2068def5 692 return -EIO;
194a40fe
MCC
693
694 pvt->inject.section = (u32) value;
695 return count;
696}
697
698static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
699 char *data)
700{
701 struct i7core_pvt *pvt = mci->pvt_info;
702 return sprintf(data, "0x%08x\n", pvt->inject.section);
703}
704
705/*
706 * i7core inject.type
707 *
708 * accept and store error injection inject.section value
709 * bit 0 - repeat enable - Enable error repetition
710 * bit 1 - inject ECC error
711 * bit 2 - inject parity error
712 */
713static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
714 const char *data, size_t count)
715{
716 struct i7core_pvt *pvt = mci->pvt_info;
717 unsigned long value;
718 int rc;
719
720 if (pvt->inject.enable)
41fcb7fe 721 disable_inject(mci);
194a40fe
MCC
722
723 rc = strict_strtoul(data, 10, &value);
724 if ((rc < 0) || (value > 7))
2068def5 725 return -EIO;
194a40fe
MCC
726
727 pvt->inject.type = (u32) value;
728 return count;
729}
730
731static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
732 char *data)
733{
734 struct i7core_pvt *pvt = mci->pvt_info;
735 return sprintf(data, "0x%08x\n", pvt->inject.type);
736}
737
738/*
739 * i7core_inject_inject.eccmask_store
740 *
741 * The type of error (UE/CE) will depend on the inject.eccmask value:
742 * Any bits set to a 1 will flip the corresponding ECC bit
743 * Correctable errors can be injected by flipping 1 bit or the bits within
744 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
745 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
746 * uncorrectable error to be injected.
747 */
748static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
749 const char *data, size_t count)
750{
751 struct i7core_pvt *pvt = mci->pvt_info;
752 unsigned long value;
753 int rc;
754
755 if (pvt->inject.enable)
41fcb7fe 756 disable_inject(mci);
194a40fe
MCC
757
758 rc = strict_strtoul(data, 10, &value);
759 if (rc < 0)
2068def5 760 return -EIO;
194a40fe
MCC
761
762 pvt->inject.eccmask = (u32) value;
763 return count;
764}
765
766static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
767 char *data)
768{
769 struct i7core_pvt *pvt = mci->pvt_info;
770 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
771}
772
773/*
774 * i7core_addrmatch
775 *
776 * The type of error (UE/CE) will depend on the inject.eccmask value:
777 * Any bits set to a 1 will flip the corresponding ECC bit
778 * Correctable errors can be injected by flipping 1 bit or the bits within
779 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
780 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
781 * uncorrectable error to be injected.
782 */
783static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
784 const char *data, size_t count)
785{
786 struct i7core_pvt *pvt = mci->pvt_info;
787 char *cmd, *val;
788 long value;
789 int rc;
790
791 if (pvt->inject.enable)
41fcb7fe 792 disable_inject(mci);
194a40fe
MCC
793
794 do {
795 cmd = strsep((char **) &data, ":");
796 if (!cmd)
797 break;
798 val = strsep((char **) &data, " \n\t");
799 if (!val)
800 return cmd - data;
801
41fcb7fe 802 if (!strcasecmp(val, "any"))
194a40fe
MCC
803 value = -1;
804 else {
805 rc = strict_strtol(val, 10, &value);
806 if ((rc < 0) || (value < 0))
807 return cmd - data;
808 }
809
41fcb7fe 810 if (!strcasecmp(cmd, "channel")) {
194a40fe
MCC
811 if (value < 3)
812 pvt->inject.channel = value;
813 else
814 return cmd - data;
41fcb7fe 815 } else if (!strcasecmp(cmd, "dimm")) {
276b824c 816 if (value < 3)
194a40fe
MCC
817 pvt->inject.dimm = value;
818 else
819 return cmd - data;
41fcb7fe 820 } else if (!strcasecmp(cmd, "rank")) {
194a40fe
MCC
821 if (value < 4)
822 pvt->inject.rank = value;
823 else
824 return cmd - data;
41fcb7fe 825 } else if (!strcasecmp(cmd, "bank")) {
276b824c 826 if (value < 32)
194a40fe
MCC
827 pvt->inject.bank = value;
828 else
829 return cmd - data;
41fcb7fe 830 } else if (!strcasecmp(cmd, "page")) {
194a40fe
MCC
831 if (value <= 0xffff)
832 pvt->inject.page = value;
833 else
834 return cmd - data;
41fcb7fe
MCC
835 } else if (!strcasecmp(cmd, "col") ||
836 !strcasecmp(cmd, "column")) {
194a40fe
MCC
837 if (value <= 0x3fff)
838 pvt->inject.col = value;
839 else
840 return cmd - data;
841 }
842 } while (1);
843
844 return count;
845}
846
847static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
848 char *data)
849{
850 struct i7core_pvt *pvt = mci->pvt_info;
851 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
852
853 if (pvt->inject.channel < 0)
854 sprintf(channel, "any");
855 else
856 sprintf(channel, "%d", pvt->inject.channel);
857 if (pvt->inject.dimm < 0)
858 sprintf(dimm, "any");
859 else
860 sprintf(dimm, "%d", pvt->inject.dimm);
861 if (pvt->inject.bank < 0)
862 sprintf(bank, "any");
863 else
864 sprintf(bank, "%d", pvt->inject.bank);
865 if (pvt->inject.rank < 0)
866 sprintf(rank, "any");
867 else
868 sprintf(rank, "%d", pvt->inject.rank);
869 if (pvt->inject.page < 0)
870 sprintf(page, "any");
871 else
872 sprintf(page, "0x%04x", pvt->inject.page);
873 if (pvt->inject.col < 0)
874 sprintf(col, "any");
875 else
876 sprintf(col, "0x%04x", pvt->inject.col);
877
878 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
879 "rank: %s\npage: %s\ncolumn: %s\n",
880 channel, dimm, bank, rank, page, col);
881}
882
276b824c
MCC
883static int write_and_test(struct pci_dev *dev, int where, u32 val)
884{
885 u32 read;
886 int count;
887
4157d9f5
MCC
888 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
889 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
890 where, val);
891
276b824c
MCC
892 for (count = 0; count < 10; count++) {
893 if (count)
b990538a 894 msleep(100);
276b824c
MCC
895 pci_write_config_dword(dev, where, val);
896 pci_read_config_dword(dev, where, &read);
897
898 if (read == val)
899 return 0;
900 }
901
4157d9f5
MCC
902 i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
903 "write=%08x. Read=%08x\n",
904 dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
905 where, val, read);
276b824c
MCC
906
907 return -EINVAL;
908}
909
194a40fe
MCC
910/*
911 * This routine prepares the Memory Controller for error injection.
912 * The error will be injected when some process tries to write to the
913 * memory that matches the given criteria.
914 * The criteria can be set in terms of a mask where dimm, rank, bank, page
915 * and col can be specified.
916 * A -1 value for any of the mask items will make the MCU to ignore
917 * that matching criteria for error injection.
918 *
919 * It should be noticed that the error will only happen after a write operation
920 * on a memory that matches the condition. if REPEAT_EN is not enabled at
921 * inject mask, then it will produce just one error. Otherwise, it will repeat
922 * until the injectmask would be cleaned.
923 *
924 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
925 * is reliable enough to check if the MC is using the
926 * three channels. However, this is not clear at the datasheet.
927 */
928static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
929 const char *data, size_t count)
930{
931 struct i7core_pvt *pvt = mci->pvt_info;
932 u32 injectmask;
933 u64 mask = 0;
934 int rc;
935 long enable;
936
f4742949 937 if (!pvt->pci_ch[pvt->inject.channel][0])
8f331907
MCC
938 return 0;
939
194a40fe
MCC
940 rc = strict_strtoul(data, 10, &enable);
941 if ((rc < 0))
942 return 0;
943
944 if (enable) {
945 pvt->inject.enable = 1;
946 } else {
947 disable_inject(mci);
948 return count;
949 }
950
951 /* Sets pvt->inject.dimm mask */
952 if (pvt->inject.dimm < 0)
7b029d03 953 mask |= 1L << 41;
194a40fe 954 else {
f4742949 955 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 956 mask |= (pvt->inject.dimm & 0x3L) << 35;
194a40fe 957 else
7b029d03 958 mask |= (pvt->inject.dimm & 0x1L) << 36;
194a40fe
MCC
959 }
960
961 /* Sets pvt->inject.rank mask */
962 if (pvt->inject.rank < 0)
7b029d03 963 mask |= 1L << 40;
194a40fe 964 else {
f4742949 965 if (pvt->channel[pvt->inject.channel].dimms > 2)
7b029d03 966 mask |= (pvt->inject.rank & 0x1L) << 34;
194a40fe 967 else
7b029d03 968 mask |= (pvt->inject.rank & 0x3L) << 34;
194a40fe
MCC
969 }
970
971 /* Sets pvt->inject.bank mask */
972 if (pvt->inject.bank < 0)
7b029d03 973 mask |= 1L << 39;
194a40fe 974 else
7b029d03 975 mask |= (pvt->inject.bank & 0x15L) << 30;
194a40fe
MCC
976
977 /* Sets pvt->inject.page mask */
978 if (pvt->inject.page < 0)
7b029d03 979 mask |= 1L << 38;
194a40fe 980 else
7b029d03 981 mask |= (pvt->inject.page & 0xffffL) << 14;
194a40fe
MCC
982
983 /* Sets pvt->inject.column mask */
984 if (pvt->inject.col < 0)
7b029d03 985 mask |= 1L << 37;
194a40fe 986 else
7b029d03 987 mask |= (pvt->inject.col & 0x3fffL);
194a40fe 988
276b824c
MCC
989 /*
990 * bit 0: REPEAT_EN
991 * bits 1-2: MASK_HALF_CACHELINE
992 * bit 3: INJECT_ECC
993 * bit 4: INJECT_ADDR_PARITY
994 */
995
996 injectmask = (pvt->inject.type & 1) |
997 (pvt->inject.section & 0x3) << 1 |
998 (pvt->inject.type & 0x6) << (3 - 1);
999
1000 /* Unlock writes to registers - this register is write only */
f4742949 1001 pci_write_config_dword(pvt->pci_noncore,
67166af4 1002 MC_CFG_CONTROL, 0x2);
e9bd2e73 1003
f4742949 1004 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe 1005 MC_CHANNEL_ADDR_MATCH, mask);
f4742949 1006 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
7b029d03 1007 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
7b029d03 1008
f4742949 1009 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
194a40fe
MCC
1010 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1011
f4742949 1012 write_and_test(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1013 MC_CHANNEL_ERROR_INJECT, injectmask);
276b824c 1014
194a40fe 1015 /*
276b824c
MCC
1016 * This is something undocumented, based on my tests
1017 * Without writing 8 to this register, errors aren't injected. Not sure
1018 * why.
194a40fe 1019 */
f4742949 1020 pci_write_config_dword(pvt->pci_noncore,
276b824c 1021 MC_CFG_CONTROL, 8);
194a40fe 1022
41fcb7fe
MCC
1023 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1024 " inject 0x%08x\n",
194a40fe
MCC
1025 mask, pvt->inject.eccmask, injectmask);
1026
7b029d03 1027
194a40fe
MCC
1028 return count;
1029}
1030
1031static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1032 char *data)
1033{
1034 struct i7core_pvt *pvt = mci->pvt_info;
7b029d03
MCC
1035 u32 injectmask;
1036
f4742949 1037 pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
4157d9f5 1038 MC_CHANNEL_ERROR_INJECT, &injectmask);
7b029d03
MCC
1039
1040 debugf0("Inject error read: 0x%018x\n", injectmask);
1041
1042 if (injectmask & 0x0c)
1043 pvt->inject.enable = 1;
1044
194a40fe
MCC
1045 return sprintf(data, "%d\n", pvt->inject.enable);
1046}
1047
442305b1
MCC
1048static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1049{
f4742949 1050 unsigned i, count, total = 0;
442305b1
MCC
1051 struct i7core_pvt *pvt = mci->pvt_info;
1052
f4742949
MCC
1053 if (!pvt->ce_count_available) {
1054 count = sprintf(data, "data unavailable\n");
1055 return 0;
67166af4 1056 }
f4742949
MCC
1057 if (!pvt->is_registered)
1058 count = sprintf(data, "all channels "
1059 "UDIMM0: %lu UDIMM1: %lu UDIMM2: %lu\n",
1060 pvt->udimm_ce_count[0],
1061 pvt->udimm_ce_count[1],
1062 pvt->udimm_ce_count[2]);
1063 else
1064 for (i = 0; i < NUM_CHANS; i++) {
1065 count = sprintf(data, "channel %d RDIMM0: %lu "
1066 "RDIMM1: %lu RDIMM2: %lu\n",
1067 i,
1068 pvt->rdimm_ce_count[i][0],
1069 pvt->rdimm_ce_count[i][1],
1070 pvt->rdimm_ce_count[i][2]);
1071 }
1072 data += count;
1073 total += count;
442305b1 1074
67166af4 1075 return total;
442305b1
MCC
1076}
1077
194a40fe
MCC
1078/*
1079 * Sysfs struct
1080 */
1081static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
194a40fe
MCC
1082 {
1083 .attr = {
1084 .name = "inject_section",
1085 .mode = (S_IRUGO | S_IWUSR)
1086 },
1087 .show = i7core_inject_section_show,
1088 .store = i7core_inject_section_store,
1089 }, {
1090 .attr = {
1091 .name = "inject_type",
1092 .mode = (S_IRUGO | S_IWUSR)
1093 },
1094 .show = i7core_inject_type_show,
1095 .store = i7core_inject_type_store,
1096 }, {
1097 .attr = {
1098 .name = "inject_eccmask",
1099 .mode = (S_IRUGO | S_IWUSR)
1100 },
1101 .show = i7core_inject_eccmask_show,
1102 .store = i7core_inject_eccmask_store,
1103 }, {
1104 .attr = {
1105 .name = "inject_addrmatch",
1106 .mode = (S_IRUGO | S_IWUSR)
1107 },
1108 .show = i7core_inject_addrmatch_show,
1109 .store = i7core_inject_addrmatch_store,
1110 }, {
1111 .attr = {
1112 .name = "inject_enable",
1113 .mode = (S_IRUGO | S_IWUSR)
1114 },
1115 .show = i7core_inject_enable_show,
1116 .store = i7core_inject_enable_store,
442305b1
MCC
1117 }, {
1118 .attr = {
1119 .name = "corrected_error_counts",
1120 .mode = (S_IRUGO | S_IWUSR)
1121 },
1122 .show = i7core_ce_regs_show,
1123 .store = NULL,
194a40fe
MCC
1124 },
1125};
1126
a0c36a1f
MCC
1127/****************************************************************************
1128 Device initialization routines: put/get, init/exit
1129 ****************************************************************************/
1130
1131/*
1132 * i7core_put_devices 'put' all the devices that we have
1133 * reserved via 'get'
1134 */
8f331907 1135static void i7core_put_devices(void)
a0c36a1f 1136{
67166af4 1137 int i, j;
a0c36a1f 1138
f4742949 1139 for (i = 0; i < max_num_sockets; i++) {
66607706
MCC
1140 struct i7core_dev *i7core_dev = get_i7core_dev(i);
1141 if (!i7core_dev)
1142 continue;
1143
67166af4 1144 for (j = 0; j < N_DEVS; j++)
66607706
MCC
1145 pci_dev_put(i7core_dev->pdev[j]);
1146
1147 list_del(&i7core_dev->list);
1148 kfree(i7core_dev->pdev);
1149 kfree(i7core_dev);
1150 }
a0c36a1f
MCC
1151}
1152
bc2d7245
KM
1153static void i7core_xeon_pci_fixup(void)
1154{
1155 struct pci_dev *pdev = NULL;
1156 int i;
1157 /*
1158 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1159 * aren't announced by acpi. So, we need to use a legacy scan probing
1160 * to detect them
1161 */
1162 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1163 pci_dev_descr[0].dev_id, NULL);
bc2d7245 1164 if (unlikely(!pdev)) {
f4742949 1165 for (i = 0; i < MAX_SOCKET_BUSES; i++)
bc2d7245
KM
1166 pcibios_scan_specific_bus(255-i);
1167 }
1168}
1169
a0c36a1f
MCC
1170/*
1171 * i7core_get_devices Find and perform 'get' operation on the MCH's
1172 * device/functions we want to reference for this driver
1173 *
1174 * Need to 'get' device 16 func 1 and func 2
1175 */
c77720b9 1176int i7core_get_onedevice(struct pci_dev **prev, int devno)
a0c36a1f 1177{
66607706
MCC
1178 struct i7core_dev *i7core_dev;
1179
8f331907 1180 struct pci_dev *pdev = NULL;
67166af4
MCC
1181 u8 bus = 0;
1182 u8 socket = 0;
a0c36a1f 1183
c77720b9 1184 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
66607706 1185 pci_dev_descr[devno].dev_id, *prev);
c77720b9 1186
c77720b9
MCC
1187 /*
1188 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1189 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1190 * to probe for the alternate address in case of failure
1191 */
66607706 1192 if (pci_dev_descr[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
c77720b9
MCC
1193 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1194 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
d1fd4fb6 1195
c77720b9
MCC
1196 if (!pdev) {
1197 if (*prev) {
1198 *prev = pdev;
1199 return 0;
d1fd4fb6
MCC
1200 }
1201
310cbb72 1202 /*
c77720b9
MCC
1203 * Dev 3 function 2 only exists on chips with RDIMMs
1204 * so, it is ok to not found it
310cbb72 1205 */
66607706 1206 if ((pci_dev_descr[devno].dev == 3) && (pci_dev_descr[devno].func == 2)) {
c77720b9
MCC
1207 *prev = pdev;
1208 return 0;
1209 }
310cbb72 1210
c77720b9
MCC
1211 i7core_printk(KERN_ERR,
1212 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1213 pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1214 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
67166af4 1215
c77720b9
MCC
1216 /* End of list, leave */
1217 return -ENODEV;
1218 }
1219 bus = pdev->bus->number;
67166af4 1220
c77720b9
MCC
1221 if (bus == 0x3f)
1222 socket = 0;
1223 else
1224 socket = 255 - bus;
1225
66607706
MCC
1226 i7core_dev = get_i7core_dev(socket);
1227 if (!i7core_dev) {
1228 i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
1229 if (!i7core_dev)
1230 return -ENOMEM;
1231 i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * N_DEVS,
1232 GFP_KERNEL);
1233 if (!i7core_dev->pdev)
1234 return -ENOMEM;
1235 i7core_dev->socket = socket;
1236 list_add_tail(&i7core_dev->list, &i7core_edac_list);
c77720b9 1237 }
67166af4 1238
66607706 1239 if (i7core_dev->pdev[devno]) {
c77720b9
MCC
1240 i7core_printk(KERN_ERR,
1241 "Duplicated device for "
1242 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1243 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1244 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1245 pci_dev_put(pdev);
1246 return -ENODEV;
1247 }
67166af4 1248
66607706 1249 i7core_dev->pdev[devno] = pdev;
c77720b9
MCC
1250
1251 /* Sanity check */
66607706
MCC
1252 if (unlikely(PCI_SLOT(pdev->devfn) != pci_dev_descr[devno].dev ||
1253 PCI_FUNC(pdev->devfn) != pci_dev_descr[devno].func)) {
c77720b9
MCC
1254 i7core_printk(KERN_ERR,
1255 "Device PCI ID %04x:%04x "
1256 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
66607706 1257 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id,
c77720b9 1258 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
66607706 1259 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func);
c77720b9
MCC
1260 return -ENODEV;
1261 }
ef708b53 1262
c77720b9
MCC
1263 /* Be sure that the device is enabled */
1264 if (unlikely(pci_enable_device(pdev) < 0)) {
1265 i7core_printk(KERN_ERR,
1266 "Couldn't enable "
1267 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1268 bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1269 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
c77720b9
MCC
1270 return -ENODEV;
1271 }
ef708b53 1272
c77720b9
MCC
1273 i7core_printk(KERN_INFO,
1274 "Registered socket %d "
1275 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
66607706
MCC
1276 socket, bus, pci_dev_descr[devno].dev, pci_dev_descr[devno].func,
1277 PCI_VENDOR_ID_INTEL, pci_dev_descr[devno].dev_id);
8f331907 1278
c77720b9 1279 *prev = pdev;
ef708b53 1280
c77720b9
MCC
1281 return 0;
1282}
a0c36a1f 1283
f4742949 1284static int i7core_get_devices(void)
c77720b9
MCC
1285{
1286 int i;
1287 struct pci_dev *pdev = NULL;
ef708b53 1288
c77720b9
MCC
1289 for (i = 0; i < N_DEVS; i++) {
1290 pdev = NULL;
1291 do {
1292 if (i7core_get_onedevice(&pdev, i) < 0) {
1293 i7core_put_devices();
1294 return -ENODEV;
1295 }
1296 } while (pdev);
1297 }
66607706 1298
ef708b53 1299 return 0;
ef708b53
MCC
1300}
1301
f4742949
MCC
1302static int mci_bind_devs(struct mem_ctl_info *mci,
1303 struct i7core_dev *i7core_dev)
ef708b53
MCC
1304{
1305 struct i7core_pvt *pvt = mci->pvt_info;
1306 struct pci_dev *pdev;
f4742949 1307 int i, func, slot;
ef708b53 1308
f4742949
MCC
1309 /* Associates i7core_dev and mci for future usage */
1310 pvt->i7core_dev = i7core_dev;
1311 i7core_dev->mci = mci;
66607706 1312
f4742949
MCC
1313 pvt->is_registered = 0;
1314 for (i = 0; i < N_DEVS; i++) {
1315 pdev = i7core_dev->pdev[i];
1316 if (!pdev)
66607706
MCC
1317 continue;
1318
f4742949
MCC
1319 func = PCI_FUNC(pdev->devfn);
1320 slot = PCI_SLOT(pdev->devfn);
1321 if (slot == 3) {
1322 if (unlikely(func > MAX_MCR_FUNC))
1323 goto error;
1324 pvt->pci_mcr[func] = pdev;
1325 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1326 if (unlikely(func > MAX_CHAN_FUNC))
ef708b53 1327 goto error;
f4742949
MCC
1328 pvt->pci_ch[slot - 4][func] = pdev;
1329 } else if (!slot && !func)
1330 pvt->pci_noncore = pdev;
1331 else
1332 goto error;
ef708b53 1333
f4742949
MCC
1334 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1335 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1336 pdev, i7core_dev->socket);
14d2c083 1337
f4742949
MCC
1338 if (PCI_SLOT(pdev->devfn) == 3 &&
1339 PCI_FUNC(pdev->devfn) == 2)
1340 pvt->is_registered = 1;
a0c36a1f 1341 }
e9bd2e73 1342
a0c36a1f 1343 return 0;
ef708b53
MCC
1344
1345error:
1346 i7core_printk(KERN_ERR, "Device %d, function %d "
1347 "is out of the expected range\n",
1348 slot, func);
1349 return -EINVAL;
a0c36a1f
MCC
1350}
1351
442305b1
MCC
1352/****************************************************************************
1353 Error check routines
1354 ****************************************************************************/
f4742949 1355static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
b4e8f0b6
MCC
1356 int chan, int dimm, int add)
1357{
1358 char *msg;
1359 struct i7core_pvt *pvt = mci->pvt_info;
f4742949 1360 int row = pvt->csrow_map[chan][dimm], i;
b4e8f0b6
MCC
1361
1362 for (i = 0; i < add; i++) {
1363 msg = kasprintf(GFP_KERNEL, "Corrected error "
f4742949
MCC
1364 "(Socket=%d channel=%d dimm=%d)",
1365 pvt->i7core_dev->socket, chan, dimm);
b4e8f0b6
MCC
1366
1367 edac_mc_handle_fbd_ce(mci, row, 0, msg);
1368 kfree (msg);
1369 }
1370}
1371
/*
 * i7core_rdimm_update_ce_count - fold new raw RDIMM corrected-error
 * counter readings for one channel into the running totals.
 *
 * @mci:  memory controller instance
 * @chan: channel whose counters were just read
 * @new0: latest raw counter value for DIMM 0
 * @new1: latest raw counter value for DIMM 1
 * @new2: latest raw counter value for DIMM 2
 *
 * The registers are free-running counters, so we accumulate the delta
 * against the value stored on the previous poll.  The first call only
 * seeds the baseline.  Any positive delta is forwarded to the EDAC core
 * via i7core_rdimm_update_csrow().
 */
static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
			int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		/*
		 * A negative delta means the hardware counter wrapped.
		 * NOTE(review): the "+= 0x7fff" correction implies the
		 * counters are 15 bits wide -- confirm against the
		 * MC_COR_ECC_CNT register layout in the datasheet.
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/*updated the edac core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}
1413
f4742949 1414static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
b4e8f0b6
MCC
1415{
1416 struct i7core_pvt *pvt = mci->pvt_info;
1417 u32 rcv[3][2];
1418 int i, new0, new1, new2;
1419
1420 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
f4742949 1421 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
b4e8f0b6 1422 &rcv[0][0]);
f4742949 1423 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
b4e8f0b6 1424 &rcv[0][1]);
f4742949 1425 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
b4e8f0b6 1426 &rcv[1][0]);
f4742949 1427 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
b4e8f0b6 1428 &rcv[1][1]);
f4742949 1429 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
b4e8f0b6 1430 &rcv[2][0]);
f4742949 1431 pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
b4e8f0b6
MCC
1432 &rcv[2][1]);
1433 for (i = 0 ; i < 3; i++) {
1434 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1435 (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1436 /*if the channel has 3 dimms*/
f4742949 1437 if (pvt->channel[i].dimms > 2) {
b4e8f0b6
MCC
1438 new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1439 new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1440 new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1441 } else {
1442 new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1443 DIMM_BOT_COR_ERR(rcv[i][0]);
1444 new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1445 DIMM_BOT_COR_ERR(rcv[i][1]);
1446 new2 = 0;
1447 }
1448
f4742949 1449 i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
b4e8f0b6
MCC
1450 }
1451}
442305b1
MCC
1452
/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 *
 * Polls the UDIMM corrected-error receive registers and accumulates the
 * per-DIMM deltas into pvt->udimm_ce_count[].  The first call only seeds
 * the baseline counters.
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	/* Dev 3 func 4 is not present on all configurations; bail quietly */
	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		/*
		 * A negative delta means the hardware counter wrapped.
		 * NOTE(review): "+= 0x7fff" implies 15-bit counters --
		 * confirm against the datasheet register layout.
		 */
		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}
1512
8a2f118e
MCC
1513/*
1514 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1515 * Architectures Software Developer’s Manual Volume 3B.
f237fcf2
MCC
1516 * Nehalem are defined as family 0x06, model 0x1a
1517 *
1518 * The MCA registers used here are the following ones:
8a2f118e 1519 * struct mce field MCA Register
f237fcf2
MCC
1520 * m->status MSR_IA32_MC8_STATUS
1521 * m->addr MSR_IA32_MC8_ADDR
1522 * m->misc MSR_IA32_MC8_MISC
8a2f118e
MCC
1523 * In the case of Nehalem, the error information is masked at .status and .misc
1524 * fields
1525 */
d5381642
MCC
1526static void i7core_mce_output_error(struct mem_ctl_info *mci,
1527 struct mce *m)
1528{
b4e8f0b6 1529 struct i7core_pvt *pvt = mci->pvt_info;
a639539f 1530 char *type, *optype, *err, *msg;
8a2f118e 1531 unsigned long error = m->status & 0x1ff0000l;
a639539f 1532 u32 optypenum = (m->status >> 4) & 0x07;
8a2f118e
MCC
1533 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1534 u32 dimm = (m->misc >> 16) & 0x3;
1535 u32 channel = (m->misc >> 18) & 0x3;
1536 u32 syndrome = m->misc >> 32;
1537 u32 errnum = find_first_bit(&error, 32);
b4e8f0b6 1538 int csrow;
8a2f118e 1539
c5d34528
MCC
1540 if (m->mcgstatus & 1)
1541 type = "FATAL";
1542 else
1543 type = "NON_FATAL";
1544
a639539f 1545 switch (optypenum) {
b990538a
MCC
1546 case 0:
1547 optype = "generic undef request";
1548 break;
1549 case 1:
1550 optype = "read error";
1551 break;
1552 case 2:
1553 optype = "write error";
1554 break;
1555 case 3:
1556 optype = "addr/cmd error";
1557 break;
1558 case 4:
1559 optype = "scrubbing error";
1560 break;
1561 default:
1562 optype = "reserved";
1563 break;
a639539f
MCC
1564 }
1565
8a2f118e
MCC
1566 switch (errnum) {
1567 case 16:
1568 err = "read ECC error";
1569 break;
1570 case 17:
1571 err = "RAS ECC error";
1572 break;
1573 case 18:
1574 err = "write parity error";
1575 break;
1576 case 19:
1577 err = "redundacy loss";
1578 break;
1579 case 20:
1580 err = "reserved";
1581 break;
1582 case 21:
1583 err = "memory range error";
1584 break;
1585 case 22:
1586 err = "RTID out of range";
1587 break;
1588 case 23:
1589 err = "address parity error";
1590 break;
1591 case 24:
1592 err = "byte enable parity error";
1593 break;
1594 default:
1595 err = "unknown";
d5381642 1596 }
d5381642 1597
f237fcf2 1598 /* FIXME: should convert addr into bank and rank information */
8a2f118e 1599 msg = kasprintf(GFP_ATOMIC,
f4742949 1600 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
a639539f 1601 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
f4742949 1602 type, (long long) m->addr, m->cpu, dimm, channel,
a639539f
MCC
1603 syndrome, core_err_cnt, (long long)m->status,
1604 (long long)m->misc, optype, err);
8a2f118e
MCC
1605
1606 debugf0("%s", msg);
d5381642 1607
f4742949 1608 csrow = pvt->csrow_map[channel][dimm];
b4e8f0b6 1609
d5381642 1610 /* Call the helper to output message */
b4e8f0b6
MCC
1611 if (m->mcgstatus & 1)
1612 edac_mc_handle_fbd_ue(mci, csrow, 0,
1613 0 /* FIXME: should be channel here */, msg);
f4742949 1614 else if (!pvt->is_registered)
b4e8f0b6
MCC
1615 edac_mc_handle_fbd_ce(mci, csrow,
1616 0 /* FIXME: should be channel here */, msg);
8a2f118e
MCC
1617
1618 kfree(msg);
d5381642
MCC
1619}
1620
87d1d272
MCC
1621/*
1622 * i7core_check_error Retrieve and process errors reported by the
1623 * hardware. Called by the Core module.
1624 */
1625static void i7core_check_error(struct mem_ctl_info *mci)
1626{
d5381642
MCC
1627 struct i7core_pvt *pvt = mci->pvt_info;
1628 int i;
1629 unsigned count = 0;
1630 struct mce *m = NULL;
1631 unsigned long flags;
1632
d5381642
MCC
1633 /* Copy all mce errors into a temporary buffer */
1634 spin_lock_irqsave(&pvt->mce_lock, flags);
1635 if (pvt->mce_count) {
1636 m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);
f4742949 1637
d5381642
MCC
1638 if (m) {
1639 count = pvt->mce_count;
1640 memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
1641 }
1642 pvt->mce_count = 0;
1643 }
f4742949 1644
d5381642
MCC
1645 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1646
1647 /* proccess mcelog errors */
1648 for (i = 0; i < count; i++)
1649 i7core_mce_output_error(mci, &m[i]);
1650
1651 kfree(m);
1652
1653 /* check memory count errors */
f4742949
MCC
1654 if (!pvt->is_registered)
1655 i7core_udimm_check_mc_ecc_err(mci);
1656 else
1657 i7core_rdimm_check_mc_ecc_err(mci);
87d1d272
MCC
1658}
1659
d5381642
MCC
1660/*
1661 * i7core_mce_check_error Replicates mcelog routine to get errors
1662 * This routine simply queues mcelog errors, and
1663 * return. The error itself should be handled later
1664 * by i7core_check_error.
1665 */
1666static int i7core_mce_check_error(void *priv, struct mce *mce)
1667{
c5d34528
MCC
1668 struct mem_ctl_info *mci = priv;
1669 struct i7core_pvt *pvt = mci->pvt_info;
d5381642
MCC
1670 unsigned long flags;
1671
8a2f118e
MCC
1672 /*
1673 * Just let mcelog handle it if the error is
1674 * outside the memory controller
1675 */
1676 if (((mce->status & 0xffff) >> 7) != 1)
1677 return 0;
1678
f237fcf2
MCC
1679 /* Bank 8 registers are the only ones that we know how to handle */
1680 if (mce->bank != 8)
1681 return 0;
1682
f4742949 1683 /* Only handle if it is the right mc controller */
6c6aa3af
MCC
1684 if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) {
1685 debugf0("mc%d: ignoring mce log for socket %d. "
1686 "Another mc should get it.\n",
1687 pvt->i7core_dev->socket,
1688 cpu_data(mce->cpu).phys_proc_id);
f4742949 1689 return 0;
6c6aa3af 1690 }
f4742949 1691
d5381642
MCC
1692 spin_lock_irqsave(&pvt->mce_lock, flags);
1693 if (pvt->mce_count < MCE_LOG_LEN) {
1694 memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
1695 pvt->mce_count++;
1696 }
1697 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1698
c5d34528
MCC
1699 /* Handle fatal errors immediately */
1700 if (mce->mcgstatus & 1)
1701 i7core_check_error(mci);
1702
d5381642 1703 /* Advice mcelog that the error were handled */
8a2f118e 1704 return 1;
d5381642
MCC
1705}
1706
f4742949
MCC
1707static int i7core_register_mci(struct i7core_dev *i7core_dev,
1708 int num_channels, int num_csrows)
a0c36a1f
MCC
1709{
1710 struct mem_ctl_info *mci;
1711 struct i7core_pvt *pvt;
ba6c5c62 1712 int csrow = 0;
f4742949 1713 int rc;
a0c36a1f 1714
a0c36a1f
MCC
1715 /* allocate a new MC control structure */
1716 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
f4742949
MCC
1717 if (unlikely(!mci))
1718 return -ENOMEM;
a0c36a1f
MCC
1719
1720 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1721
f4742949
MCC
1722 /* record ptr to the generic device */
1723 mci->dev = &i7core_dev->pdev[0]->dev;
1724
a0c36a1f 1725 pvt = mci->pvt_info;
ef708b53 1726 memset(pvt, 0, sizeof(*pvt));
a0c36a1f 1727 mci->mc_idx = 0;
67166af4 1728
41fcb7fe
MCC
1729 /*
1730 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1731 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1732 * memory channels
1733 */
1734 mci->mtype_cap = MEM_FLAG_DDR3;
a0c36a1f
MCC
1735 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1736 mci->edac_cap = EDAC_FLAG_NONE;
1737 mci->mod_name = "i7core_edac.c";
1738 mci->mod_ver = I7CORE_REVISION;
f4742949
MCC
1739 mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
1740 i7core_dev->socket);
1741 mci->dev_name = pci_name(i7core_dev->pdev[0]);
a0c36a1f 1742 mci->ctl_page_to_phys = NULL;
194a40fe 1743 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
87d1d272
MCC
1744 /* Set the function pointer to an actual operation function */
1745 mci->edac_check = i7core_check_error;
8f331907 1746
ef708b53 1747 /* Store pci devices at mci for faster access */
f4742949 1748 rc = mci_bind_devs(mci, i7core_dev);
41fcb7fe 1749 if (unlikely(rc < 0))
f4742949 1750 goto fail;
ef708b53
MCC
1751
1752 /* Get dimm basic config */
f4742949 1753 get_dimm_config(mci, &csrow);
ef708b53 1754
a0c36a1f 1755 /* add this new MC control structure to EDAC's list of MCs */
b7c76151 1756 if (unlikely(edac_mc_add_mc(mci))) {
a0c36a1f
MCC
1757 debugf0("MC: " __FILE__
1758 ": %s(): failed edac_mc_add_mc()\n", __func__);
1759 /* FIXME: perhaps some code should go here that disables error
1760 * reporting if we just enabled it
1761 */
b7c76151
MCC
1762
1763 rc = -EINVAL;
f4742949 1764 goto fail;
a0c36a1f
MCC
1765 }
1766
1767 /* allocating generic PCI control info */
f4742949
MCC
1768 i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
1769 EDAC_MOD_STR);
41fcb7fe 1770 if (unlikely(!i7core_pci)) {
a0c36a1f
MCC
1771 printk(KERN_WARNING
1772 "%s(): Unable to create PCI control\n",
1773 __func__);
1774 printk(KERN_WARNING
1775 "%s(): PCI error report via EDAC not setup\n",
1776 __func__);
1777 }
1778
194a40fe 1779 /* Default error mask is any memory */
ef708b53 1780 pvt->inject.channel = 0;
194a40fe
MCC
1781 pvt->inject.dimm = -1;
1782 pvt->inject.rank = -1;
1783 pvt->inject.bank = -1;
1784 pvt->inject.page = -1;
1785 pvt->inject.col = -1;
1786
d5381642 1787 /* Registers on edac_mce in order to receive memory errors */
c5d34528 1788 pvt->edac_mce.priv = mci;
d5381642
MCC
1789 pvt->edac_mce.check_error = i7core_mce_check_error;
1790 spin_lock_init(&pvt->mce_lock);
1791
1792 rc = edac_mce_register(&pvt->edac_mce);
b990538a 1793 if (unlikely(rc < 0)) {
d5381642
MCC
1794 debugf0("MC: " __FILE__
1795 ": %s(): failed edac_mce_register()\n", __func__);
f4742949
MCC
1796 }
1797
1798fail:
1799 edac_mc_free(mci);
1800 return rc;
1801}
1802
1803/*
1804 * i7core_probe Probe for ONE instance of device to see if it is
1805 * present.
1806 * return:
1807 * 0 for FOUND a device
1808 * < 0 for error code
1809 */
1810static int __devinit i7core_probe(struct pci_dev *pdev,
1811 const struct pci_device_id *id)
1812{
1813 int dev_idx = id->driver_data;
1814 int rc;
1815 struct i7core_dev *i7core_dev;
1816
1817 /*
1818 * FIXME: All memory controllers are allocated at the first pass.
1819 */
1820 if (unlikely(dev_idx >= 1))
1821 return -EINVAL;
1822
1823 /* get the pci devices we want to reserve for our use */
1824 mutex_lock(&i7core_edac_lock);
1825 rc = i7core_get_devices();
1826 if (unlikely(rc < 0))
1827 goto fail0;
1828
1829 list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
1830 int channels;
1831 int csrows;
1832
1833 /* Check the number of active and not disabled channels */
1834 rc = i7core_get_active_channels(i7core_dev->socket,
1835 &channels, &csrows);
1836 if (unlikely(rc < 0))
1837 goto fail1;
1838
1839 i7core_register_mci(i7core_dev, channels, csrows);
d5381642
MCC
1840 }
1841
ef708b53 1842 i7core_printk(KERN_INFO, "Driver loaded.\n");
8f331907 1843
66607706 1844 mutex_unlock(&i7core_edac_lock);
a0c36a1f
MCC
1845 return 0;
1846
66607706 1847fail1:
b7c76151 1848 i7core_put_devices();
66607706
MCC
1849fail0:
1850 mutex_unlock(&i7core_edac_lock);
b7c76151 1851 return rc;
a0c36a1f
MCC
1852}
1853
1854/*
1855 * i7core_remove destructor for one instance of device
1856 *
1857 */
1858static void __devexit i7core_remove(struct pci_dev *pdev)
1859{
1860 struct mem_ctl_info *mci;
d5381642 1861 struct i7core_pvt *pvt;
a0c36a1f
MCC
1862
1863 debugf0(__FILE__ ": %s()\n", __func__);
1864
1865 if (i7core_pci)
1866 edac_pci_release_generic_ctl(i7core_pci);
1867
87d1d272 1868
d5381642 1869 mci = edac_mc_del_mc(&pdev->dev);
a0c36a1f
MCC
1870 if (!mci)
1871 return;
1872
d5381642
MCC
1873 /* Unregisters on edac_mce in order to receive memory errors */
1874 pvt = mci->pvt_info;
1875 edac_mce_unregister(&pvt->edac_mce);
1876
a0c36a1f 1877 /* retrieve references to resources, and free those resources */
66607706 1878 mutex_lock(&i7core_edac_lock);
8f331907 1879 i7core_put_devices();
66607706 1880 mutex_unlock(&i7core_edac_lock);
a0c36a1f 1881
f4742949 1882 kfree(mci->ctl_name);
a0c36a1f
MCC
1883 edac_mc_free(mci);
1884}
1885
a0c36a1f
MCC
/* Export the PCI id table so module autoloading can match our devices */
MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 * i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};
1898
1899/*
1900 * i7core_init Module entry function
1901 * Try to initialize this module for its devices
1902 */
1903static int __init i7core_init(void)
1904{
1905 int pci_rc;
1906
1907 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1908
1909 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1910 opstate_init();
1911
bc2d7245
KM
1912 i7core_xeon_pci_fixup();
1913
a0c36a1f
MCC
1914 pci_rc = pci_register_driver(&i7core_driver);
1915
3ef288a9
MCC
1916 if (pci_rc >= 0)
1917 return 0;
1918
1919 i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
1920 pci_rc);
1921
1922 return pci_rc;
a0c36a1f
MCC
1923}
1924
1925/*
1926 * i7core_exit() Module exit function
1927 * Unregister the driver
1928 */
1929static void __exit i7core_exit(void)
1930{
1931 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1932 pci_unregister_driver(&i7core_driver);
1933}
1934
/* Module entry/exit hooks */
module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

/* Expose the EDAC polling/NMI mode as a read-only module parameter */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
This page took 0.141661 seconds and 5 git commands to generate.