1  
2  /*
3   * edac_device.c
4   * (C) 2007 www.douglaskthompson.com
5   *
6   * This file may be distributed under the terms of the
7   * GNU General Public License.
8   *
9   * Written by Doug Thompson <norsk5@xmission.com>
10   *
11   * edac_device API implementation
12   * 19 Jan 2007
13   */
14  
15  #include <asm/page.h>
16  #include <linux/uaccess.h>
17  #include <linux/ctype.h>
18  #include <linux/highmem.h>
19  #include <linux/init.h>
20  #include <linux/jiffies.h>
21  #include <linux/module.h>
22  #include <linux/slab.h>
23  #include <linux/smp.h>
24  #include <linux/spinlock.h>
25  #include <linux/sysctl.h>
26  #include <linux/timer.h>
27  
28  #include "edac_device.h"
29  #include "edac_module.h"
30  
/*
 * Global registry of EDAC devices.
 *
 * 'edac_device_list' chains the edac_device_ctl_info structures through
 * their 'link' member; every manipulation of the list is serialized by
 * 'device_ctls_mutex'.
 */
static DEFINE_MUTEX(device_ctls_mutex);
static LIST_HEAD(edac_device_list);

/*
 * Default workqueue processing interval on this instance, in msecs.
 * Used when a device is added with a zero 'poll_msec'.
 */
#define DEFAULT_POLL_INTERVAL 1000
39  
40  #ifdef CONFIG_EDAC_DEBUG
/*
 * edac_device_dump_device
 *	Debug helper, only built when CONFIG_EDAC_DEBUG is defined.
 *	Emits the main fields of @edac_dev through edac_dbg(): the
 *	'edac_check' pointer is reported at debug level 4, everything
 *	else at level 3.
 */
static void edac_device_dump_device(struct edac_device_ctl_info *edac_dev)
{
	edac_dbg(3, "\tedac_dev = %p dev_idx=%d\n",
		 edac_dev, edac_dev->dev_idx);
	edac_dbg(4, "\tedac_dev->edac_check = %p\n", edac_dev->edac_check);
	edac_dbg(3, "\tdev = %p\n", edac_dev->dev);
	edac_dbg(3, "\tmod_name:ctl_name = %s:%s\n",
		 edac_dev->mod_name, edac_dev->ctl_name);
	edac_dbg(3, "\tpvt_info = %p\n\n", edac_dev->pvt_info);
}
51  #endif				/* CONFIG_EDAC_DEBUG */
52  
53  /*
54   * @off_val: zero, 1, or other based offset
55   */
56  struct edac_device_ctl_info *
edac_device_alloc_ctl_info(unsigned pvt_sz,char * dev_name,unsigned nr_instances,char * blk_name,unsigned nr_blocks,unsigned off_val,int device_index)57  edac_device_alloc_ctl_info(unsigned pvt_sz, char *dev_name, unsigned nr_instances,
58  			   char *blk_name, unsigned nr_blocks, unsigned off_val,
59  			   int device_index)
60  {
61  	struct edac_device_block *dev_blk, *blk_p, *blk;
62  	struct edac_device_instance *dev_inst, *inst;
63  	struct edac_device_ctl_info *dev_ctl;
64  	unsigned instance, block;
65  	void *pvt;
66  	int err;
67  
68  	edac_dbg(4, "instances=%d blocks=%d\n", nr_instances, nr_blocks);
69  
70  	dev_ctl = kzalloc(sizeof(struct edac_device_ctl_info), GFP_KERNEL);
71  	if (!dev_ctl)
72  		return NULL;
73  
74  	dev_inst = kcalloc(nr_instances, sizeof(struct edac_device_instance), GFP_KERNEL);
75  	if (!dev_inst)
76  		goto free;
77  
78  	dev_ctl->instances = dev_inst;
79  
80  	dev_blk = kcalloc(nr_instances * nr_blocks, sizeof(struct edac_device_block), GFP_KERNEL);
81  	if (!dev_blk)
82  		goto free;
83  
84  	dev_ctl->blocks = dev_blk;
85  
86  	if (pvt_sz) {
87  		pvt = kzalloc(pvt_sz, GFP_KERNEL);
88  		if (!pvt)
89  			goto free;
90  
91  		dev_ctl->pvt_info = pvt;
92  	}
93  
94  	dev_ctl->dev_idx	= device_index;
95  	dev_ctl->nr_instances	= nr_instances;
96  
97  	/* Default logging of CEs and UEs */
98  	dev_ctl->log_ce = 1;
99  	dev_ctl->log_ue = 1;
100  
101  	/* Name of this edac device */
102  	snprintf(dev_ctl->name, sizeof(dev_ctl->name),"%s", dev_name);
103  
104  	/* Initialize every Instance */
105  	for (instance = 0; instance < nr_instances; instance++) {
106  		inst = &dev_inst[instance];
107  		inst->ctl = dev_ctl;
108  		inst->nr_blocks = nr_blocks;
109  		blk_p = &dev_blk[instance * nr_blocks];
110  		inst->blocks = blk_p;
111  
112  		/* name of this instance */
113  		snprintf(inst->name, sizeof(inst->name), "%s%u", dev_name, instance);
114  
115  		/* Initialize every block in each instance */
116  		for (block = 0; block < nr_blocks; block++) {
117  			blk = &blk_p[block];
118  			blk->instance = inst;
119  			snprintf(blk->name, sizeof(blk->name),
120  				 "%s%d", blk_name, block + off_val);
121  
122  			edac_dbg(4, "instance=%d inst_p=%p block=#%d block_p=%p name='%s'\n",
123  				 instance, inst, block, blk, blk->name);
124  		}
125  	}
126  
127  	/* Mark this instance as merely ALLOCATED */
128  	dev_ctl->op_state = OP_ALLOC;
129  
130  	/*
131  	 * Initialize the 'root' kobj for the edac_device controller
132  	 */
133  	err = edac_device_register_sysfs_main_kobj(dev_ctl);
134  	if (err)
135  		goto free;
136  
137  	/* at this point, the root kobj is valid, and in order to
138  	 * 'free' the object, then the function:
139  	 *	edac_device_unregister_sysfs_main_kobj() must be called
140  	 * which will perform kobj unregistration and the actual free
141  	 * will occur during the kobject callback operation
142  	 */
143  
144  	return dev_ctl;
145  
146  free:
147  	__edac_device_free_ctl_info(dev_ctl);
148  
149  	return NULL;
150  }
151  EXPORT_SYMBOL_GPL(edac_device_alloc_ctl_info);
152  
/*
 * edac_device_free_ctl_info
 *	Release @ctl_info by handing it to
 *	edac_device_unregister_sysfs_main_kobj().
 *
 *	NOTE(review): the memory is presumably released from the kobject
 *	release path rather than directly here - confirm in the sysfs code.
 */
void edac_device_free_ctl_info(struct edac_device_ctl_info *ctl_info)
{
	edac_device_unregister_sysfs_main_kobj(ctl_info);
}
EXPORT_SYMBOL_GPL(edac_device_free_ctl_info);
158  
159  /*
160   * find_edac_device_by_dev
161   *	scans the edac_device list for a specific 'struct device *'
162   *
163   *	lock to be held prior to call:	device_ctls_mutex
164   *
165   *	Return:
166   *		pointer to control structure managing 'dev'
167   *		NULL if not found on list
168   */
find_edac_device_by_dev(struct device * dev)169  static struct edac_device_ctl_info *find_edac_device_by_dev(struct device *dev)
170  {
171  	struct edac_device_ctl_info *edac_dev;
172  	struct list_head *item;
173  
174  	edac_dbg(0, "\n");
175  
176  	list_for_each(item, &edac_device_list) {
177  		edac_dev = list_entry(item, struct edac_device_ctl_info, link);
178  
179  		if (edac_dev->dev == dev)
180  			return edac_dev;
181  	}
182  
183  	return NULL;
184  }
185  
186  /*
187   * add_edac_dev_to_global_list
188   *	Before calling this function, caller must
189   *	assign a unique value to edac_dev->dev_idx.
190   *
191   *	lock to be held prior to call:	device_ctls_mutex
192   *
193   *	Return:
194   *		0 on success
195   *		1 on failure.
196   */
add_edac_dev_to_global_list(struct edac_device_ctl_info * edac_dev)197  static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
198  {
199  	struct list_head *item, *insert_before;
200  	struct edac_device_ctl_info *rover;
201  
202  	insert_before = &edac_device_list;
203  
204  	/* Determine if already on the list */
205  	rover = find_edac_device_by_dev(edac_dev->dev);
206  	if (unlikely(rover != NULL))
207  		goto fail0;
208  
209  	/* Insert in ascending order by 'dev_idx', so find position */
210  	list_for_each(item, &edac_device_list) {
211  		rover = list_entry(item, struct edac_device_ctl_info, link);
212  
213  		if (rover->dev_idx >= edac_dev->dev_idx) {
214  			if (unlikely(rover->dev_idx == edac_dev->dev_idx))
215  				goto fail1;
216  
217  			insert_before = item;
218  			break;
219  		}
220  	}
221  
222  	list_add_tail_rcu(&edac_dev->link, insert_before);
223  	return 0;
224  
225  fail0:
226  	edac_printk(KERN_WARNING, EDAC_MC,
227  			"%s (%s) %s %s already assigned %d\n",
228  			dev_name(rover->dev), edac_dev_name(rover),
229  			rover->mod_name, rover->ctl_name, rover->dev_idx);
230  	return 1;
231  
232  fail1:
233  	edac_printk(KERN_WARNING, EDAC_MC,
234  			"bug in low-level driver: attempt to assign\n"
235  			"    duplicate dev_idx %d in %s()\n", rover->dev_idx,
236  			__func__);
237  	return 1;
238  }
239  
/*
 * del_edac_device_from_global_list
 *	Unlink @edac_device from the global list with list_del_rcu(),
 *	wait for an RCU grace period, then re-initialize its 'link'
 *	member so that it is an empty list head again.
 *
 *	The statement order matters: 'link' is only reset after
 *	synchronize_rcu() has returned.
 */
static void del_edac_device_from_global_list(struct edac_device_ctl_info
						*edac_device)
{
	list_del_rcu(&edac_device->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&edac_device->link);
}
254  
255  /*
256   * edac_device_workq_function
257   *	performs the operation scheduled by a workq request
258   *
259   *	this workq is embedded within an edac_device_ctl_info
260   *	structure, that needs to be polled for possible error events.
261   *
262   *	This operation is to acquire the list mutex lock
263   *	(thus preventing insertation or deletion)
264   *	and then call the device's poll function IFF this device is
265   *	running polled and there is a poll function defined.
266   */
edac_device_workq_function(struct work_struct * work_req)267  static void edac_device_workq_function(struct work_struct *work_req)
268  {
269  	struct delayed_work *d_work = to_delayed_work(work_req);
270  	struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work);
271  
272  	mutex_lock(&device_ctls_mutex);
273  
274  	/* If we are being removed, bail out immediately */
275  	if (edac_dev->op_state == OP_OFFLINE) {
276  		mutex_unlock(&device_ctls_mutex);
277  		return;
278  	}
279  
280  	/* Only poll controllers that are running polled and have a check */
281  	if ((edac_dev->op_state == OP_RUNNING_POLL) &&
282  		(edac_dev->edac_check != NULL)) {
283  			edac_dev->edac_check(edac_dev);
284  	}
285  
286  	mutex_unlock(&device_ctls_mutex);
287  
288  	/* Reschedule the workq for the next time period to start again
289  	 * if the number of msec is for 1 sec, then adjust to the next
290  	 * whole one second to save timers firing all over the period
291  	 * between integral seconds
292  	 */
293  	if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
294  		edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
295  	else
296  		edac_queue_work(&edac_dev->work, edac_dev->delay);
297  }
298  
299  /*
300   * edac_device_workq_setup
301   *	initialize a workq item for this edac_device instance
302   *	passing in the new delay period in msec
303   */
edac_device_workq_setup(struct edac_device_ctl_info * edac_dev,unsigned msec)304  static void edac_device_workq_setup(struct edac_device_ctl_info *edac_dev,
305  				    unsigned msec)
306  {
307  	edac_dbg(0, "\n");
308  
309  	/* take the arg 'msec' and set it into the control structure
310  	 * to used in the time period calculation
311  	 * then calc the number of jiffies that represents
312  	 */
313  	edac_dev->poll_msec = msec;
314  	edac_dev->delay = msecs_to_jiffies(msec);
315  
316  	INIT_DELAYED_WORK(&edac_dev->work, edac_device_workq_function);
317  
318  	/* optimize here for the 1 second case, which will be normal value, to
319  	 * fire ON the 1 second time event. This helps reduce all sorts of
320  	 * timers firing on sub-second basis, while they are happy
321  	 * to fire together on the 1 second exactly
322  	 */
323  	if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
324  		edac_queue_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
325  	else
326  		edac_queue_work(&edac_dev->work, edac_dev->delay);
327  }
328  
329  /*
330   * edac_device_workq_teardown
331   *	stop the workq processing on this edac_dev
332   */
edac_device_workq_teardown(struct edac_device_ctl_info * edac_dev)333  static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev)
334  {
335  	if (!edac_dev->edac_check)
336  		return;
337  
338  	edac_dev->op_state = OP_OFFLINE;
339  
340  	edac_stop_work(&edac_dev->work);
341  }
342  
343  /*
344   * edac_device_reset_delay_period
345   *
346   *	need to stop any outstanding workq queued up at this time
347   *	because we will be resetting the sleep time.
348   *	Then restart the workq on the new delay
349   */
edac_device_reset_delay_period(struct edac_device_ctl_info * edac_dev,unsigned long msec)350  void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
351  				    unsigned long msec)
352  {
353  	edac_dev->poll_msec = msec;
354  	edac_dev->delay	    = msecs_to_jiffies(msec);
355  
356  	/* See comment in edac_device_workq_setup() above */
357  	if (edac_dev->poll_msec == DEFAULT_POLL_INTERVAL)
358  		edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
359  	else
360  		edac_mod_work(&edac_dev->work, edac_dev->delay);
361  }
362  
edac_device_alloc_index(void)363  int edac_device_alloc_index(void)
364  {
365  	static atomic_t device_indexes = ATOMIC_INIT(0);
366  
367  	return atomic_inc_return(&device_indexes) - 1;
368  }
369  EXPORT_SYMBOL_GPL(edac_device_alloc_index);
370  
edac_device_add_device(struct edac_device_ctl_info * edac_dev)371  int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
372  {
373  	edac_dbg(0, "\n");
374  
375  #ifdef CONFIG_EDAC_DEBUG
376  	if (edac_debug_level >= 3)
377  		edac_device_dump_device(edac_dev);
378  #endif
379  	mutex_lock(&device_ctls_mutex);
380  
381  	if (add_edac_dev_to_global_list(edac_dev))
382  		goto fail0;
383  
384  	/* set load time so that error rate can be tracked */
385  	edac_dev->start_time = jiffies;
386  
387  	/* create this instance's sysfs entries */
388  	if (edac_device_create_sysfs(edac_dev)) {
389  		edac_device_printk(edac_dev, KERN_WARNING,
390  					"failed to create sysfs device\n");
391  		goto fail1;
392  	}
393  
394  	/* If there IS a check routine, then we are running POLLED */
395  	if (edac_dev->edac_check != NULL) {
396  		/* This instance is NOW RUNNING */
397  		edac_dev->op_state = OP_RUNNING_POLL;
398  
399  		edac_device_workq_setup(edac_dev, edac_dev->poll_msec ?: DEFAULT_POLL_INTERVAL);
400  	} else {
401  		edac_dev->op_state = OP_RUNNING_INTERRUPT;
402  	}
403  
404  	/* Report action taken */
405  	edac_device_printk(edac_dev, KERN_INFO,
406  		"Giving out device to module %s controller %s: DEV %s (%s)\n",
407  		edac_dev->mod_name, edac_dev->ctl_name, edac_dev->dev_name,
408  		edac_op_state_to_string(edac_dev->op_state));
409  
410  	mutex_unlock(&device_ctls_mutex);
411  	return 0;
412  
413  fail1:
414  	/* Some error, so remove the entry from the lsit */
415  	del_edac_device_from_global_list(edac_dev);
416  
417  fail0:
418  	mutex_unlock(&device_ctls_mutex);
419  	return 1;
420  }
421  EXPORT_SYMBOL_GPL(edac_device_add_device);
422  
edac_device_del_device(struct device * dev)423  struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
424  {
425  	struct edac_device_ctl_info *edac_dev;
426  
427  	edac_dbg(0, "\n");
428  
429  	mutex_lock(&device_ctls_mutex);
430  
431  	/* Find the structure on the list, if not there, then leave */
432  	edac_dev = find_edac_device_by_dev(dev);
433  	if (edac_dev == NULL) {
434  		mutex_unlock(&device_ctls_mutex);
435  		return NULL;
436  	}
437  
438  	/* mark this instance as OFFLINE */
439  	edac_dev->op_state = OP_OFFLINE;
440  
441  	/* deregister from global list */
442  	del_edac_device_from_global_list(edac_dev);
443  
444  	mutex_unlock(&device_ctls_mutex);
445  
446  	/* clear workq processing on this instance */
447  	edac_device_workq_teardown(edac_dev);
448  
449  	/* Tear down the sysfs entries for this instance */
450  	edac_device_remove_sysfs(edac_dev);
451  
452  	edac_printk(KERN_INFO, EDAC_MC,
453  		"Removed device %d for %s %s: DEV %s\n",
454  		edac_dev->dev_idx,
455  		edac_dev->mod_name, edac_dev->ctl_name, edac_dev_name(edac_dev));
456  
457  	return edac_dev;
458  }
459  EXPORT_SYMBOL_GPL(edac_device_del_device);
460  
/* Return the 'log_ce' setting of @edac_dev; non-zero enables CE logging */
static inline int edac_device_get_log_ce(struct edac_device_ctl_info *edac_dev)
{
	return edac_dev->log_ce;
}
465  
/* Return the 'log_ue' setting of @edac_dev; non-zero enables UE logging */
static inline int edac_device_get_log_ue(struct edac_device_ctl_info *edac_dev)
{
	return edac_dev->log_ue;
}
470  
/*
 * Return the 'panic_on_ue' setting of @edac_dev; when it is non-zero the
 * UE handler in this file calls panic() after accounting the error.
 */
static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
					*edac_dev)
{
	return edac_dev->panic_on_ue;
}
476  
edac_device_handle_ce_count(struct edac_device_ctl_info * edac_dev,unsigned int count,int inst_nr,int block_nr,const char * msg)477  void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
478  				 unsigned int count, int inst_nr, int block_nr,
479  				 const char *msg)
480  {
481  	struct edac_device_instance *instance;
482  	struct edac_device_block *block = NULL;
483  
484  	if (!count)
485  		return;
486  
487  	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
488  		edac_device_printk(edac_dev, KERN_ERR,
489  				"INTERNAL ERROR: 'instance' out of range "
490  				"(%d >= %d)\n", inst_nr,
491  				edac_dev->nr_instances);
492  		return;
493  	}
494  
495  	instance = edac_dev->instances + inst_nr;
496  
497  	if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
498  		edac_device_printk(edac_dev, KERN_ERR,
499  				"INTERNAL ERROR: instance %d 'block' "
500  				"out of range (%d >= %d)\n",
501  				inst_nr, block_nr,
502  				instance->nr_blocks);
503  		return;
504  	}
505  
506  	if (instance->nr_blocks > 0) {
507  		block = instance->blocks + block_nr;
508  		block->counters.ce_count += count;
509  	}
510  
511  	/* Propagate the count up the 'totals' tree */
512  	instance->counters.ce_count += count;
513  	edac_dev->counters.ce_count += count;
514  
515  	if (edac_device_get_log_ce(edac_dev))
516  		edac_device_printk(edac_dev, KERN_WARNING,
517  				   "CE: %s instance: %s block: %s count: %d '%s'\n",
518  				   edac_dev->ctl_name, instance->name,
519  				   block ? block->name : "N/A", count, msg);
520  }
521  EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);
522  
edac_device_handle_ue_count(struct edac_device_ctl_info * edac_dev,unsigned int count,int inst_nr,int block_nr,const char * msg)523  void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
524  				 unsigned int count, int inst_nr, int block_nr,
525  				 const char *msg)
526  {
527  	struct edac_device_instance *instance;
528  	struct edac_device_block *block = NULL;
529  
530  	if (!count)
531  		return;
532  
533  	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
534  		edac_device_printk(edac_dev, KERN_ERR,
535  				"INTERNAL ERROR: 'instance' out of range "
536  				"(%d >= %d)\n", inst_nr,
537  				edac_dev->nr_instances);
538  		return;
539  	}
540  
541  	instance = edac_dev->instances + inst_nr;
542  
543  	if ((block_nr >= instance->nr_blocks) || (block_nr < 0)) {
544  		edac_device_printk(edac_dev, KERN_ERR,
545  				"INTERNAL ERROR: instance %d 'block' "
546  				"out of range (%d >= %d)\n",
547  				inst_nr, block_nr,
548  				instance->nr_blocks);
549  		return;
550  	}
551  
552  	if (instance->nr_blocks > 0) {
553  		block = instance->blocks + block_nr;
554  		block->counters.ue_count += count;
555  	}
556  
557  	/* Propagate the count up the 'totals' tree */
558  	instance->counters.ue_count += count;
559  	edac_dev->counters.ue_count += count;
560  
561  	if (edac_device_get_log_ue(edac_dev))
562  		edac_device_printk(edac_dev, KERN_EMERG,
563  				   "UE: %s instance: %s block: %s count: %d '%s'\n",
564  				   edac_dev->ctl_name, instance->name,
565  				   block ? block->name : "N/A", count, msg);
566  
567  	if (edac_device_get_panic_on_ue(edac_dev))
568  		panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
569  		      edac_dev->ctl_name, instance->name,
570  		      block ? block->name : "N/A", count, msg);
571  }
572  EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
573