1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * tracing_map - lock-free map for tracing
4   *
5   * Copyright (C) 2015 Tom Zanussi <tom.zanussi@linux.intel.com>
6   *
7   * tracing_map implementation inspired by lock-free map algorithms
8   * originated by Dr. Cliff Click:
9   *
10   * http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable
11   * http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf
12   */
13  
14  #include <linux/vmalloc.h>
15  #include <linux/jhash.h>
16  #include <linux/slab.h>
17  #include <linux/sort.h>
18  #include <linux/kmemleak.h>
19  
20  #include "tracing_map.h"
21  #include "trace.h"
22  
23  /*
24   * NOTE: For a detailed description of the data structures used by
25   * these functions (such as tracing_map_elt) please see the overview
26   * of tracing_map data structures at the beginning of tracing_map.h.
27   */
28  
29  /**
30   * tracing_map_update_sum - Add a value to a tracing_map_elt's sum field
31   * @elt: The tracing_map_elt
32   * @i: The index of the given sum associated with the tracing_map_elt
33   * @n: The value to add to the sum
34   *
35   * Add n to sum i associated with the specified tracing_map_elt
36   * instance.  The index i is the index returned by the call to
37   * tracing_map_add_sum_field() when the tracing map was set up.
38   */
tracing_map_update_sum(struct tracing_map_elt * elt,unsigned int i,u64 n)39  void tracing_map_update_sum(struct tracing_map_elt *elt, unsigned int i, u64 n)
40  {
41  	atomic64_add(n, &elt->fields[i].sum);
42  }
43  
44  /**
45   * tracing_map_read_sum - Return the value of a tracing_map_elt's sum field
46   * @elt: The tracing_map_elt
47   * @i: The index of the given sum associated with the tracing_map_elt
48   *
49   * Retrieve the value of the sum i associated with the specified
50   * tracing_map_elt instance.  The index i is the index returned by the
51   * call to tracing_map_add_sum_field() when the tracing map was set
52   * up.
53   *
54   * Return: The sum associated with field i for elt.
55   */
tracing_map_read_sum(struct tracing_map_elt * elt,unsigned int i)56  u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i)
57  {
58  	return (u64)atomic64_read(&elt->fields[i].sum);
59  }
60  
61  /**
62   * tracing_map_set_var - Assign a tracing_map_elt's variable field
63   * @elt: The tracing_map_elt
64   * @i: The index of the given variable associated with the tracing_map_elt
65   * @n: The value to assign
66   *
67   * Assign n to variable i associated with the specified tracing_map_elt
68   * instance.  The index i is the index returned by the call to
69   * tracing_map_add_var() when the tracing map was set up.
70   */
tracing_map_set_var(struct tracing_map_elt * elt,unsigned int i,u64 n)71  void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n)
72  {
73  	atomic64_set(&elt->vars[i], n);
74  	elt->var_set[i] = true;
75  }
76  
77  /**
78   * tracing_map_var_set - Return whether or not a variable has been set
79   * @elt: The tracing_map_elt
80   * @i: The index of the given variable associated with the tracing_map_elt
81   *
82   * Return true if the variable has been set, false otherwise.  The
83   * index i is the index returned by the call to tracing_map_add_var()
84   * when the tracing map was set up.
85   */
tracing_map_var_set(struct tracing_map_elt * elt,unsigned int i)86  bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i)
87  {
88  	return elt->var_set[i];
89  }
90  
91  /**
92   * tracing_map_read_var - Return the value of a tracing_map_elt's variable field
93   * @elt: The tracing_map_elt
94   * @i: The index of the given variable associated with the tracing_map_elt
95   *
96   * Retrieve the value of the variable i associated with the specified
97   * tracing_map_elt instance.  The index i is the index returned by the
98   * call to tracing_map_add_var() when the tracing map was set
99   * up.
100   *
101   * Return: The variable value associated with field i for elt.
102   */
tracing_map_read_var(struct tracing_map_elt * elt,unsigned int i)103  u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i)
104  {
105  	return (u64)atomic64_read(&elt->vars[i]);
106  }
107  
108  /**
109   * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field
110   * @elt: The tracing_map_elt
111   * @i: The index of the given variable associated with the tracing_map_elt
112   *
113   * Retrieve the value of the variable i associated with the specified
114   * tracing_map_elt instance, and reset the variable to the 'not set'
115   * state.  The index i is the index returned by the call to
116   * tracing_map_add_var() when the tracing map was set up.  The reset
117   * essentially makes the variable a read-once variable if it's only
118   * accessed using this function.
119   *
120   * Return: The variable value associated with field i for elt.
121   */
tracing_map_read_var_once(struct tracing_map_elt * elt,unsigned int i)122  u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i)
123  {
124  	elt->var_set[i] = false;
125  	return (u64)atomic64_read(&elt->vars[i]);
126  }
127  
tracing_map_cmp_string(void * val_a,void * val_b)128  int tracing_map_cmp_string(void *val_a, void *val_b)
129  {
130  	char *a = val_a;
131  	char *b = val_b;
132  
133  	return strcmp(a, b);
134  }
135  
tracing_map_cmp_none(void * val_a,void * val_b)136  int tracing_map_cmp_none(void *val_a, void *val_b)
137  {
138  	return 0;
139  }
140  
tracing_map_cmp_atomic64(void * val_a,void * val_b)141  static int tracing_map_cmp_atomic64(void *val_a, void *val_b)
142  {
143  	u64 a = atomic64_read((atomic64_t *)val_a);
144  	u64 b = atomic64_read((atomic64_t *)val_b);
145  
146  	return (a > b) ? 1 : ((a < b) ? -1 : 0);
147  }
148  
149  #define DEFINE_TRACING_MAP_CMP_FN(type)					\
150  static int tracing_map_cmp_##type(void *val_a, void *val_b)		\
151  {									\
152  	type a = (type)(*(u64 *)val_a);					\
153  	type b = (type)(*(u64 *)val_b);					\
154  									\
155  	return (a > b) ? 1 : ((a < b) ? -1 : 0);			\
156  }
157  
158  DEFINE_TRACING_MAP_CMP_FN(s64);
159  DEFINE_TRACING_MAP_CMP_FN(u64);
160  DEFINE_TRACING_MAP_CMP_FN(s32);
161  DEFINE_TRACING_MAP_CMP_FN(u32);
162  DEFINE_TRACING_MAP_CMP_FN(s16);
163  DEFINE_TRACING_MAP_CMP_FN(u16);
164  DEFINE_TRACING_MAP_CMP_FN(s8);
165  DEFINE_TRACING_MAP_CMP_FN(u8);
166  
tracing_map_cmp_num(int field_size,int field_is_signed)167  tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size,
168  					 int field_is_signed)
169  {
170  	tracing_map_cmp_fn_t fn = tracing_map_cmp_none;
171  
172  	switch (field_size) {
173  	case 8:
174  		if (field_is_signed)
175  			fn = tracing_map_cmp_s64;
176  		else
177  			fn = tracing_map_cmp_u64;
178  		break;
179  	case 4:
180  		if (field_is_signed)
181  			fn = tracing_map_cmp_s32;
182  		else
183  			fn = tracing_map_cmp_u32;
184  		break;
185  	case 2:
186  		if (field_is_signed)
187  			fn = tracing_map_cmp_s16;
188  		else
189  			fn = tracing_map_cmp_u16;
190  		break;
191  	case 1:
192  		if (field_is_signed)
193  			fn = tracing_map_cmp_s8;
194  		else
195  			fn = tracing_map_cmp_u8;
196  		break;
197  	}
198  
199  	return fn;
200  }
201  
tracing_map_add_field(struct tracing_map * map,tracing_map_cmp_fn_t cmp_fn)202  static int tracing_map_add_field(struct tracing_map *map,
203  				 tracing_map_cmp_fn_t cmp_fn)
204  {
205  	int ret = -EINVAL;
206  
207  	if (map->n_fields < TRACING_MAP_FIELDS_MAX) {
208  		ret = map->n_fields;
209  		map->fields[map->n_fields++].cmp_fn = cmp_fn;
210  	}
211  
212  	return ret;
213  }
214  
215  /**
216   * tracing_map_add_sum_field - Add a field describing a tracing_map sum
217   * @map: The tracing_map
218   *
219   * Add a sum field to the key and return the index identifying it in
220   * the map and associated tracing_map_elts.  This is the index used
221   * for instance to update a sum for a particular tracing_map_elt using
222   * tracing_map_update_sum() or reading it via tracing_map_read_sum().
223   *
224   * Return: The index identifying the field in the map and associated
225   * tracing_map_elts, or -EINVAL on error.
226   */
tracing_map_add_sum_field(struct tracing_map * map)227  int tracing_map_add_sum_field(struct tracing_map *map)
228  {
229  	return tracing_map_add_field(map, tracing_map_cmp_atomic64);
230  }
231  
232  /**
233   * tracing_map_add_var - Add a field describing a tracing_map var
234   * @map: The tracing_map
235   *
236   * Add a var to the map and return the index identifying it in the map
237   * and associated tracing_map_elts.  This is the index used for
238   * instance to update a var for a particular tracing_map_elt using
239   * tracing_map_update_var() or reading it via tracing_map_read_var().
240   *
241   * Return: The index identifying the var in the map and associated
242   * tracing_map_elts, or -EINVAL on error.
243   */
tracing_map_add_var(struct tracing_map * map)244  int tracing_map_add_var(struct tracing_map *map)
245  {
246  	int ret = -EINVAL;
247  
248  	if (map->n_vars < TRACING_MAP_VARS_MAX)
249  		ret = map->n_vars++;
250  
251  	return ret;
252  }
253  
254  /**
255   * tracing_map_add_key_field - Add a field describing a tracing_map key
256   * @map: The tracing_map
257   * @offset: The offset within the key
258   * @cmp_fn: The comparison function that will be used to sort on the key
259   *
260   * Let the map know there is a key and that if it's used as a sort key
261   * to use cmp_fn.
262   *
263   * A key can be a subset of a compound key; for that purpose, the
264   * offset param is used to describe where within the compound key
265   * the key referenced by this key field resides.
266   *
267   * Return: The index identifying the field in the map and associated
268   * tracing_map_elts, or -EINVAL on error.
269   */
tracing_map_add_key_field(struct tracing_map * map,unsigned int offset,tracing_map_cmp_fn_t cmp_fn)270  int tracing_map_add_key_field(struct tracing_map *map,
271  			      unsigned int offset,
272  			      tracing_map_cmp_fn_t cmp_fn)
273  
274  {
275  	int idx = tracing_map_add_field(map, cmp_fn);
276  
277  	if (idx < 0)
278  		return idx;
279  
280  	map->fields[idx].offset = offset;
281  
282  	map->key_idx[map->n_keys++] = idx;
283  
284  	return idx;
285  }
286  
tracing_map_array_clear(struct tracing_map_array * a)287  static void tracing_map_array_clear(struct tracing_map_array *a)
288  {
289  	unsigned int i;
290  
291  	if (!a->pages)
292  		return;
293  
294  	for (i = 0; i < a->n_pages; i++)
295  		memset(a->pages[i], 0, PAGE_SIZE);
296  }
297  
tracing_map_array_free(struct tracing_map_array * a)298  static void tracing_map_array_free(struct tracing_map_array *a)
299  {
300  	unsigned int i;
301  
302  	if (!a)
303  		return;
304  
305  	if (!a->pages)
306  		goto free;
307  
308  	for (i = 0; i < a->n_pages; i++) {
309  		if (!a->pages[i])
310  			break;
311  		kmemleak_free(a->pages[i]);
312  		free_page((unsigned long)a->pages[i]);
313  	}
314  
315  	kfree(a->pages);
316  
317   free:
318  	kfree(a);
319  }
320  
tracing_map_array_alloc(unsigned int n_elts,unsigned int entry_size)321  static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts,
322  						  unsigned int entry_size)
323  {
324  	struct tracing_map_array *a;
325  	unsigned int i;
326  
327  	a = kzalloc(sizeof(*a), GFP_KERNEL);
328  	if (!a)
329  		return NULL;
330  
331  	a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1);
332  	a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift);
333  	a->n_pages = n_elts / a->entries_per_page;
334  	if (!a->n_pages)
335  		a->n_pages = 1;
336  	a->entry_shift = fls(a->entries_per_page) - 1;
337  	a->entry_mask = (1 << a->entry_shift) - 1;
338  
339  	a->pages = kcalloc(a->n_pages, sizeof(void *), GFP_KERNEL);
340  	if (!a->pages)
341  		goto free;
342  
343  	for (i = 0; i < a->n_pages; i++) {
344  		a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
345  		if (!a->pages[i])
346  			goto free;
347  		kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL);
348  	}
349   out:
350  	return a;
351   free:
352  	tracing_map_array_free(a);
353  	a = NULL;
354  
355  	goto out;
356  }
357  
tracing_map_elt_clear(struct tracing_map_elt * elt)358  static void tracing_map_elt_clear(struct tracing_map_elt *elt)
359  {
360  	unsigned i;
361  
362  	for (i = 0; i < elt->map->n_fields; i++)
363  		if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64)
364  			atomic64_set(&elt->fields[i].sum, 0);
365  
366  	for (i = 0; i < elt->map->n_vars; i++) {
367  		atomic64_set(&elt->vars[i], 0);
368  		elt->var_set[i] = false;
369  	}
370  
371  	if (elt->map->ops && elt->map->ops->elt_clear)
372  		elt->map->ops->elt_clear(elt);
373  }
374  
tracing_map_elt_init_fields(struct tracing_map_elt * elt)375  static void tracing_map_elt_init_fields(struct tracing_map_elt *elt)
376  {
377  	unsigned int i;
378  
379  	tracing_map_elt_clear(elt);
380  
381  	for (i = 0; i < elt->map->n_fields; i++) {
382  		elt->fields[i].cmp_fn = elt->map->fields[i].cmp_fn;
383  
384  		if (elt->fields[i].cmp_fn != tracing_map_cmp_atomic64)
385  			elt->fields[i].offset = elt->map->fields[i].offset;
386  	}
387  }
388  
tracing_map_elt_free(struct tracing_map_elt * elt)389  static void tracing_map_elt_free(struct tracing_map_elt *elt)
390  {
391  	if (!elt)
392  		return;
393  
394  	if (elt->map->ops && elt->map->ops->elt_free)
395  		elt->map->ops->elt_free(elt);
396  	kfree(elt->fields);
397  	kfree(elt->vars);
398  	kfree(elt->var_set);
399  	kfree(elt->key);
400  	kfree(elt);
401  }
402  
tracing_map_elt_alloc(struct tracing_map * map)403  static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map)
404  {
405  	struct tracing_map_elt *elt;
406  	int err = 0;
407  
408  	elt = kzalloc(sizeof(*elt), GFP_KERNEL);
409  	if (!elt)
410  		return ERR_PTR(-ENOMEM);
411  
412  	elt->map = map;
413  
414  	elt->key = kzalloc(map->key_size, GFP_KERNEL);
415  	if (!elt->key) {
416  		err = -ENOMEM;
417  		goto free;
418  	}
419  
420  	elt->fields = kcalloc(map->n_fields, sizeof(*elt->fields), GFP_KERNEL);
421  	if (!elt->fields) {
422  		err = -ENOMEM;
423  		goto free;
424  	}
425  
426  	elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL);
427  	if (!elt->vars) {
428  		err = -ENOMEM;
429  		goto free;
430  	}
431  
432  	elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL);
433  	if (!elt->var_set) {
434  		err = -ENOMEM;
435  		goto free;
436  	}
437  
438  	tracing_map_elt_init_fields(elt);
439  
440  	if (map->ops && map->ops->elt_alloc) {
441  		err = map->ops->elt_alloc(elt);
442  		if (err)
443  			goto free;
444  	}
445  	return elt;
446   free:
447  	tracing_map_elt_free(elt);
448  
449  	return ERR_PTR(err);
450  }
451  
get_free_elt(struct tracing_map * map)452  static struct tracing_map_elt *get_free_elt(struct tracing_map *map)
453  {
454  	struct tracing_map_elt *elt = NULL;
455  	int idx;
456  
457  	idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts);
458  	if (idx < map->max_elts) {
459  		elt = *(TRACING_MAP_ELT(map->elts, idx));
460  		if (map->ops && map->ops->elt_init)
461  			map->ops->elt_init(elt);
462  	}
463  
464  	return elt;
465  }
466  
tracing_map_free_elts(struct tracing_map * map)467  static void tracing_map_free_elts(struct tracing_map *map)
468  {
469  	unsigned int i;
470  
471  	if (!map->elts)
472  		return;
473  
474  	for (i = 0; i < map->max_elts; i++) {
475  		tracing_map_elt_free(*(TRACING_MAP_ELT(map->elts, i)));
476  		*(TRACING_MAP_ELT(map->elts, i)) = NULL;
477  	}
478  
479  	tracing_map_array_free(map->elts);
480  	map->elts = NULL;
481  }
482  
tracing_map_alloc_elts(struct tracing_map * map)483  static int tracing_map_alloc_elts(struct tracing_map *map)
484  {
485  	unsigned int i;
486  
487  	map->elts = tracing_map_array_alloc(map->max_elts,
488  					    sizeof(struct tracing_map_elt *));
489  	if (!map->elts)
490  		return -ENOMEM;
491  
492  	for (i = 0; i < map->max_elts; i++) {
493  		*(TRACING_MAP_ELT(map->elts, i)) = tracing_map_elt_alloc(map);
494  		if (IS_ERR(*(TRACING_MAP_ELT(map->elts, i)))) {
495  			*(TRACING_MAP_ELT(map->elts, i)) = NULL;
496  			tracing_map_free_elts(map);
497  
498  			return -ENOMEM;
499  		}
500  	}
501  
502  	return 0;
503  }
504  
keys_match(void * key,void * test_key,unsigned key_size)505  static inline bool keys_match(void *key, void *test_key, unsigned key_size)
506  {
507  	bool match = true;
508  
509  	if (memcmp(key, test_key, key_size))
510  		match = false;
511  
512  	return match;
513  }
514  
515  static inline struct tracing_map_elt *
__tracing_map_insert(struct tracing_map * map,void * key,bool lookup_only)516  __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only)
517  {
518  	u32 idx, key_hash, test_key;
519  	int dup_try = 0;
520  	struct tracing_map_entry *entry;
521  	struct tracing_map_elt *val;
522  
523  	key_hash = jhash(key, map->key_size, 0);
524  	if (key_hash == 0)
525  		key_hash = 1;
526  	idx = key_hash >> (32 - (map->map_bits + 1));
527  
528  	while (1) {
529  		idx &= (map->map_size - 1);
530  		entry = TRACING_MAP_ENTRY(map->map, idx);
531  		test_key = entry->key;
532  
533  		if (test_key && test_key == key_hash) {
534  			val = READ_ONCE(entry->val);
535  			if (val &&
536  			    keys_match(key, val->key, map->key_size)) {
537  				if (!lookup_only)
538  					atomic64_inc(&map->hits);
539  				return val;
540  			} else if (unlikely(!val)) {
541  				/*
542  				 * The key is present. But, val (pointer to elt
543  				 * struct) is still NULL. which means some other
544  				 * thread is in the process of inserting an
545  				 * element.
546  				 *
547  				 * On top of that, it's key_hash is same as the
548  				 * one being inserted right now. So, it's
549  				 * possible that the element has the same
550  				 * key as well.
551  				 */
552  
553  				dup_try++;
554  				if (dup_try > map->map_size) {
555  					atomic64_inc(&map->drops);
556  					break;
557  				}
558  				continue;
559  			}
560  		}
561  
562  		if (!test_key) {
563  			if (lookup_only)
564  				break;
565  
566  			if (!cmpxchg(&entry->key, 0, key_hash)) {
567  				struct tracing_map_elt *elt;
568  
569  				elt = get_free_elt(map);
570  				if (!elt) {
571  					atomic64_inc(&map->drops);
572  					entry->key = 0;
573  					break;
574  				}
575  
576  				memcpy(elt->key, key, map->key_size);
577  				/*
578  				 * Ensure the initialization is visible and
579  				 * publish the elt.
580  				 */
581  				smp_wmb();
582  				WRITE_ONCE(entry->val, elt);
583  				atomic64_inc(&map->hits);
584  
585  				return entry->val;
586  			} else {
587  				/*
588  				 * cmpxchg() failed. Loop around once
589  				 * more to check what key was inserted.
590  				 */
591  				dup_try++;
592  				continue;
593  			}
594  		}
595  
596  		idx++;
597  	}
598  
599  	return NULL;
600  }
601  
602  /**
603   * tracing_map_insert - Insert key and/or retrieve val from a tracing_map
604   * @map: The tracing_map to insert into
605   * @key: The key to insert
606   *
607   * Inserts a key into a tracing_map and creates and returns a new
608   * tracing_map_elt for it, or if the key has already been inserted by
609   * a previous call, returns the tracing_map_elt already associated
610   * with it.  When the map was created, the number of elements to be
611   * allocated for the map was specified (internally maintained as
612   * 'max_elts' in struct tracing_map), and that number of
613   * tracing_map_elts was created by tracing_map_init().  This is the
614   * pre-allocated pool of tracing_map_elts that tracing_map_insert()
615   * will allocate from when adding new keys.  Once that pool is
616   * exhausted, tracing_map_insert() is useless and will return NULL to
617   * signal that state.  There are two user-visible tracing_map
618   * variables, 'hits' and 'drops', which are updated by this function.
619   * Every time an element is either successfully inserted or retrieved,
620   * the 'hits' value is incremented.  Every time an element insertion
621   * fails, the 'drops' value is incremented.
622   *
623   * This is a lock-free tracing map insertion function implementing a
624   * modified form of Cliff Click's basic insertion algorithm.  It
625   * requires the table size be a power of two.  To prevent any
626   * possibility of an infinite loop we always make the internal table
627   * size double the size of the requested table size (max_elts * 2).
628   * Likewise, we never reuse a slot or resize or delete elements - when
629   * we've reached max_elts entries, we simply return NULL once we've
630   * run out of entries.  Readers can at any point in time traverse the
631   * tracing map and safely access the key/val pairs.
632   *
633   * Return: the tracing_map_elt pointer val associated with the key.
634   * If this was a newly inserted key, the val will be a newly allocated
635   * and associated tracing_map_elt pointer val.  If the key wasn't
636   * found and the pool of tracing_map_elts has been exhausted, NULL is
637   * returned and no further insertions will succeed.
638   */
tracing_map_insert(struct tracing_map * map,void * key)639  struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key)
640  {
641  	return __tracing_map_insert(map, key, false);
642  }
643  
644  /**
645   * tracing_map_lookup - Retrieve val from a tracing_map
646   * @map: The tracing_map to perform the lookup on
647   * @key: The key to look up
648   *
649   * Looks up key in tracing_map and if found returns the matching
650   * tracing_map_elt.  This is a lock-free lookup; see
651   * tracing_map_insert() for details on tracing_map and how it works.
652   * Every time an element is retrieved, the 'hits' value is
653   * incremented.  There is one user-visible tracing_map variable,
654   * 'hits', which is updated by this function.  Every time an element
655   * is successfully retrieved, the 'hits' value is incremented.  The
656   * 'drops' value is never updated by this function.
657   *
658   * Return: the tracing_map_elt pointer val associated with the key.
659   * If the key wasn't found, NULL is returned.
660   */
tracing_map_lookup(struct tracing_map * map,void * key)661  struct tracing_map_elt *tracing_map_lookup(struct tracing_map *map, void *key)
662  {
663  	return __tracing_map_insert(map, key, true);
664  }
665  
666  /**
667   * tracing_map_destroy - Destroy a tracing_map
668   * @map: The tracing_map to destroy
669   *
670   * Frees a tracing_map along with its associated array of
671   * tracing_map_elts.
672   *
673   * Callers should make sure there are no readers or writers actively
674   * reading or inserting into the map before calling this.
675   */
tracing_map_destroy(struct tracing_map * map)676  void tracing_map_destroy(struct tracing_map *map)
677  {
678  	if (!map)
679  		return;
680  
681  	tracing_map_free_elts(map);
682  
683  	tracing_map_array_free(map->map);
684  	kfree(map);
685  }
686  
687  /**
688   * tracing_map_clear - Clear a tracing_map
689   * @map: The tracing_map to clear
690   *
691   * Resets the tracing map to a cleared or initial state.  The
692   * tracing_map_elts are all cleared, and the array of struct
693   * tracing_map_entry is reset to an initialized state.
694   *
695   * Callers should make sure there are no writers actively inserting
696   * into the map before calling this.
697   */
tracing_map_clear(struct tracing_map * map)698  void tracing_map_clear(struct tracing_map *map)
699  {
700  	unsigned int i;
701  
702  	atomic_set(&map->next_elt, 0);
703  	atomic64_set(&map->hits, 0);
704  	atomic64_set(&map->drops, 0);
705  
706  	tracing_map_array_clear(map->map);
707  
708  	for (i = 0; i < map->max_elts; i++)
709  		tracing_map_elt_clear(*(TRACING_MAP_ELT(map->elts, i)));
710  }
711  
set_sort_key(struct tracing_map * map,struct tracing_map_sort_key * sort_key)712  static void set_sort_key(struct tracing_map *map,
713  			 struct tracing_map_sort_key *sort_key)
714  {
715  	map->sort_key = *sort_key;
716  }
717  
718  /**
719   * tracing_map_create - Create a lock-free map and element pool
720   * @map_bits: The size of the map (2 ** map_bits)
721   * @key_size: The size of the key for the map in bytes
722   * @ops: Optional client-defined tracing_map_ops instance
723   * @private_data: Client data associated with the map
724   *
725   * Creates and sets up a map to contain 2 ** map_bits number of
726   * elements (internally maintained as 'max_elts' in struct
727   * tracing_map).  Before using, map fields should be added to the map
728   * with tracing_map_add_sum_field() and tracing_map_add_key_field().
729   * tracing_map_init() should then be called to allocate the array of
730   * tracing_map_elts, in order to avoid allocating anything in the map
731   * insertion path.  The user-specified map size reflects the maximum
732   * number of elements that can be contained in the table requested by
733   * the user - internally we double that in order to keep the table
734   * sparse and keep collisions manageable.
735   *
736   * A tracing_map is a special-purpose map designed to aggregate or
737   * 'sum' one or more values associated with a specific object of type
738   * tracing_map_elt, which is attached by the map to a given key.
739   *
740   * tracing_map_create() sets up the map itself, and provides
741   * operations for inserting tracing_map_elts, but doesn't allocate the
742   * tracing_map_elts themselves, or provide a means for describing the
743   * keys or sums associated with the tracing_map_elts.  All
744   * tracing_map_elts for a given map have the same set of sums and
745   * keys, which are defined by the client using the functions
746   * tracing_map_add_key_field() and tracing_map_add_sum_field().  Once
747   * the fields are defined, the pool of elements allocated for the map
748   * can be created, which occurs when the client code calls
749   * tracing_map_init().
750   *
751   * When tracing_map_init() returns, tracing_map_elt elements can be
752   * inserted into the map using tracing_map_insert().  When called,
753   * tracing_map_insert() grabs a free tracing_map_elt from the pool, or
754   * finds an existing match in the map and in either case returns it.
755   * The client can then use tracing_map_update_sum() and
756   * tracing_map_read_sum() to update or read a given sum field for the
757   * tracing_map_elt.
758   *
759   * The client can at any point retrieve and traverse the current set
760   * of inserted tracing_map_elts in a tracing_map, via
761   * tracing_map_sort_entries().  Sorting can be done on any field,
762   * including keys.
763   *
764   * See tracing_map.h for a description of tracing_map_ops.
765   *
766   * Return: the tracing_map pointer if successful, ERR_PTR if not.
767   */
tracing_map_create(unsigned int map_bits,unsigned int key_size,const struct tracing_map_ops * ops,void * private_data)768  struct tracing_map *tracing_map_create(unsigned int map_bits,
769  				       unsigned int key_size,
770  				       const struct tracing_map_ops *ops,
771  				       void *private_data)
772  {
773  	struct tracing_map *map;
774  	unsigned int i;
775  
776  	if (map_bits < TRACING_MAP_BITS_MIN ||
777  	    map_bits > TRACING_MAP_BITS_MAX)
778  		return ERR_PTR(-EINVAL);
779  
780  	map = kzalloc(sizeof(*map), GFP_KERNEL);
781  	if (!map)
782  		return ERR_PTR(-ENOMEM);
783  
784  	map->map_bits = map_bits;
785  	map->max_elts = (1 << map_bits);
786  	atomic_set(&map->next_elt, 0);
787  
788  	map->map_size = (1 << (map_bits + 1));
789  	map->ops = ops;
790  
791  	map->private_data = private_data;
792  
793  	map->map = tracing_map_array_alloc(map->map_size,
794  					   sizeof(struct tracing_map_entry));
795  	if (!map->map)
796  		goto free;
797  
798  	map->key_size = key_size;
799  	for (i = 0; i < TRACING_MAP_KEYS_MAX; i++)
800  		map->key_idx[i] = -1;
801   out:
802  	return map;
803   free:
804  	tracing_map_destroy(map);
805  	map = ERR_PTR(-ENOMEM);
806  
807  	goto out;
808  }
809  
810  /**
811   * tracing_map_init - Allocate and clear a map's tracing_map_elts
812   * @map: The tracing_map to initialize
813   *
814   * Allocates a clears a pool of tracing_map_elts equal to the
815   * user-specified size of 2 ** map_bits (internally maintained as
816   * 'max_elts' in struct tracing_map).  Before using, the map fields
817   * should be added to the map with tracing_map_add_sum_field() and
818   * tracing_map_add_key_field().  tracing_map_init() should then be
819   * called to allocate the array of tracing_map_elts, in order to avoid
820   * allocating anything in the map insertion path.  The user-specified
821   * map size reflects the max number of elements requested by the user
822   * - internally we double that in order to keep the table sparse and
823   * keep collisions manageable.
824   *
825   * See tracing_map.h for a description of tracing_map_ops.
826   *
827   * Return: the tracing_map pointer if successful, ERR_PTR if not.
828   */
tracing_map_init(struct tracing_map * map)829  int tracing_map_init(struct tracing_map *map)
830  {
831  	int err;
832  
833  	if (map->n_fields < 2)
834  		return -EINVAL; /* need at least 1 key and 1 val */
835  
836  	err = tracing_map_alloc_elts(map);
837  	if (err)
838  		return err;
839  
840  	tracing_map_clear(map);
841  
842  	return err;
843  }
844  
cmp_entries_dup(const void * A,const void * B)845  static int cmp_entries_dup(const void *A, const void *B)
846  {
847  	const struct tracing_map_sort_entry *a, *b;
848  	int ret = 0;
849  
850  	a = *(const struct tracing_map_sort_entry **)A;
851  	b = *(const struct tracing_map_sort_entry **)B;
852  
853  	if (memcmp(a->key, b->key, a->elt->map->key_size))
854  		ret = 1;
855  
856  	return ret;
857  }
858  
cmp_entries_sum(const void * A,const void * B)859  static int cmp_entries_sum(const void *A, const void *B)
860  {
861  	const struct tracing_map_elt *elt_a, *elt_b;
862  	const struct tracing_map_sort_entry *a, *b;
863  	struct tracing_map_sort_key *sort_key;
864  	struct tracing_map_field *field;
865  	tracing_map_cmp_fn_t cmp_fn;
866  	void *val_a, *val_b;
867  	int ret = 0;
868  
869  	a = *(const struct tracing_map_sort_entry **)A;
870  	b = *(const struct tracing_map_sort_entry **)B;
871  
872  	elt_a = a->elt;
873  	elt_b = b->elt;
874  
875  	sort_key = &elt_a->map->sort_key;
876  
877  	field = &elt_a->fields[sort_key->field_idx];
878  	cmp_fn = field->cmp_fn;
879  
880  	val_a = &elt_a->fields[sort_key->field_idx].sum;
881  	val_b = &elt_b->fields[sort_key->field_idx].sum;
882  
883  	ret = cmp_fn(val_a, val_b);
884  	if (sort_key->descending)
885  		ret = -ret;
886  
887  	return ret;
888  }
889  
cmp_entries_key(const void * A,const void * B)890  static int cmp_entries_key(const void *A, const void *B)
891  {
892  	const struct tracing_map_elt *elt_a, *elt_b;
893  	const struct tracing_map_sort_entry *a, *b;
894  	struct tracing_map_sort_key *sort_key;
895  	struct tracing_map_field *field;
896  	tracing_map_cmp_fn_t cmp_fn;
897  	void *val_a, *val_b;
898  	int ret = 0;
899  
900  	a = *(const struct tracing_map_sort_entry **)A;
901  	b = *(const struct tracing_map_sort_entry **)B;
902  
903  	elt_a = a->elt;
904  	elt_b = b->elt;
905  
906  	sort_key = &elt_a->map->sort_key;
907  
908  	field = &elt_a->fields[sort_key->field_idx];
909  
910  	cmp_fn = field->cmp_fn;
911  
912  	val_a = elt_a->key + field->offset;
913  	val_b = elt_b->key + field->offset;
914  
915  	ret = cmp_fn(val_a, val_b);
916  	if (sort_key->descending)
917  		ret = -ret;
918  
919  	return ret;
920  }
921  
destroy_sort_entry(struct tracing_map_sort_entry * entry)922  static void destroy_sort_entry(struct tracing_map_sort_entry *entry)
923  {
924  	if (!entry)
925  		return;
926  
927  	if (entry->elt_copied)
928  		tracing_map_elt_free(entry->elt);
929  
930  	kfree(entry);
931  }
932  
933  /**
934   * tracing_map_destroy_sort_entries - Destroy an array of sort entries
935   * @entries: The entries to destroy
936   * @n_entries: The number of entries in the array
937   *
938   * Destroy the elements returned by a tracing_map_sort_entries() call.
939   */
tracing_map_destroy_sort_entries(struct tracing_map_sort_entry ** entries,unsigned int n_entries)940  void tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries,
941  				      unsigned int n_entries)
942  {
943  	unsigned int i;
944  
945  	for (i = 0; i < n_entries; i++)
946  		destroy_sort_entry(entries[i]);
947  
948  	vfree(entries);
949  }
950  
951  static struct tracing_map_sort_entry *
create_sort_entry(void * key,struct tracing_map_elt * elt)952  create_sort_entry(void *key, struct tracing_map_elt *elt)
953  {
954  	struct tracing_map_sort_entry *sort_entry;
955  
956  	sort_entry = kzalloc(sizeof(*sort_entry), GFP_KERNEL);
957  	if (!sort_entry)
958  		return NULL;
959  
960  	sort_entry->key = key;
961  	sort_entry->elt = elt;
962  
963  	return sort_entry;
964  }
965  
detect_dups(struct tracing_map_sort_entry ** sort_entries,int n_entries,unsigned int key_size)966  static void detect_dups(struct tracing_map_sort_entry **sort_entries,
967  		      int n_entries, unsigned int key_size)
968  {
969  	unsigned int total_dups = 0;
970  	int i;
971  	void *key;
972  
973  	if (n_entries < 2)
974  		return;
975  
976  	sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *),
977  	     (int (*)(const void *, const void *))cmp_entries_dup, NULL);
978  
979  	key = sort_entries[0]->key;
980  	for (i = 1; i < n_entries; i++) {
981  		if (!memcmp(sort_entries[i]->key, key, key_size)) {
982  			total_dups++;
983  			continue;
984  		}
985  		key = sort_entries[i]->key;
986  	}
987  
988  	WARN_ONCE(total_dups > 0,
989  		  "Duplicates detected: %d\n", total_dups);
990  }
991  
is_key(struct tracing_map * map,unsigned int field_idx)992  static bool is_key(struct tracing_map *map, unsigned int field_idx)
993  {
994  	unsigned int i;
995  
996  	for (i = 0; i < map->n_keys; i++)
997  		if (map->key_idx[i] == field_idx)
998  			return true;
999  	return false;
1000  }
1001  
sort_secondary(struct tracing_map * map,const struct tracing_map_sort_entry ** entries,unsigned int n_entries,struct tracing_map_sort_key * primary_key,struct tracing_map_sort_key * secondary_key)1002  static void sort_secondary(struct tracing_map *map,
1003  			   const struct tracing_map_sort_entry **entries,
1004  			   unsigned int n_entries,
1005  			   struct tracing_map_sort_key *primary_key,
1006  			   struct tracing_map_sort_key *secondary_key)
1007  {
1008  	int (*primary_fn)(const void *, const void *);
1009  	int (*secondary_fn)(const void *, const void *);
1010  	unsigned i, start = 0, n_sub = 1;
1011  
1012  	if (is_key(map, primary_key->field_idx))
1013  		primary_fn = cmp_entries_key;
1014  	else
1015  		primary_fn = cmp_entries_sum;
1016  
1017  	if (is_key(map, secondary_key->field_idx))
1018  		secondary_fn = cmp_entries_key;
1019  	else
1020  		secondary_fn = cmp_entries_sum;
1021  
1022  	for (i = 0; i < n_entries - 1; i++) {
1023  		const struct tracing_map_sort_entry **a = &entries[i];
1024  		const struct tracing_map_sort_entry **b = &entries[i + 1];
1025  
1026  		if (primary_fn(a, b) == 0) {
1027  			n_sub++;
1028  			if (i < n_entries - 2)
1029  				continue;
1030  		}
1031  
1032  		if (n_sub < 2) {
1033  			start = i + 1;
1034  			n_sub = 1;
1035  			continue;
1036  		}
1037  
1038  		set_sort_key(map, secondary_key);
1039  		sort(&entries[start], n_sub,
1040  		     sizeof(struct tracing_map_sort_entry *),
1041  		     (int (*)(const void *, const void *))secondary_fn, NULL);
1042  		set_sort_key(map, primary_key);
1043  
1044  		start = i + 1;
1045  		n_sub = 1;
1046  	}
1047  }
1048  
1049  /**
1050   * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map
1051   * @map: The tracing_map
1052   * @sort_keys: The sort key to use for sorting
1053   * @n_sort_keys: hitcount, always have at least one
1054   * @sort_entries: outval: pointer to allocated and sorted array of entries
1055   *
1056   * tracing_map_sort_entries() sorts the current set of entries in the
1057   * map and returns the list of tracing_map_sort_entries containing
1058   * them to the client in the sort_entries param.  The client can
1059   * access the struct tracing_map_elt element of interest directly as
1060   * the 'elt' field of a returned struct tracing_map_sort_entry object.
1061   *
1062   * The sort_key has only two fields: idx and descending.  'idx' refers
1063   * to the index of the field added via tracing_map_add_sum_field() or
1064   * tracing_map_add_key_field() when the tracing_map was initialized.
1065   * 'descending' is a flag that if set reverses the sort order, which
1066   * by default is ascending.
1067   *
1068   * The client should not hold on to the returned array but should use
1069   * it and call tracing_map_destroy_sort_entries() when done.
1070   *
1071   * Return: the number of sort_entries in the struct tracing_map_sort_entry
1072   * array, negative on error
1073   */
tracing_map_sort_entries(struct tracing_map * map,struct tracing_map_sort_key * sort_keys,unsigned int n_sort_keys,struct tracing_map_sort_entry *** sort_entries)1074  int tracing_map_sort_entries(struct tracing_map *map,
1075  			     struct tracing_map_sort_key *sort_keys,
1076  			     unsigned int n_sort_keys,
1077  			     struct tracing_map_sort_entry ***sort_entries)
1078  {
1079  	int (*cmp_entries_fn)(const void *, const void *);
1080  	struct tracing_map_sort_entry *sort_entry, **entries;
1081  	int i, n_entries, ret;
1082  
1083  	entries = vmalloc(array_size(sizeof(sort_entry), map->max_elts));
1084  	if (!entries)
1085  		return -ENOMEM;
1086  
1087  	for (i = 0, n_entries = 0; i < map->map_size; i++) {
1088  		struct tracing_map_entry *entry;
1089  
1090  		entry = TRACING_MAP_ENTRY(map->map, i);
1091  
1092  		if (!entry->key || !entry->val)
1093  			continue;
1094  
1095  		entries[n_entries] = create_sort_entry(entry->val->key,
1096  						       entry->val);
1097  		if (!entries[n_entries++]) {
1098  			ret = -ENOMEM;
1099  			goto free;
1100  		}
1101  	}
1102  
1103  	if (n_entries == 0) {
1104  		ret = 0;
1105  		goto free;
1106  	}
1107  
1108  	if (n_entries == 1) {
1109  		*sort_entries = entries;
1110  		return 1;
1111  	}
1112  
1113  	detect_dups(entries, n_entries, map->key_size);
1114  
1115  	if (is_key(map, sort_keys[0].field_idx))
1116  		cmp_entries_fn = cmp_entries_key;
1117  	else
1118  		cmp_entries_fn = cmp_entries_sum;
1119  
1120  	set_sort_key(map, &sort_keys[0]);
1121  
1122  	sort(entries, n_entries, sizeof(struct tracing_map_sort_entry *),
1123  	     (int (*)(const void *, const void *))cmp_entries_fn, NULL);
1124  
1125  	if (n_sort_keys > 1)
1126  		sort_secondary(map,
1127  			       (const struct tracing_map_sort_entry **)entries,
1128  			       n_entries,
1129  			       &sort_keys[0],
1130  			       &sort_keys[1]);
1131  
1132  	*sort_entries = entries;
1133  
1134  	return n_entries;
1135   free:
1136  	tracing_map_destroy_sort_entries(entries, n_entries);
1137  
1138  	return ret;
1139  }
1140