// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[] __aligned(8);
};

static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	return container_of(map, struct bpf_queue_stack, map);
}

static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
{
	return qs->head == qs->tail;
}

static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
{
	u32 head = qs->head + 1;

	if (unlikely(head >= qs->size))
		head = 0;

	return head == qs->tail;
}
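
/*
 * Illustrative note (added comment, not upstream): head and tail index a
 * circular buffer of 'size' slots, with size = max_entries + 1.  The spare
 * slot lets "full" (the slot after head equals tail) be told apart from
 * "empty" (head == tail) without a separate element counter.  For example,
 * with max_entries = 2 (size = 3):
 *
 *	empty:	head == tail
 *	full:	head == 2, tail == 0	(advancing head would hit tail)
 */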

/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 0 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}
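
/*
 * Usage sketch (added comment, illustrative only): from user space a queue
 * or stack map is created with key_size == 0, for example with libbpf:
 *
 *	int fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, "example_q",
 *				0, sizeof(__u32), 64, NULL);
 *
 * (arguments: map type, hypothetical name, key_size = 0, value_size,
 * max_entries, opts).  Any non-zero key_size, a zero value_size or a zero
 * max_entries is rejected by the check above with -EINVAL; a value_size
 * above KMALLOC_MAX_SIZE gets -E2BIG.
 */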

static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_queue_stack *qs;
	u64 size, queue_size;

	size = (u64) attr->max_entries + 1;
	queue_size = sizeof(*qs) + size * attr->value_size;

	qs = bpf_map_area_alloc(queue_size, numa_node);
	if (!qs)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&qs->map, attr);

	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	bpf_map_area_free(qs);
}

static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	ptr = &qs->elements[qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
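
/*
 * Illustrative note (added comment): this is the FIFO path.  Reads come from
 * 'tail' (the oldest element) while queue_stack_map_push_elem() writes at
 * 'head'.  A BPF program reaches it through the bpf_map_peek_elem() and
 * bpf_map_pop_elem() helpers; user space through BPF_MAP_LOOKUP_ELEM and
 * BPF_MAP_LOOKUP_AND_DELETE_ELEM with a NULL key, e.g. (libbpf, sketch):
 *
 *	__u32 val;
 *	err = bpf_map_lookup_and_delete_elem(fd, NULL, &val);
 *
 * which pops the oldest element and reports an error (-ENOENT here) when the
 * queue is empty.
 */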

static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, flags);
	}

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	ptr = &qs->elements[index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
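
/*
 * Illustrative note (added comment): the LIFO counterpart of
 * __queue_map_get().  The most recently pushed element sits just below
 * 'head', so peek/pop read index head - 1 (wrapping to size - 1 when head
 * is 0, since the subtraction is unsigned) and a pop simply moves head back
 * to that index; tail is never touched on this path.
 */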

/* Called from syscall or from eBPF program */
static long queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static long stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static long queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static long stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
				      u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	if (in_nmi()) {
		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
			return -EBUSY;
	} else {
		raw_spin_lock_irqsave(&qs->lock, irq_flags);
	}

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
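
/*
 * Usage sketch (added comment, illustrative only): a BPF program pushes via
 * the bpf_map_push_elem() helper, e.g.
 *
 *	__u32 val = 42;
 *	bpf_map_push_elem(&example_map, &val, BPF_ANY);
 *	bpf_map_push_elem(&example_map, &val, BPF_EXIST);
 *
 * where 'example_map' is a hypothetical BPF_MAP_TYPE_QUEUE or
 * BPF_MAP_TYPE_STACK map.  With BPF_ANY a push into a full map fails with
 * -E2BIG; with BPF_EXIST the oldest element is overwritten instead.  User
 * space reaches the same path via BPF_MAP_UPDATE_ELEM with a NULL key.
 */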

/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_update_elem(struct bpf_map *map, void *key,
					void *value, u64 flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static long queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}

static u64 queue_stack_map_mem_usage(const struct bpf_map *map)
{
	u64 usage = sizeof(struct bpf_queue_stack);

	usage += ((u64)map->max_entries + 1) * map->value_size;
	return usage;
}

BTF_ID_LIST_SINGLE(queue_map_btf_ids, struct, bpf_queue_stack)
const struct bpf_map_ops queue_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
	.map_mem_usage = queue_stack_map_mem_usage,
	.map_btf_id = &queue_map_btf_ids[0],
};

const struct bpf_map_ops stack_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = stack_map_pop_elem,
	.map_peek_elem = stack_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
	.map_mem_usage = queue_stack_map_mem_usage,
	.map_btf_id = &queue_map_btf_ids[0],
};