Lines Matching +full:non +full:- +full:configurable
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF)
16 /* Heavy-Hitter Filter (HHF)
19 * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter
20 * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified
21 * as heavy-hitter, it is immediately switched to the heavy-hitter bucket.
23 * in which the heavy-hitter bucket is served with less weight.
24 * In other words, non-heavy-hitters (e.g., short bursts of critical traffic)
25 * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have
28 * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the
33 * Conceptually, a multi-stage filter comprises k independent hash functions
37 * - For a heavy-hitter flow: *all* of its k array counters must be large.
38 * - For a non-heavy-hitter flow: some of its k array counters can be large
42 * By the design of the multi-stage filter algorithm, the false negative rate
43 * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is
44 * susceptible to false positives (non-heavy-hitters mistakenly classified as
45 * heavy-hitters).
48 * - Optimization O1: once a heavy-hitter is identified, its bytes are not
51 * - Optimization O2: conservative update of counters
59 * Once a flow is classified as heavy-hitter, we also save its per-flow state
60 * in an exact-matching flow table so that its subsequent packets can be
61 * dispatched to the heavy-hitter bucket accordingly.
66 * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching
67 * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter
69 * - Otherwise, forward p to the multi-stage filter, denoted filter F
70 * + If F decides that p belongs to a non-heavy-hitter flow, then send p
71 * to the non-heavy-hitter bucket.
72 * + Otherwise, if F decides that p belongs to a new heavy-hitter flow,
73 * then set up a new flow entry for the flow-id of p in the table T and
74 * send p to the heavy-hitter bucket.
77 * - T is a fixed-size hash-table with 1024 entries. Hash collision is
78 * resolved by linked-list chaining.
79 * - F has four counter arrays, each array containing 1024 32-bit counters.
81 * - Since each array in F contains 1024 counters, 10 bits are sufficient to
83 * Hence, instead of having four hash functions, we chop the 32-bit
84 * skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is
86 * - We need to clear the counter arrays periodically; however, directly
90 * - The Deficit Round Robin engine is taken from fq_codel implementation
96 /* Non-configurable parameters */
97 #define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */
98 #define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */
105 WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */
106 WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */
110 (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0))
112 /* Heavy-hitter per-flow state */
114 u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */
115 u32 hit_timestamp; /* last time heavy-hitter was seen */
150 /* Configurable HHF parameters */
168 u32 hhf_non_hh_weight; /* WDRR weight for non-HHs
170 * i.e., non-HH : HH = 2 : 1)
179 /* Looks up a heavy-hitter flow in a chaining list of table T. */
191 u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; in seek_list()
194 /* Delete expired heavy-hitters, but preserve one entry in seek_list()
197 if (list_is_last(&flow->flowchain, head)) in seek_list()
199 list_del(&flow->flowchain); in seek_list()
201 q->hh_flows_current_cnt--; in seek_list()
202 } else if (flow->hash_id == hash) { in seek_list()
209 /* Returns a flow state entry for a new heavy-hitter. Either reuses an expired
219 /* Find an expired heavy-hitter flow entry. */ in alloc_new_hh()
221 u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; in alloc_new_hh()
228 if (q->hh_flows_current_cnt >= q->hh_flows_limit) { in alloc_new_hh()
229 q->hh_flows_overlimit++; in alloc_new_hh()
237 q->hh_flows_current_cnt++; in alloc_new_hh()
238 INIT_LIST_HEAD(&flow->flowchain); in alloc_new_hh()
239 list_add_tail(&flow->flowchain, head); in alloc_new_hh()
244 /* Assigns packets to WDRR buckets. Implements a multi-stage filter to
245 * classify heavy-hitters.
259 prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout; in hhf_classify()
262 bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN); in hhf_classify()
263 q->hhf_arrays_reset_timestamp = now; in hhf_classify()
266 /* Get hashed flow-id of the skb. */ in hhf_classify()
267 hash = skb_get_hash_perturb(skb, &q->perturbation); in hhf_classify()
271 flow = seek_list(hash, &q->hh_flows[flow_pos], q); in hhf_classify()
273 flow->hit_timestamp = now; in hhf_classify()
277 /* Now pass the packet through the multi-stage filter. */ in hhf_classify()
280 for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) { in hhf_classify()
281 /* Split the skb_hash into three 10-bit chunks. */ in hhf_classify()
287 filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash; in hhf_classify()
294 if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) { in hhf_classify()
295 q->hhf_arrays[i][filter_pos[i]] = 0; in hhf_classify()
296 __set_bit(filter_pos[i], q->hhf_valid_bits[i]); in hhf_classify()
299 val = q->hhf_arrays[i][filter_pos[i]] + pkt_len; in hhf_classify()
305 if (min_hhf_val > q->hhf_admit_bytes) { in hhf_classify()
306 /* Just captured a new heavy-hitter. */ in hhf_classify()
307 flow = alloc_new_hh(&q->hh_flows[flow_pos], q); in hhf_classify()
310 flow->hash_id = hash; in hhf_classify()
311 flow->hit_timestamp = now; in hhf_classify()
312 q->hh_flows_total_cnt++; in hhf_classify()
314 /* By returning without updating counters in q->hhf_arrays, in hhf_classify()
322 if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val) in hhf_classify()
323 q->hhf_arrays[i][filter_pos[i]] = min_hhf_val; in hhf_classify()
331 struct sk_buff *skb = bucket->head; in dequeue_head()
333 bucket->head = skb->next; in dequeue_head()
338 /* Tail-adds skb to bucket. */
341 if (bucket->head == NULL) in bucket_add()
342 bucket->head = skb; in bucket_add()
344 bucket->tail->next = skb; in bucket_add()
345 bucket->tail = skb; in bucket_add()
346 skb->next = NULL; in bucket_add()
354 /* Always try to drop from heavy-hitters first. */ in hhf_drop()
355 bucket = &q->buckets[WDRR_BUCKET_FOR_HH]; in hhf_drop()
356 if (!bucket->head) in hhf_drop()
357 bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH]; in hhf_drop()
359 if (bucket->head) { in hhf_drop()
362 sch->q.qlen--; in hhf_drop()
368 return bucket - q->buckets; in hhf_drop()
381 bucket = &q->buckets[idx]; in hhf_enqueue()
385 if (list_empty(&bucket->bucketchain)) { in hhf_enqueue()
390 * i.e., short bursts of non-HHs should have strict priority. in hhf_enqueue()
393 /* Always move heavy-hitters to old bucket. */ in hhf_enqueue()
395 list_add_tail(&bucket->bucketchain, &q->old_buckets); in hhf_enqueue()
397 weight = q->hhf_non_hh_weight; in hhf_enqueue()
398 list_add_tail(&bucket->bucketchain, &q->new_buckets); in hhf_enqueue()
400 bucket->deficit = weight * q->quantum; in hhf_enqueue()
402 if (++sch->q.qlen <= sch->limit) in hhf_enqueue()
405 prev_backlog = sch->qstats.backlog; in hhf_enqueue()
406 q->drop_overlimit++; in hhf_enqueue()
414 qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); in hhf_enqueue()
426 head = &q->new_buckets; in hhf_dequeue()
428 head = &q->old_buckets; in hhf_dequeue()
434 if (bucket->deficit <= 0) { in hhf_dequeue()
435 int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ? in hhf_dequeue()
436 1 : q->hhf_non_hh_weight; in hhf_dequeue()
438 bucket->deficit += weight * q->quantum; in hhf_dequeue()
439 list_move_tail(&bucket->bucketchain, &q->old_buckets); in hhf_dequeue()
443 if (bucket->head) { in hhf_dequeue()
445 sch->q.qlen--; in hhf_dequeue()
451 if ((head == &q->new_buckets) && !list_empty(&q->old_buckets)) in hhf_dequeue()
452 list_move_tail(&bucket->bucketchain, &q->old_buckets); in hhf_dequeue()
454 list_del_init(&bucket->bucketchain); in hhf_dequeue()
458 bucket->deficit -= qdisc_pkt_len(skb); in hhf_dequeue()
477 kvfree(q->hhf_arrays[i]); in hhf_destroy()
478 kvfree(q->hhf_valid_bits[i]); in hhf_destroy()
481 if (!q->hh_flows) in hhf_destroy()
486 struct list_head *head = &q->hh_flows[i]; in hhf_destroy()
491 list_del(&flow->flowchain); in hhf_destroy()
495 kvfree(q->hh_flows); in hhf_destroy()
516 u32 new_quantum = q->quantum; in hhf_change()
517 u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight; in hhf_change()
532 return -EINVAL; in hhf_change()
537 WRITE_ONCE(sch->limit, nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT])); in hhf_change()
539 WRITE_ONCE(q->quantum, new_quantum); in hhf_change()
540 WRITE_ONCE(q->hhf_non_hh_weight, new_hhf_non_hh_weight); in hhf_change()
543 WRITE_ONCE(q->hh_flows_limit, in hhf_change()
549 WRITE_ONCE(q->hhf_reset_timeout, in hhf_change()
554 WRITE_ONCE(q->hhf_admit_bytes, in hhf_change()
560 WRITE_ONCE(q->hhf_evict_timeout, in hhf_change()
564 qlen = sch->q.qlen; in hhf_change()
565 prev_backlog = sch->qstats.backlog; in hhf_change()
566 while (sch->q.qlen > sch->limit) { in hhf_change()
571 qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, in hhf_change()
572 prev_backlog - sch->qstats.backlog); in hhf_change()
584 sch->limit = 1000; in hhf_init()
585 q->quantum = psched_mtu(qdisc_dev(sch)); in hhf_init()
586 get_random_bytes(&q->perturbation, sizeof(q->perturbation)); in hhf_init()
587 INIT_LIST_HEAD(&q->new_buckets); in hhf_init()
588 INIT_LIST_HEAD(&q->old_buckets); in hhf_init()
590 /* Configurable HHF parameters */ in hhf_init()
591 q->hhf_reset_timeout = HZ / 25; /* 40 ms */ in hhf_init()
592 q->hhf_admit_bytes = 131072; /* 128 KB */ in hhf_init()
593 q->hhf_evict_timeout = HZ; /* 1 sec */ in hhf_init()
594 q->hhf_non_hh_weight = 2; in hhf_init()
603 if (!q->hh_flows) { in hhf_init()
604 /* Initialize heavy-hitter flow table. */ in hhf_init()
605 q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head), in hhf_init()
607 if (!q->hh_flows) in hhf_init()
608 return -ENOMEM; in hhf_init()
610 INIT_LIST_HEAD(&q->hh_flows[i]); in hhf_init()
613 q->hh_flows_limit = 2 * HH_FLOWS_CNT; in hhf_init()
614 q->hh_flows_overlimit = 0; in hhf_init()
615 q->hh_flows_total_cnt = 0; in hhf_init()
616 q->hh_flows_current_cnt = 0; in hhf_init()
618 /* Initialize heavy-hitter filter arrays. */ in hhf_init()
620 q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN, in hhf_init()
623 if (!q->hhf_arrays[i]) { in hhf_init()
627 return -ENOMEM; in hhf_init()
630 q->hhf_arrays_reset_timestamp = hhf_time_stamp(); in hhf_init()
632 /* Initialize valid bits of heavy-hitter filter arrays. */ in hhf_init()
634 q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN / in hhf_init()
636 if (!q->hhf_valid_bits[i]) { in hhf_init()
640 return -ENOMEM; in hhf_init()
646 struct wdrr_bucket *bucket = q->buckets + i; in hhf_init()
648 INIT_LIST_HEAD(&bucket->bucketchain); in hhf_init()
664 if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, READ_ONCE(sch->limit)) || in hhf_dump()
665 nla_put_u32(skb, TCA_HHF_QUANTUM, READ_ONCE(q->quantum)) || in hhf_dump()
667 READ_ONCE(q->hh_flows_limit)) || in hhf_dump()
669 jiffies_to_usecs(READ_ONCE(q->hhf_reset_timeout))) || in hhf_dump()
671 READ_ONCE(q->hhf_admit_bytes)) || in hhf_dump()
673 jiffies_to_usecs(READ_ONCE(q->hhf_evict_timeout))) || in hhf_dump()
675 READ_ONCE(q->hhf_non_hh_weight))) in hhf_dump()
681 return -1; in hhf_dump()
688 .drop_overlimit = q->drop_overlimit, in hhf_dump_stats()
689 .hh_overlimit = q->hh_flows_overlimit, in hhf_dump_stats()
690 .hh_tot_count = q->hh_flows_total_cnt, in hhf_dump_stats()
691 .hh_cur_count = q->hh_flows_current_cnt, in hhf_dump_stats()
729 MODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)");