1  // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2  /*
3   * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
4   */
5  
6  #include <linux/dim.h>
7  #include <linux/rtnetlink.h>
8  
/*
 * Net DIM profiles:
 *        There is a different set of profiles for each CQ period mode.
 *        There is a different set of profiles for RX and for TX CQs.
 *        Each set must contain exactly NET_DIM_PARAMS_NUM_PROFILES profiles.
 */
/*
 * RX profiles used as the first row of rx_profile[] (the EQE period mode).
 * All entries share NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE as packet count;
 * only the usec value grows with the profile index.
 */
#define NET_DIM_RX_EQE_PROFILES {					\
	{ .usec = 1,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 8,   .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 64,  .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE },	\
}
22  
/*
 * RX profiles used as the second row of rx_profile[] (the CQE period mode).
 * The usec value grows with the profile index while the packet count
 * shrinks from 256 down to 64.
 */
#define NET_DIM_RX_CQE_PROFILES {		\
	{ .usec = 2,  .pkts = 256 },		\
	{ .usec = 8,  .pkts = 128 },		\
	{ .usec = 16, .pkts = 64 },		\
	{ .usec = 32, .pkts = 64 },		\
	{ .usec = 64, .pkts = 64 },		\
}
30  
/*
 * TX profiles used as the first row of tx_profile[] (the EQE period mode).
 * All entries share NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE as packet count;
 * only the usec value grows with the profile index.
 */
#define NET_DIM_TX_EQE_PROFILES {					\
	{ .usec = 1,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 8,   .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 32,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 64,  .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE },	\
	{ .usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE },	\
}
38  
/*
 * TX profiles used as the second row of tx_profile[] (the CQE period mode).
 * The usec value grows with the profile index while the packet count
 * shrinks from 128 down to 32.
 */
#define NET_DIM_TX_CQE_PROFILES {		\
	{ .usec = 5,  .pkts = 128 },		\
	{ .usec = 8,  .pkts = 64 },		\
	{ .usec = 16, .pkts = 32 },		\
	{ .usec = 32, .pkts = 32 },		\
	{ .usec = 64, .pkts = 32 },		\
}
46  
47  static const struct dim_cq_moder
48  rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
49  	NET_DIM_RX_EQE_PROFILES,
50  	NET_DIM_RX_CQE_PROFILES,
51  };
52  
53  static const struct dim_cq_moder
54  tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
55  	NET_DIM_TX_EQE_PROFILES,
56  	NET_DIM_TX_CQE_PROFILES,
57  };
58  
59  struct dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode,int ix)60  net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
61  {
62  	struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
63  
64  	cq_moder.cq_period_mode = cq_period_mode;
65  	return cq_moder;
66  }
67  EXPORT_SYMBOL(net_dim_get_rx_moderation);
68  
69  struct dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)70  net_dim_get_def_rx_moderation(u8 cq_period_mode)
71  {
72  	u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
73  			NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
74  
75  	return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
76  }
77  EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
78  
79  struct dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode,int ix)80  net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
81  {
82  	struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
83  
84  	cq_moder.cq_period_mode = cq_period_mode;
85  	return cq_moder;
86  }
87  EXPORT_SYMBOL(net_dim_get_tx_moderation);
88  
89  struct dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)90  net_dim_get_def_tx_moderation(u8 cq_period_mode)
91  {
92  	u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
93  			NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
94  
95  	return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
96  }
97  EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
98  
/**
 * net_dim_init_irq_moder - allocate and attach DIM moderation state to @dev
 * @dev: network device whose irq_moder pointer is set up
 * @profile_flags: DIM_PROFILE_RX and/or DIM_PROFILE_TX select which
 *                 per-device profile tables are allocated
 * @coal_flags: stored verbatim in the new irq_moder
 * @rx_mode: row of the built-in RX table copied when DIM_PROFILE_RX is set
 * @tx_mode: row of the built-in TX table copied when DIM_PROFILE_TX is set
 * @rx_dim_work: work handler stored for RX when DIM_PROFILE_RX is set
 * @tx_dim_work: work handler stored for TX when DIM_PROFILE_TX is set
 *
 * @rx_mode and @tx_mode are used directly as array indexes and are not
 * range-checked here.
 *
 * Return: 0 on success, -ENOMEM if any allocation fails.  On failure
 * everything allocated here is freed and dev->irq_moder is left NULL.
 */
int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
			   u8 coal_flags, u8 rx_mode, u8 tx_mode,
			   void (*rx_dim_work)(struct work_struct *work),
			   void (*tx_dim_work)(struct work_struct *work))
{
	struct dim_cq_moder *rxp = NULL, *txp;
	struct dim_irq_moder *moder;
	int len;

	dev->irq_moder = kzalloc(sizeof(*dev->irq_moder), GFP_KERNEL);
	if (!dev->irq_moder)
		return -ENOMEM;

	moder = dev->irq_moder;
	len = NET_DIM_PARAMS_NUM_PROFILES * sizeof(*moder->rx_profile);

	moder->coal_flags = coal_flags;
	moder->profile_flags = profile_flags;

	if (profile_flags & DIM_PROFILE_RX) {
		moder->rx_dim_work = rx_dim_work;
		moder->dim_rx_mode = rx_mode;
		/* Private, writable copy of the built-in row for this device. */
		rxp = kmemdup(rx_profile[rx_mode], len, GFP_KERNEL);
		if (!rxp)
			goto free_moder;

		rcu_assign_pointer(moder->rx_profile, rxp);
	}

	if (profile_flags & DIM_PROFILE_TX) {
		moder->tx_dim_work = tx_dim_work;
		moder->dim_tx_mode = tx_mode;
		txp = kmemdup(tx_profile[tx_mode], len, GFP_KERNEL);
		if (!txp)
			goto free_rxp;

		rcu_assign_pointer(moder->tx_profile, txp);
	}

	return 0;

free_rxp:
	/* rxp is NULL when DIM_PROFILE_RX was not requested; kfree(NULL) is OK. */
	kfree(rxp);
free_moder:
	/*
	 * Do not leave a dangling pointer behind: net_dim_free_irq_moder()
	 * and net_dim_setting() both treat a non-NULL dev->irq_moder as valid.
	 */
	dev->irq_moder = NULL;
	kfree(moder);
	return -ENOMEM;
}
EXPORT_SYMBOL(net_dim_init_irq_moder);
147  
148  /* RTNL lock is held. */
net_dim_free_irq_moder(struct net_device * dev)149  void net_dim_free_irq_moder(struct net_device *dev)
150  {
151  	struct dim_cq_moder *rxp, *txp;
152  
153  	if (!dev->irq_moder)
154  		return;
155  
156  	rxp = rtnl_dereference(dev->irq_moder->rx_profile);
157  	txp = rtnl_dereference(dev->irq_moder->tx_profile);
158  
159  	rcu_assign_pointer(dev->irq_moder->rx_profile, NULL);
160  	rcu_assign_pointer(dev->irq_moder->tx_profile, NULL);
161  
162  	kfree_rcu(rxp, rcu);
163  	kfree_rcu(txp, rcu);
164  	kfree(dev->irq_moder);
165  }
166  EXPORT_SYMBOL(net_dim_free_irq_moder);
167  
/**
 * net_dim_setting - initialise a DIM instance from the device's irq_moder
 * @dev: network device providing the irq_moder
 * @dim: DIM instance whose work item and mode are set
 * @is_tx: true to use the TX work handler and mode, false for RX
 *
 * Does nothing when dev->irq_moder is NULL.
 */
void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx)
{
	struct dim_irq_moder *moder = dev->irq_moder;

	if (!moder)
		return;

	if (!is_tx) {
		INIT_WORK(&dim->work, moder->rx_dim_work);
		dim->mode = READ_ONCE(moder->dim_rx_mode);
	} else {
		INIT_WORK(&dim->work, moder->tx_dim_work);
		dim->mode = READ_ONCE(moder->dim_tx_mode);
	}
}
EXPORT_SYMBOL(net_dim_setting);
185  
net_dim_work_cancel(struct dim * dim)186  void net_dim_work_cancel(struct dim *dim)
187  {
188  	cancel_work_sync(&dim->work);
189  }
190  EXPORT_SYMBOL(net_dim_work_cancel);
191  
net_dim_get_rx_irq_moder(struct net_device * dev,struct dim * dim)192  struct dim_cq_moder net_dim_get_rx_irq_moder(struct net_device *dev,
193  					     struct dim *dim)
194  {
195  	struct dim_cq_moder res, *profile;
196  
197  	rcu_read_lock();
198  	profile = rcu_dereference(dev->irq_moder->rx_profile);
199  	res = profile[dim->profile_ix];
200  	rcu_read_unlock();
201  
202  	res.cq_period_mode = dim->mode;
203  
204  	return res;
205  }
206  EXPORT_SYMBOL(net_dim_get_rx_irq_moder);
207  
net_dim_get_tx_irq_moder(struct net_device * dev,struct dim * dim)208  struct dim_cq_moder net_dim_get_tx_irq_moder(struct net_device *dev,
209  					     struct dim *dim)
210  {
211  	struct dim_cq_moder res, *profile;
212  
213  	rcu_read_lock();
214  	profile = rcu_dereference(dev->irq_moder->tx_profile);
215  	res = profile[dim->profile_ix];
216  	rcu_read_unlock();
217  
218  	res.cq_period_mode = dim->mode;
219  
220  	return res;
221  }
222  EXPORT_SYMBOL(net_dim_get_tx_irq_moder);
223  
/**
 * net_dim_set_rx_mode - update the RX mode stored in the device's irq_moder
 * @dev: network device; dev->irq_moder is dereferenced without a NULL check
 * @rx_mode: new value for dim_rx_mode
 *
 * The store uses WRITE_ONCE(), pairing with the READ_ONCE() in
 * net_dim_setting().
 */
void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode)
{
	struct dim_irq_moder *moder = dev->irq_moder;

	WRITE_ONCE(moder->dim_rx_mode, rx_mode);
}
EXPORT_SYMBOL(net_dim_set_rx_mode);
229  
/**
 * net_dim_set_tx_mode - update the TX mode stored in the device's irq_moder
 * @dev: network device; dev->irq_moder is dereferenced without a NULL check
 * @tx_mode: new value for dim_tx_mode
 *
 * The store uses WRITE_ONCE(), pairing with the READ_ONCE() in
 * net_dim_setting().
 */
void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode)
{
	struct dim_irq_moder *moder = dev->irq_moder;

	WRITE_ONCE(moder->dim_tx_mode, tx_mode);
}
EXPORT_SYMBOL(net_dim_set_tx_mode);
235  
net_dim_step(struct dim * dim)236  static int net_dim_step(struct dim *dim)
237  {
238  	if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
239  		return DIM_TOO_TIRED;
240  
241  	switch (dim->tune_state) {
242  	case DIM_PARKING_ON_TOP:
243  	case DIM_PARKING_TIRED:
244  		break;
245  	case DIM_GOING_RIGHT:
246  		if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
247  			return DIM_ON_EDGE;
248  		dim->profile_ix++;
249  		dim->steps_right++;
250  		break;
251  	case DIM_GOING_LEFT:
252  		if (dim->profile_ix == 0)
253  			return DIM_ON_EDGE;
254  		dim->profile_ix--;
255  		dim->steps_left++;
256  		break;
257  	}
258  
259  	dim->tired++;
260  	return DIM_STEPPED;
261  }
262  
net_dim_exit_parking(struct dim * dim)263  static void net_dim_exit_parking(struct dim *dim)
264  {
265  	dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
266  	net_dim_step(dim);
267  }
268  
net_dim_stats_compare(struct dim_stats * curr,struct dim_stats * prev)269  static int net_dim_stats_compare(struct dim_stats *curr,
270  				 struct dim_stats *prev)
271  {
272  	if (!prev->bpms)
273  		return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
274  
275  	if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
276  		return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
277  						   DIM_STATS_WORSE;
278  
279  	if (!prev->ppms)
280  		return curr->ppms ? DIM_STATS_BETTER :
281  				    DIM_STATS_SAME;
282  
283  	if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
284  		return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
285  						   DIM_STATS_WORSE;
286  
287  	if (!prev->epms)
288  		return DIM_STATS_SAME;
289  
290  	if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
291  		return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
292  						   DIM_STATS_WORSE;
293  
294  	return DIM_STATS_SAME;
295  }
296  
net_dim_decision(struct dim_stats * curr_stats,struct dim * dim)297  static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
298  {
299  	int prev_state = dim->tune_state;
300  	int prev_ix = dim->profile_ix;
301  	int stats_res;
302  	int step_res;
303  
304  	switch (dim->tune_state) {
305  	case DIM_PARKING_ON_TOP:
306  		stats_res = net_dim_stats_compare(curr_stats,
307  						  &dim->prev_stats);
308  		if (stats_res != DIM_STATS_SAME)
309  			net_dim_exit_parking(dim);
310  		break;
311  
312  	case DIM_PARKING_TIRED:
313  		dim->tired--;
314  		if (!dim->tired)
315  			net_dim_exit_parking(dim);
316  		break;
317  
318  	case DIM_GOING_RIGHT:
319  	case DIM_GOING_LEFT:
320  		stats_res = net_dim_stats_compare(curr_stats,
321  						  &dim->prev_stats);
322  		if (stats_res != DIM_STATS_BETTER)
323  			dim_turn(dim);
324  
325  		if (dim_on_top(dim)) {
326  			dim_park_on_top(dim);
327  			break;
328  		}
329  
330  		step_res = net_dim_step(dim);
331  		switch (step_res) {
332  		case DIM_ON_EDGE:
333  			dim_park_on_top(dim);
334  			break;
335  		case DIM_TOO_TIRED:
336  			dim_park_tired(dim);
337  			break;
338  		}
339  
340  		break;
341  	}
342  
343  	if (prev_state != DIM_PARKING_ON_TOP ||
344  	    dim->tune_state != DIM_PARKING_ON_TOP)
345  		dim->prev_stats = *curr_stats;
346  
347  	return dim->profile_ix != prev_ix;
348  }
349  
net_dim(struct dim * dim,struct dim_sample end_sample)350  void net_dim(struct dim *dim, struct dim_sample end_sample)
351  {
352  	struct dim_stats curr_stats;
353  	u16 nevents;
354  
355  	switch (dim->state) {
356  	case DIM_MEASURE_IN_PROGRESS:
357  		nevents = BIT_GAP(BITS_PER_TYPE(u16),
358  				  end_sample.event_ctr,
359  				  dim->start_sample.event_ctr);
360  		if (nevents < DIM_NEVENTS)
361  			break;
362  		if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
363  			break;
364  		if (net_dim_decision(&curr_stats, dim)) {
365  			dim->state = DIM_APPLY_NEW_PROFILE;
366  			schedule_work(&dim->work);
367  			break;
368  		}
369  		fallthrough;
370  	case DIM_START_MEASURE:
371  		dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
372  				  end_sample.byte_ctr, &dim->start_sample);
373  		dim->state = DIM_MEASURE_IN_PROGRESS;
374  		break;
375  	case DIM_APPLY_NEW_PROFILE:
376  		break;
377  	}
378  }
379  EXPORT_SYMBOL(net_dim);
380