xref: /openbmc/linux/include/linux/dim.h (revision 1ac731c529cd4d6adbce134754b51ff7d822b145)
1  /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
2  /* Copyright (c) 2019 Mellanox Technologies. */
3  
4  #ifndef DIM_H
5  #define DIM_H
6  
7  #include <linux/bits.h>
8  #include <linux/kernel.h>
9  #include <linux/module.h>
10  #include <linux/types.h>
11  #include <linux/workqueue.h>
12  
/*
 * Number of events between DIM iterations.
 * Moderates how often the algorithm runs.
 */
#define DIM_NEVENTS 64
18  
/*
 * Does the difference between two values justify taking an action?
 * A difference of more than 10% of @ref (computed with integer arithmetic)
 * is considered significant. A zero @ref is never significant, which also
 * guards the division.
 *
 * Note: both arguments are evaluated more than once, so they must be free
 * of side effects.
 */
#define IS_SIGNIFICANT_DIFF(val, ref) \
	((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
25  
/*
 * Calculate the gap between two samples of a @bits-wide counter.
 * Adding BIT_ULL(bits) before masking makes the result correct when the
 * counter wrapped around between @start and @end.
 *
 * NOTE(review): presumably @bits must be smaller than 64 for BIT_ULL(bits)
 * to be well defined - confirm against linux/bits.h.
 */
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
		& (BIT_ULL(bits) - 1))
32  
33  /**
34   * struct dim_cq_moder - Structure for CQ moderation values.
35   * Used for communications between DIM and its consumer.
36   *
37   * @usec: CQ timer suggestion (by DIM)
38   * @pkts: CQ packet counter suggestion (by DIM)
39   * @comps: Completion counter
40   * @cq_period_mode: CQ period count mode (from CQE/EQE)
41   */
42  struct dim_cq_moder {
43  	u16 usec;
44  	u16 pkts;
45  	u16 comps;
46  	u8 cq_period_mode;
47  };
48  
49  /**
50   * struct dim_sample - Structure for DIM sample data.
51   * Used for communications between DIM and its consumer.
52   *
53   * @time: Sample timestamp
54   * @pkt_ctr: Number of packets
55   * @byte_ctr: Number of bytes
56   * @event_ctr: Number of events
57   * @comp_ctr: Current completion counter
58   */
59  struct dim_sample {
60  	ktime_t time;
61  	u32 pkt_ctr;
62  	u32 byte_ctr;
63  	u16 event_ctr;
64  	u32 comp_ctr;
65  };
66  
/**
 * struct dim_stats - Structure for DIM stats.
 * Used for holding the currently measured rates.
 *
 * @ppms: Packets per msec
 * @bpms: Bytes per msec
 * @epms: Events per msec
 * @cpms: Completions per msec
 * @cpe_ratio: Ratio of completions to events
 */
struct dim_stats {
	int ppms;
	int bpms;
	int epms;
	int cpms;
	int cpe_ratio;
};
84  
85  /**
86   * struct dim - Main structure for dynamic interrupt moderation (DIM).
87   * Used for holding all information about a specific DIM instance.
88   *
89   * @state: Algorithm state (see below)
90   * @prev_stats: Measured rates from previous iteration (for comparison)
91   * @start_sample: Sampled data at start of current iteration
92   * @measuring_sample: A &dim_sample that is used to update the current events
93   * @work: Work to perform on action required
94   * @priv: A pointer to the struct that points to dim
95   * @profile_ix: Current moderation profile
96   * @mode: CQ period count mode
97   * @tune_state: Algorithm tuning state (see below)
98   * @steps_right: Number of steps taken towards higher moderation
99   * @steps_left: Number of steps taken towards lower moderation
100   * @tired: Parking depth counter
101   */
102  struct dim {
103  	u8 state;
104  	struct dim_stats prev_stats;
105  	struct dim_sample start_sample;
106  	struct dim_sample measuring_sample;
107  	struct work_struct work;
108  	void *priv;
109  	u8 profile_ix;
110  	u8 mode;
111  	u8 tune_state;
112  	u8 steps_right;
113  	u8 steps_left;
114  	u8 tired;
115  };
116  
/**
 * enum dim_cq_period_mode - Modes for CQ period count
 *
 * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
 * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
 * @DIM_CQ_PERIOD_NUM_MODES: Number of modes
 */
enum dim_cq_period_mode {
	DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
	DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
	DIM_CQ_PERIOD_NUM_MODES
};
129  
/**
 * enum dim_state - DIM algorithm states
 *
 * These determine whether the algorithm is in a valid state to start an
 * iteration.
 *
 * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
 * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check whether
 * an action needs to be performed
 * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
 */
enum dim_state {
	DIM_START_MEASURE,
	DIM_MEASURE_IN_PROGRESS,
	DIM_APPLY_NEW_PROFILE,
};
145  
/**
 * enum dim_tune_state - DIM algorithm tune states
 *
 * These determine which action the algorithm should perform.
 *
 * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
 * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
 * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
 * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
 */
enum dim_tune_state {
	DIM_PARKING_ON_TOP,
	DIM_PARKING_TIRED,
	DIM_GOING_RIGHT,
	DIM_GOING_LEFT,
};
162  
/**
 * enum dim_stats_state - DIM algorithm statistics states
 *
 * These determine the verdict of the current iteration.
 *
 * @DIM_STATS_WORSE: Current iteration shows worse performance than before
 * @DIM_STATS_SAME: Current iteration shows the same performance as before
 * @DIM_STATS_BETTER: Current iteration shows better performance than before
 */
enum dim_stats_state {
	DIM_STATS_WORSE,
	DIM_STATS_SAME,
	DIM_STATS_BETTER,
};
177  
/**
 * enum dim_step_result - DIM algorithm step results
 *
 * These describe the result of a step.
 *
 * @DIM_STEPPED: Performed a regular step
 * @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
 * tired parking
 * @DIM_ON_EDGE: Stepped to the most left/right profile
 */
enum dim_step_result {
	DIM_STEPPED,
	DIM_TOO_TIRED,
	DIM_ON_EDGE,
};
193  
194  /**
195   *	dim_on_top - check if current state is a good place to stop (top location)
196   *	@dim: DIM context
197   *
198   * Check if current profile is a good place to park at.
199   * This will result in reducing the DIM checks frequency as we assume we
200   * shouldn't probably change profiles, unless traffic pattern wasn't changed.
201   */
202  bool dim_on_top(struct dim *dim);
203  
/**
 *	dim_turn - reverse the profile-changing direction
 *	@dim: DIM context
 *
 * Go left if we were going right and vice-versa.
 * Do nothing if currently parking.
 */
void dim_turn(struct dim *dim);
212  
/**
 *	dim_park_on_top - enter a parking state on a top location
 *	@dim: DIM context
 *
 * Enter parking state.
 * Clear all movement history.
 */
void dim_park_on_top(struct dim *dim);
221  
/**
 *	dim_park_tired - enter a tired parking state
 *	@dim: DIM context
 *
 * Enter parking state.
 * Clear all movement history and reduce the frequency of DIM checks.
 */
void dim_park_tired(struct dim *dim);
230  
231  /**
232   *	dim_calc_stats - calculate the difference between two samples
233   *	@start: start sample
234   *	@end: end sample
235   *	@curr_stats: delta between samples
236   *
237   * Calculate the delta between two samples (in data rates).
238   * Takes into consideration counter wrap-around.
239   * Returned boolean indicates whether curr_stats are reliable.
240   */
241  bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
242  		    struct dim_stats *curr_stats);
243  
244  /**
245   *	dim_update_sample - set a sample's fields with given values
246   *	@event_ctr: number of events to set
247   *	@packets: number of packets to set
248   *	@bytes: number of bytes to set
249   *	@s: DIM sample
250   */
251  static inline void
dim_update_sample(u16 event_ctr,u64 packets,u64 bytes,struct dim_sample * s)252  dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
253  {
254  	s->time	     = ktime_get();
255  	s->pkt_ctr   = packets;
256  	s->byte_ctr  = bytes;
257  	s->event_ctr = event_ctr;
258  }
259  
260  /**
261   *	dim_update_sample_with_comps - set a sample's fields with given
262   *	values including the completion parameter
263   *	@event_ctr: number of events to set
264   *	@packets: number of packets to set
265   *	@bytes: number of bytes to set
266   *	@comps: number of completions to set
267   *	@s: DIM sample
268   */
269  static inline void
dim_update_sample_with_comps(u16 event_ctr,u64 packets,u64 bytes,u64 comps,struct dim_sample * s)270  dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
271  			     struct dim_sample *s)
272  {
273  	dim_update_sample(event_ctr, packets, bytes, s);
274  	s->comp_ctr = comps;
275  }
276  
/* Net DIM */
278  
279  /**
280   *	net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
281   *	@cq_period_mode: CQ period mode
282   *	@ix: Profile index
283   */
284  struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
285  
286  /**
287   *	net_dim_get_def_rx_moderation - provide the default RX moderation
288   *	@cq_period_mode: CQ period mode
289   */
290  struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
291  
292  /**
293   *	net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
294   *	@cq_period_mode: CQ period mode
295   *	@ix: Profile index
296   */
297  struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
298  
299  /**
300   *	net_dim_get_def_tx_moderation - provide the default TX moderation
301   *	@cq_period_mode: CQ period mode
302   */
303  struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
304  
/**
 *	net_dim - main DIM algorithm entry point
 *	@dim: DIM instance information
 *	@end_sample: Current data measurement
 *
 * Called by the consumer.
 * This is the main logic of the algorithm, where data is processed in order
 * to decide on the next required action.
 */
void net_dim(struct dim *dim, struct dim_sample end_sample);
315  
/* RDMA DIM */
317  
/*
 * RDMA DIM profile:
 * the profile size must be RDMA_DIM_PARAMS_NUM_PROFILES.
 */
#define RDMA_DIM_PARAMS_NUM_PROFILES 9
#define RDMA_DIM_START_PROFILE 0
324  
325  /**
326   * rdma_dim - Runs the adaptive moderation.
327   * @dim: The moderation struct.
328   * @completions: The number of completions collected in this round.
329   *
330   * Each call to rdma_dim takes the latest amount of completions that
331   * have been collected and counts them as a new event.
332   * Once enough events have been collected the algorithm decides a new
333   * moderation level.
334   */
335  void rdma_dim(struct dim *dim, u64 completions);
336  
337  #endif /* DIM_H */
338