uts: make emu10k non-verbose
[unleashed.git] / include / sys / mac_flow_impl.h
blob307e06c1bf772754f827507f343f780ef9c7a64f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #ifndef _MAC_FLOW_IMPL_H
28 #define _MAC_FLOW_IMPL_H
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
34 #include <sys/param.h>
35 #include <sys/atomic.h>
36 #include <sys/ksynch.h>
37 #include <sys/mac_flow.h>
38 #include <sys/stream.h>
39 #include <sys/sdt.h>
40 #include <net/if.h>
/*
 * Macros to increment/decrement the reference count on a flow_entry_t.
 *
 * FLOW_REFHOLD fires the flow_refhold probe and then increments fe_refcnt
 * with fe_lock held. The argument is evaluated more than once, so it must
 * not have side effects.
 */
#define	FLOW_REFHOLD(flent) {					\
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent));	\
	mutex_enter(&(flent)->fe_lock);				\
	(flent)->fe_refcnt++;					\
	mutex_exit(&(flent)->fe_lock);				\
}
/*
 * Data paths must not attempt to use a flow entry if it is marked INCIPIENT
 * or QUIESCE. In the former case the set up is not yet complete and the
 * data path could stumble on inconsistent data structures. In the latter
 * case a control operation is waiting for quiescence so that it can
 * change callbacks or other structures without the use of locks.
 *
 * On return, (err) is 0 if a reference was taken, or -1 if any of
 * FE_INCIPIENT, FE_QUIESCE, FE_CONDEMNED, FE_UF_NO_DATAPATH or
 * FE_MC_NO_DATAPATH was set, in which case fe_refcnt is left untouched.
 */
#define	FLOW_TRY_REFHOLD(flent, err) {				\
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent));	\
	(err) = 0;						\
	mutex_enter(&(flent)->fe_lock);				\
	if ((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | FE_CONDEMNED | \
	    FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH)) {		\
		(err) = -1;					\
	} else {						\
		(flent)->fe_refcnt++;				\
	}							\
	mutex_exit(&(flent)->fe_lock);				\
}
/*
 * Drop one reference on the flent. With fe_lock held:
 *  - if FE_WAITER is set, signal fe_cv and release the lock (the count is
 *    asserted to still be non-zero in that case);
 *  - otherwise, if the count reached zero, call mac_flow_destroy() with
 *    fe_lock still held (the macro does not release it on this path);
 *  - otherwise just release the lock.
 */
#define	FLOW_REFRELE(flent) {					\
	DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent));	\
	mutex_enter(&(flent)->fe_lock);				\
	ASSERT((flent)->fe_refcnt != 0);			\
	(flent)->fe_refcnt--;					\
	if ((flent)->fe_flags & FE_WAITER) {			\
		ASSERT((flent)->fe_refcnt != 0);		\
		cv_signal(&(flent)->fe_cv);			\
		mutex_exit(&(flent)->fe_lock);			\
	} else if ((flent)->fe_refcnt == 0) {			\
		mac_flow_destroy(flent);			\
	} else {						\
		mutex_exit(&(flent)->fe_lock);			\
	}							\
}
/* Increment fe_user_refcnt with fe_lock held. */
#define	FLOW_USER_REFHOLD(flent) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_user_refcnt++;			\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Decrement fe_user_refcnt with fe_lock held. If the count drops to zero
 * and FE_WAITER is set, signal fe_cv before releasing the lock.
 */
#define	FLOW_USER_REFRELE(flent) {			\
	mutex_enter(&(flent)->fe_lock);			\
	ASSERT((flent)->fe_user_refcnt != 0);		\
	if (--(flent)->fe_user_refcnt == 0 &&		\
	    ((flent)->fe_flags & FE_WAITER)) {		\
		cv_signal(&(flent)->fe_cv);		\
	}						\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Release what is asserted to be the last reference: fe_refcnt must be 1
 * and fe_user_refcnt must be 0. The release itself is done by FLOW_REFRELE.
 */
#define	FLOW_FINAL_REFRELE(flent) {					\
	ASSERT((flent)->fe_refcnt == 1 && (flent)->fe_user_refcnt == 0); \
	FLOW_REFRELE(flent);						\
}
/*
 * Mark the flent with a bit flag: OR (flag) into fe_flags with fe_lock held.
 */
#define	FLOW_MARK(flent, flag) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_flags |= (flag);			\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Unmark the flent: clear every bit of (flag) in fe_flags with fe_lock held.
 * The argument is parenthesized before complementing so that a compound
 * argument such as (A | B) clears both bits.
 */
#define	FLOW_UNMARK(flent, flag) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_flags &= ~(flag);			\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Yield the mip for a flent: via mac_bcast_grp_mip() when fe_mbg is set,
 * otherwise the mci_mip of the mac_client_impl_t stored in fe_mcip.
 */
#define	FLENT_TO_MIP(flent)						\
	((flent)->fe_mbg != NULL ? mac_bcast_grp_mip((flent)->fe_mbg) :	\
	((mac_client_impl_t *)(flent)->fe_mcip)->mci_mip)
/*
 * Convert a bandwidth expressed in bps to a number of bytes per tick:
 * bits are turned into bytes (>> 3) and the result is divided by hz.
 */
#define	FLOW_BYTES_PER_TICK(bps)	(((bps) >> 3) / hz)
/*
 * Given an underlying range and a priority level, obtain the minimum for the
 * new range: the range is split into MRP_PRIORITY_LEVELS equal steps and
 * (pri) steps are added to (min).
 */
#define	FLOW_MIN_PRIORITY(min, max, pri)	\
	((min) + ((((max) - (min)) / MRP_PRIORITY_LEVELS) * (pri)))
/*
 * Given an underlying range and a minimum level (base), obtain the maximum
 * for the new range: (base) plus one step of size
 * (max - min) / MRP_PRIORITY_LEVELS.
 */
#define	FLOW_MAX_PRIORITY(min, max, base)	\
	((base) + (((max) - (min)) / MRP_PRIORITY_LEVELS))
/*
 * Given an underlying range and a priority level, get the absolute
 * priority value. For now there are just 3 values, high, low and
 * medium so we can just return max, min or min + (max - min) / 2.
 * If there are more than three we need to change this computation.
 *
 * The whole expansion is parenthesized so the conditional expression
 * cannot be split by operators at the point of use.
 */
#define	FLOW_PRIORITY(min, max, pri)		\
	((pri) == MPL_HIGH ? (max) :		\
	(pri) == MPL_LOW ? (min) :		\
	((min) + (((max) - (min)) / 2)))
#define	MAC_FLOW_TAB_SIZE	500

/*
 * Short names for the flow structures defined later in this header, plus
 * forward declarations for the structures this header only refers to by
 * pointer.
 */
typedef struct flow_entry_s	flow_entry_t;
typedef struct flow_tab_s	flow_tab_t;
typedef struct flow_state_s	flow_state_t;
struct mac_impl_s;
struct mac_client_impl_s;
/*
 * Classification flags used to lookup the flow.
 */
#define	FLOW_INBOUND		0x01
#define	FLOW_OUTBOUND		0x02
/* Don't compare VID when classifying the packets, see mac_rx_classify() */
#define	FLOW_IGNORE_VLAN	0x04
170 /* Generic flow client function signature */
171 typedef void (*flow_fn_t)(void *, void *, mblk_t *, boolean_t);
/* Flow state */
typedef enum {
	FLOW_DRIVER_UPCALL,
	FLOW_USER_REF
} mac_flow_state_t;
179 /* Matches a flow_entry_t using the extracted flow_state_t info */
180 typedef boolean_t (*flow_match_fn_t)(flow_tab_t *, flow_entry_t *,
181 flow_state_t *);
/* fe_flags */
#define	FE_QUIESCE		0x01	/* Quiesce the flow */
#define	FE_WAITER		0x02	/* Flow has a waiter */
#define	FE_FLOW_TAB		0x04	/* Flow is in the flow tab list */
#define	FE_G_FLOW_HASH		0x08	/* Flow is in the global flow hash */
#define	FE_INCIPIENT		0x10	/* Being setup */
#define	FE_CONDEMNED		0x20	/* Being deleted */
#define	FE_UF_NO_DATAPATH	0x40	/* No datapath setup for User flow */
#define	FE_MC_NO_DATAPATH	0x80	/* No datapath setup for mac client */
/* fe_type */
#define	FLOW_PRIMARY_MAC	0x01	/* NIC primary MAC address */
#define	FLOW_VNIC_MAC		0x02	/* VNIC flow */
#define	FLOW_MCAST		0x04	/* Multicast (and broadcast) */
#define	FLOW_OTHER		0x08	/* Other flows configured */
#define	FLOW_USER		0x10	/* User defined flow */
#define	FLOW_VNIC		FLOW_VNIC_MAC	/* Alias of FLOW_VNIC_MAC */
#define	FLOW_NO_STATS		0x20	/* Don't create stats for the flow */
203 * Shared Bandwidth control counters between the soft ring set and its
204 * associated soft rings. In case the flow associated with NIC/VNIC
205 * has a group of Rx rings assigned to it, we have the same
206 * number of soft ring sets as we have the Rx ring in the group
207 * and each individual SRS (and its soft rings) decide when to
208 * poll their Rx ring independently. But if there is a B/W limit
209 * associated with the NIC/VNIC, then the B/W control counter is
210 * shared across all the SRS in the group and their associated
211 * soft rings.
213 * There is a many to 1 mapping between the SRS and
214 * mac_bw_ctl if the flow has a group of Rx rings associated with
215 * it.
217 typedef struct mac_bw_ctl_s {
218 kmutex_t mac_bw_lock;
219 uint32_t mac_bw_state;
220 size_t mac_bw_sz; /* ?? Is it needed */
221 size_t mac_bw_limit; /* Max bytes to process per tick */
222 size_t mac_bw_used; /* Bytes processed in current tick */
223 size_t mac_bw_drop_threshold; /* Max queue length */
224 size_t mac_bw_drop_bytes;
225 size_t mac_bw_polled;
226 size_t mac_bw_intr;
227 clock_t mac_bw_curr_time;
228 } mac_bw_ctl_t;
230 struct flow_entry_s { /* Protected by */
231 struct flow_entry_s *fe_next; /* ft_lock */
233 datalink_id_t fe_link_id; /* WO */
235 /* Properties as specified for this flow */
236 mac_resource_props_t fe_resource_props; /* SL */
238 /* Properties actually effective at run time for this flow */
239 mac_resource_props_t fe_effective_props; /* SL */
241 kmutex_t fe_lock;
242 char fe_flow_name[MAXFLOWNAMELEN]; /* fe_lock */
243 flow_desc_t fe_flow_desc; /* fe_lock */
244 kcondvar_t fe_cv; /* fe_lock */
246 * Initial flow ref is 1 on creation. A thread that lookups the
247 * flent typically by a mac_flow_lookup() dynamically holds a ref.
248 * If the ref is 1, it means there arent' any upcalls from the driver
249 * or downcalls from the stack using this flent. Structures pointing
250 * to the flent or flent inserted in lists don't count towards this
251 * refcnt. Instead they are tracked using fe_flags. Only a control
252 * thread doing a teardown operation deletes the flent, after waiting
253 * for upcalls to finish synchronously. The fe_refcnt tracks
254 * the number of upcall refs
256 uint32_t fe_refcnt; /* fe_lock */
259 * This tracks lookups done using the global hash list for user
260 * generated flows. This refcnt only protects the flent itself
261 * from disappearing and helps walkers to read the flent info such
262 * as flow spec. However the flent may be quiesced and the SRS could
263 * be deleted. The fe_user_refcnt tracks the number of global flow
264 * has refs.
266 uint32_t fe_user_refcnt; /* fe_lock */
267 uint_t fe_flags; /* fe_lock */
270 * Function/args to invoke for delivering matching packets
271 * Only the function ff_fn may be changed dynamically and atomically.
272 * The ff_arg1 and ff_arg2 are set at creation time and may not
273 * be changed.
275 flow_fn_t fe_cb_fn; /* fe_lock */
276 void *fe_cb_arg1; /* fe_lock */
277 void *fe_cb_arg2; /* fe_lock */
279 void *fe_client_cookie; /* WO */
280 void *fe_rx_ring_group; /* SL */
281 void *fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */
282 int fe_rx_srs_cnt; /* fe_lock */
283 void *fe_tx_ring_group;
284 void *fe_tx_srs; /* WO */
285 int fe_tx_ring_cnt;
288 * This is a unicast flow, and is a mac_client_impl_t
290 void *fe_mcip; /* WO */
293 * Used by mci_flent_list of mac_client_impl_t to track flows sharing
294 * the same mac_client_impl_t.
296 struct flow_entry_s *fe_client_next;
299 * This is a broadcast or multicast flow and is a mac_bcast_grp_t
301 void *fe_mbg; /* WO */
302 uint_t fe_type; /* WO */
305 * BW control info.
307 mac_bw_ctl_t fe_tx_bw;
308 mac_bw_ctl_t fe_rx_bw;
311 * Used by flow table lookup code
313 flow_match_fn_t fe_match;
316 * Used by mac_flow_remove().
318 int fe_index;
319 flow_tab_t *fe_flow_tab;
321 kstat_t *fe_ksp;
322 kstat_t *fe_misc_stat_ksp;
324 boolean_t fe_desc_logged;
325 uint64_t fe_nic_speed;
329 * Various structures used by the flows framework for keeping track
330 * of packet state information.
333 /* Layer 2 */
334 typedef struct flow_l2info_s {
335 uchar_t *l2_start;
336 uint8_t *l2_daddr;
337 uint16_t l2_vid;
338 uint32_t l2_sap;
339 uint_t l2_hdrsize;
340 } flow_l2info_t;
342 /* Layer 3 */
343 typedef struct flow_l3info_s {
344 uchar_t *l3_start;
345 uint8_t l3_protocol;
346 uint8_t l3_version;
347 boolean_t l3_dst_or_src;
348 uint_t l3_hdrsize;
349 boolean_t l3_fragmented;
350 } flow_l3info_t;
352 /* Layer 4 */
353 typedef struct flow_l4info_s {
354 uchar_t *l4_start;
355 uint16_t l4_src_port;
356 uint16_t l4_dst_port;
357 uint16_t l4_hash_port;
358 } flow_l4info_t;
361 * Combined state structure.
362 * Holds flow direction and an mblk_t pointer.
364 struct flow_state_s {
365 uint_t fs_flags;
366 mblk_t *fs_mp;
367 flow_l2info_t fs_l2info;
368 flow_l3info_t fs_l3info;
369 flow_l4info_t fs_l4info;
373 * Flow ops vector.
374 * There are two groups of functions. The ones ending with _fe are
375 * called when a flow is being added. The others (hash, accept) are
376 * called at flow lookup time.
378 #define FLOW_MAX_ACCEPT 16
379 typedef struct flow_ops_s {
381 * fo_accept_fe():
382 * Validates the contents of the flow and checks whether
383 * it's compatible with the flow table. sets the fe_match
384 * function of the flow.
386 int (*fo_accept_fe)(flow_tab_t *, flow_entry_t *);
388 * fo_hash_fe():
389 * Generates a hash index to the flow table. This function
390 * must use the same algorithm as fo_hash(), which is used
391 * by the flow lookup code path.
393 uint32_t (*fo_hash_fe)(flow_tab_t *, flow_entry_t *);
395 * fo_match_fe():
396 * This is used for finding identical flows.
398 boolean_t (*fo_match_fe)(flow_tab_t *, flow_entry_t *,
399 flow_entry_t *);
401 * fo_insert_fe():
402 * Used for inserting a flow to a flow chain.
403 * Protocols that have special ordering requirements would
404 * need to implement this. For those that don't,
405 * flow_generic_insert_fe() may be used.
407 int (*fo_insert_fe)(flow_tab_t *, flow_entry_t **,
408 flow_entry_t *);
411 * Calculates the flow hash index based on the accumulated
412 * state in flow_state_t. Must use the same algorithm as
413 * fo_hash_fe().
415 uint32_t (*fo_hash)(flow_tab_t *, flow_state_t *);
418 * Array of accept fuctions.
419 * Each function in the array will accumulate enough state
420 * (header length, protocol) to allow the next function to
421 * proceed. We support up to FLOW_MAX_ACCEPT functions which
422 * should be sufficient for all practical purposes.
424 int (*fo_accept[FLOW_MAX_ACCEPT])(flow_tab_t *,
425 flow_state_t *);
426 } flow_ops_t;
429 * Generic flow table.
431 struct flow_tab_s {
432 krwlock_t ft_lock;
434 * Contains a list of functions (described above)
435 * specific to this table type.
437 flow_ops_t ft_ops;
440 * Indicates what types of flows are supported.
442 flow_mask_t ft_mask;
445 * An array of flow_entry_t * of size ft_size.
446 * Each element is the beginning of a hash chain.
448 flow_entry_t **ft_table;
449 uint_t ft_size;
452 * The number of flows inserted into ft_table.
454 uint_t ft_flow_count;
455 struct mac_impl_s *ft_mip;
456 struct mac_client_impl_s *ft_mcip;
460 * This is used for describing what type of flow table can be created.
461 * mac_flow.c contains a list of these structures.
463 typedef struct flow_tab_info_s {
464 flow_ops_t *fti_ops;
465 flow_mask_t fti_mask;
466 uint_t fti_size;
467 } flow_tab_info_t;
/* True when the table pointer is NULL or the table holds no flows. */
#define	FLOW_TAB_EMPTY(ft)	((ft) == NULL || (ft)->ft_flow_count == 0)
/*
 * Add (c), widened to uint64_t, to the mms_<s> counter in the
 * mci_misc_stat of the mac_client_impl_t pointed to by (m).
 */
#define	MCIP_STAT_UPDATE(m, s, c) {					\
	((mac_client_impl_t *)(m))->mci_misc_stat.mms_##s		\
	    += ((uint64_t)(c));						\
}
/*
 * Add (c), widened to uint64_t, to the mrs_<s> counter in srs_rx.sr_stat
 * of the mac_soft_ring_set_t pointed to by (m).
 */
#define	SRS_RX_STAT_UPDATE(m, s, c) {					\
	((mac_soft_ring_set_t *)(m))->srs_rx.sr_stat.mrs_##s		\
	    += ((uint64_t)(c));						\
}
/*
 * Add (c), widened to uint64_t, to the mts_<s> counter in srs_tx.st_stat
 * of the mac_soft_ring_set_t pointed to by (m).
 */
#define	SRS_TX_STAT_UPDATE(m, s, c) {					\
	((mac_soft_ring_set_t *)(m))->srs_tx.st_stat.mts_##s		\
	    += ((uint64_t)(c));						\
}

/*
 * Fold the opackets, obytes and oerrors counters of the stats structure
 * pointed to by (s) into the Tx stats of the SRS pointed to by (m).
 */
#define	SRS_TX_STATS_UPDATE(m, s) {					\
	SRS_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets);		\
	SRS_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes);		\
	SRS_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors);		\
}
/*
 * Add (c), widened to uint64_t, to the mts_<s> counter in s_st_stat of
 * the mac_soft_ring_t pointed to by (m).
 */
#define	SOFTRING_TX_STAT_UPDATE(m, s, c) {				\
	((mac_soft_ring_t *)(m))->s_st_stat.mts_##s += ((uint64_t)(c));	\
}

/*
 * Fold the opackets, obytes and oerrors counters of the stats structure
 * pointed to by (s) into the Tx stats of the soft ring pointed to by (m).
 */
#define	SOFTRING_TX_STATS_UPDATE(m, s) {				\
	SOFTRING_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets);	\
	SOFTRING_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes);		\
	SOFTRING_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors);	\
}
503 extern void mac_flow_init();
504 extern void mac_flow_fini();
505 extern int mac_flow_create(flow_desc_t *, mac_resource_props_t *,
506 char *, void *, uint_t, flow_entry_t **);
508 extern int mac_flow_add(flow_tab_t *, flow_entry_t *);
509 extern int mac_flow_add_subflow(mac_client_handle_t, flow_entry_t *,
510 boolean_t);
511 extern int mac_flow_hash_add(flow_entry_t *);
512 extern int mac_flow_lookup_byname(char *, flow_entry_t **);
513 extern int mac_flow_lookup(flow_tab_t *, mblk_t *, uint_t,
514 flow_entry_t **);
516 extern int mac_flow_walk(flow_tab_t *, int (*)(flow_entry_t *, void *),
517 void *);
519 extern int mac_flow_walk_nolock(flow_tab_t *,
520 int (*)(flow_entry_t *, void *), void *);
522 extern void mac_flow_modify(flow_tab_t *, flow_entry_t *,
523 mac_resource_props_t *);
525 extern void *mac_flow_get_client_cookie(flow_entry_t *);
527 extern uint32_t mac_flow_modify_props(flow_entry_t *, mac_resource_props_t *);
529 extern int mac_flow_update(flow_tab_t *, flow_entry_t *, flow_desc_t *);
530 extern void mac_flow_get_desc(flow_entry_t *, flow_desc_t *);
531 extern void mac_flow_set_desc(flow_entry_t *, flow_desc_t *);
533 extern void mac_flow_remove(flow_tab_t *, flow_entry_t *, boolean_t);
534 extern void mac_flow_hash_remove(flow_entry_t *);
535 extern void mac_flow_wait(flow_entry_t *, mac_flow_state_t);
536 extern void mac_flow_quiesce(flow_entry_t *);
537 extern void mac_flow_restart(flow_entry_t *);
538 extern void mac_flow_cleanup(flow_entry_t *);
539 extern void mac_flow_destroy(flow_entry_t *);
541 extern void mac_flow_tab_create(flow_ops_t *, flow_mask_t, uint_t,
542 struct mac_impl_s *, flow_tab_t **);
543 extern void mac_flow_l2tab_create(struct mac_impl_s *, flow_tab_t **);
544 extern void mac_flow_tab_destroy(flow_tab_t *);
545 extern void mac_flow_drop(void *, void *, mblk_t *);
546 extern void flow_stat_destroy(flow_entry_t *);
548 #ifdef __cplusplus
550 #endif
552 #endif /* _MAC_FLOW_IMPL_H */