4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #ifndef _MAC_FLOW_IMPL_H
28 #define _MAC_FLOW_IMPL_H
34 #include <sys/param.h>
35 #include <sys/atomic.h>
36 #include <sys/ksynch.h>
37 #include <sys/mac_flow.h>
38 #include <sys/stream.h>
43 * Macros to increment/decrement the reference count on a flow_entry_t.
/*
 * Take a reference on a flow entry: fires the flow_refhold DTrace probe,
 * then increments fe_refcnt while holding fe_lock.
 * The body is a brace block, so invoke it as a statement: FLOW_REFHOLD(f);
 */
#define	FLOW_REFHOLD(flent) {					\
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent));	\
	mutex_enter(&(flent)->fe_lock);				\
	(flent)->fe_refcnt++;					\
	mutex_exit(&(flent)->fe_lock);				\
}
53 * Data paths must not attempt to use a flow entry if it is marked INCIPIENT
54 * or QUIESCE. In the former case the set up is not yet complete and the
55 * data path could stumble on inconsistent data structures. In the latter
56 * case a control operation is waiting for quiescence so that it can
57 * change callbacks or other structures without the use of locks.
59 #define FLOW_TRY_REFHOLD(flent, err) { \
60 DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent)); \
62 mutex_enter(&(flent)->fe_lock); \
63 if ((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | FE_CONDEMNED | \
64 FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH)) \
67 (flent)->fe_refcnt++; \
68 mutex_exit(&(flent)->fe_lock); \
71 #define FLOW_REFRELE(flent) { \
72 DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent)); \
73 mutex_enter(&(flent)->fe_lock); \
74 ASSERT((flent)->fe_refcnt != 0); \
75 (flent)->fe_refcnt--; \
76 if ((flent)->fe_flags & FE_WAITER) { \
77 ASSERT((flent)->fe_refcnt != 0); \
78 cv_signal(&(flent)->fe_cv); \
79 mutex_exit(&(flent)->fe_lock); \
80 } else if ((flent)->fe_refcnt == 0) { \
81 mac_flow_destroy(flent); \
83 mutex_exit(&(flent)->fe_lock); \
/*
 * Take a user reference on a flow entry: increments fe_user_refcnt while
 * holding fe_lock.
 */
#define	FLOW_USER_REFHOLD(flent) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_user_refcnt++;			\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Drop a user reference on a flow entry. Under fe_lock, asserts that
 * fe_user_refcnt is non-zero, decrements it, and signals fe_cv when the
 * count reaches zero while FE_WAITER is set.
 */
#define	FLOW_USER_REFRELE(flent) {			\
	mutex_enter(&(flent)->fe_lock);			\
	ASSERT((flent)->fe_user_refcnt != 0);		\
	if (--(flent)->fe_user_refcnt == 0 &&		\
	    ((flent)->fe_flags & FE_WAITER))		\
		cv_signal(&(flent)->fe_cv);		\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Release what must be the last reference: asserts that fe_refcnt is 1 and
 * fe_user_refcnt is 0, then hands off to FLOW_REFRELE().
 * The argument is parenthesized so expressions such as `&entry` expand
 * correctly inside the assertion.
 */
#define	FLOW_FINAL_REFRELE(flent) {					\
	ASSERT((flent)->fe_refcnt == 1 && (flent)->fe_user_refcnt == 0); \
	FLOW_REFRELE(flent);						\
}
108 * Mark or unmark the flent with a bit flag
/*
 * Set the given flag bit(s) in fe_flags while holding fe_lock.
 * `flag` is parenthesized so any expression can be passed safely.
 */
#define	FLOW_MARK(flent, flag) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_flags |= (flag);			\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Clear the given flag bit(s) in fe_flags while holding fe_lock.
 * `flag` must be parenthesized before complementing: without it,
 * FLOW_UNMARK(f, A | B) expands to `~A | B` and clears the wrong bits.
 */
#define	FLOW_UNMARK(flent, flag) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_flags &= ~(flag);			\
	mutex_exit(&(flent)->fe_lock);			\
}
/*
 * Yield the mip for a flow entry: mac_bcast_grp_mip() of fe_mbg when that
 * pointer is non-NULL, otherwise the mci_mip of the mac client stored in
 * fe_mcip. `flent` is parenthesized so that arguments such as `&entry`
 * expand correctly.
 */
#define	FLENT_TO_MIP(flent)						\
	((flent)->fe_mbg != NULL ?					\
	mac_bcast_grp_mip((flent)->fe_mbg) :				\
	((mac_client_impl_t *)(flent)->fe_mcip)->mci_mip)
/*
 * Bandwidth conversion: takes bits per second and yields bytes per tick.
 * The value is shifted right by 3 (bits to bytes) and divided by `hz`,
 * which must be visible at the point of use.
 */
#define	FLOW_BYTES_PER_TICK(bps)	(((bps) >> 3) / (hz))
130 * Given an underlying range and a priority level, obtain the minimum for the
/*
 * Lower bound of priority level `pri` within [min, max]: the range is cut
 * into MRP_PRIORITY_LEVELS equal (integer-divided) steps and `pri` steps
 * are added to `min`.
 */
#define	FLOW_MIN_PRIORITY(min, max, pri)				\
	((min) + ((pri) * (((max) - (min)) / MRP_PRIORITY_LEVELS)))
137 * Given an underlying range and a minimum level (base), obtain the maximum
/*
 * Upper bound that goes with a level's lower bound `base`: one step of
 * ((max - min) / MRP_PRIORITY_LEVELS) above `base`.
 */
#define	FLOW_MAX_PRIORITY(min, max, base)				\
	((((max) - (min)) / MRP_PRIORITY_LEVELS) + (base))
144 * Given an underlying range and a priority level, get the absolute
145 * priority value. For now there are just 3 values, high, low and
146 * medium so we can just return max, min or min + (max - min) / 2.
147 * If there are more than three we need to change this computation.
/*
 * Map a priority level onto [min, max]: MPL_HIGH yields max, MPL_LOW yields
 * min, and anything else yields the midpoint min + (max - min) / 2.
 * The whole expansion is parenthesized so the macro can be used as an
 * operand (e.g. FLOW_PRIORITY(a, b, p) + 1) without the surrounding
 * operator being absorbed into the conditional expression.
 */
#define	FLOW_PRIORITY(min, max, pri)					\
	((pri) == MPL_HIGH ? (max) :					\
	(pri) == MPL_LOW ? (min) :					\
	((min) + (((max) - (min)) / 2)))
/* Flow table size constant (presumably a bucket count; confirm with users). */
#define	MAC_FLOW_TAB_SIZE	500

/*
 * Opaque forward declarations, so pointers to these types can be used
 * before (or without) the full structure definitions.
 */
typedef struct flow_entry_s	flow_entry_t;
typedef struct flow_tab_s	flow_tab_t;
typedef struct flow_state_s	flow_state_t;

struct mac_client_impl_s;
163 * Classification flags used to lookup the flow.
/* Classification flag bits used when looking up a flow. */
#define	FLOW_INBOUND		(1 << 0)
#define	FLOW_OUTBOUND		(1 << 1)
/* Don't compare VID when classifying the packets, see mac_rx_classify() */
#define	FLOW_IGNORE_VLAN	(1 << 2)
170 /* Generic flow client function signature */
171 typedef void (*flow_fn_t
)(void *, void *, mblk_t
*, boolean_t
);
179 /* Matches a flow_entry_t using the extracted flow_state_t info */
180 typedef boolean_t (*flow_match_fn_t
)(flow_tab_t
*, flow_entry_t
*,
/* Bit values stored in a flow entry's fe_flags. */
#define	FE_QUIESCE		(1 << 0)	/* Quiesce the flow */
#define	FE_WAITER		(1 << 1)	/* Flow has a waiter */
#define	FE_FLOW_TAB		(1 << 2)	/* Flow is in the flow tab list */
#define	FE_G_FLOW_HASH		(1 << 3)	/* Flow is in the global flow hash */
#define	FE_INCIPIENT		(1 << 4)	/* Being setup */
#define	FE_CONDEMNED		(1 << 5)	/* Being deleted */
#define	FE_UF_NO_DATAPATH	(1 << 6)	/* No datapath setup for User flow */
#define	FE_MC_NO_DATAPATH	(1 << 7)	/* No datapath setup for mac client */
/* Flow type bits; FLOW_VNIC is an alias for FLOW_VNIC_MAC. */
#define	FLOW_PRIMARY_MAC	(1 << 0)	/* NIC primary MAC address */
#define	FLOW_VNIC_MAC		(1 << 1)	/* VNIC flow */
#define	FLOW_MCAST		(1 << 2)	/* Multicast (and broadcast) */
#define	FLOW_OTHER		(1 << 3)	/* Other flows configured */
#define	FLOW_USER		(1 << 4)	/* User defined flow */
#define	FLOW_VNIC		FLOW_VNIC_MAC
#define	FLOW_NO_STATS		(1 << 5)	/* Don't create stats for the flow */
203 * Shared Bandwidth control counters between the soft ring set and its
204 * associated soft rings. In case the flow associated with NIC/VNIC
205 * has a group of Rx rings assigned to it, we have the same
206 * number of soft ring sets as we have the Rx ring in the group
207 * and each individual SRS (and its soft rings) decide when to
208 * poll their Rx ring independently. But if there is a B/W limit
209 * associated with the NIC/VNIC, then the B/W control counter is
210 * shared across all the SRS in the group and their associated
213 * There is a many to 1 mapping between the SRS and
214 * mac_bw_ctl if the flow has a group of Rx rings associated with
217 typedef struct mac_bw_ctl_s
{
218 kmutex_t mac_bw_lock
;
219 uint32_t mac_bw_state
;
220 size_t mac_bw_sz
; /* ?? Is it needed */
221 size_t mac_bw_limit
; /* Max bytes to process per tick */
222 size_t mac_bw_used
; /* Bytes processed in current tick */
223 size_t mac_bw_drop_threshold
; /* Max queue length */
224 size_t mac_bw_drop_bytes
;
225 size_t mac_bw_polled
;
227 clock_t mac_bw_curr_time
;
230 struct flow_entry_s
{ /* Protected by */
231 struct flow_entry_s
*fe_next
; /* ft_lock */
233 datalink_id_t fe_link_id
; /* WO */
235 /* Properties as specified for this flow */
236 mac_resource_props_t fe_resource_props
; /* SL */
238 /* Properties actually effective at run time for this flow */
239 mac_resource_props_t fe_effective_props
; /* SL */
242 char fe_flow_name
[MAXFLOWNAMELEN
]; /* fe_lock */
243 flow_desc_t fe_flow_desc
; /* fe_lock */
244 kcondvar_t fe_cv
; /* fe_lock */
246 * Initial flow ref is 1 on creation. A thread that looks up the
247 * flent typically by a mac_flow_lookup() dynamically holds a ref.
248 * If the ref is 1, it means there aren't any upcalls from the driver
249 * or downcalls from the stack using this flent. Structures pointing
250 * to the flent or flent inserted in lists don't count towards this
251 * refcnt. Instead they are tracked using fe_flags. Only a control
252 * thread doing a teardown operation deletes the flent, after waiting
253 * for upcalls to finish synchronously. The fe_refcnt tracks
254 * the number of upcall refs
256 uint32_t fe_refcnt
; /* fe_lock */
259 * This tracks lookups done using the global hash list for user
260 * generated flows. This refcnt only protects the flent itself
261 * from disappearing and helps walkers to read the flent info such
262 * as flow spec. However the flent may be quiesced and the SRS could
263 * be deleted. The fe_user_refcnt tracks the number of global flow
266 uint32_t fe_user_refcnt
; /* fe_lock */
267 uint_t fe_flags
; /* fe_lock */
270 * Function/args to invoke for delivering matching packets
271 * Only the function fe_cb_fn may be changed dynamically and atomically.
272 * The fe_cb_arg1 and fe_cb_arg2 are set at creation time and may not
275 flow_fn_t fe_cb_fn
; /* fe_lock */
276 void *fe_cb_arg1
; /* fe_lock */
277 void *fe_cb_arg2
; /* fe_lock */
279 void *fe_client_cookie
; /* WO */
280 void *fe_rx_ring_group
; /* SL */
281 void *fe_rx_srs
[MAX_RINGS_PER_GROUP
]; /* fe_lock */
282 int fe_rx_srs_cnt
; /* fe_lock */
283 void *fe_tx_ring_group
;
284 void *fe_tx_srs
; /* WO */
288 * This is a unicast flow, and is a mac_client_impl_t
290 void *fe_mcip
; /* WO */
293 * Used by mci_flent_list of mac_client_impl_t to track flows sharing
294 * the same mac_client_impl_t.
296 struct flow_entry_s
*fe_client_next
;
299 * This is a broadcast or multicast flow and is a mac_bcast_grp_t
301 void *fe_mbg
; /* WO */
302 uint_t fe_type
; /* WO */
307 mac_bw_ctl_t fe_tx_bw
;
308 mac_bw_ctl_t fe_rx_bw
;
311 * Used by flow table lookup code
313 flow_match_fn_t fe_match
;
316 * Used by mac_flow_remove().
319 flow_tab_t
*fe_flow_tab
;
322 kstat_t
*fe_misc_stat_ksp
;
324 boolean_t fe_desc_logged
;
325 uint64_t fe_nic_speed
;
329 * Various structures used by the flows framework for keeping track
330 * of packet state information.
334 typedef struct flow_l2info_s
{
343 typedef struct flow_l3info_s
{
347 boolean_t l3_dst_or_src
;
349 boolean_t l3_fragmented
;
353 typedef struct flow_l4info_s
{
355 uint16_t l4_src_port
;
356 uint16_t l4_dst_port
;
357 uint16_t l4_hash_port
;
361 * Combined state structure.
362 * Holds flow direction and an mblk_t pointer.
364 struct flow_state_s
{
367 flow_l2info_t fs_l2info
;
368 flow_l3info_t fs_l3info
;
369 flow_l4info_t fs_l4info
;
374 * There are two groups of functions. The ones ending with _fe are
375 * called when a flow is being added. The others (hash, accept) are
376 * called at flow lookup time.
378 #define FLOW_MAX_ACCEPT 16
379 typedef struct flow_ops_s
{
382 * Validates the contents of the flow and checks whether
383 * it's compatible with the flow table. sets the fe_match
384 * function of the flow.
386 int (*fo_accept_fe
)(flow_tab_t
*, flow_entry_t
*);
389 * Generates a hash index to the flow table. This function
390 * must use the same algorithm as fo_hash(), which is used
391 * by the flow lookup code path.
393 uint32_t (*fo_hash_fe
)(flow_tab_t
*, flow_entry_t
*);
396 * This is used for finding identical flows.
398 boolean_t (*fo_match_fe
)(flow_tab_t
*, flow_entry_t
*,
402 * Used for inserting a flow to a flow chain.
403 * Protocols that have special ordering requirements would
404 * need to implement this. For those that don't,
405 * flow_generic_insert_fe() may be used.
407 int (*fo_insert_fe
)(flow_tab_t
*, flow_entry_t
**,
411 * Calculates the flow hash index based on the accumulated
412 * state in flow_state_t. Must use the same algorithm as
415 uint32_t (*fo_hash
)(flow_tab_t
*, flow_state_t
*);
418 * Array of accept functions.
419 * Each function in the array will accumulate enough state
420 * (header length, protocol) to allow the next function to
421 * proceed. We support up to FLOW_MAX_ACCEPT functions which
422 * should be sufficient for all practical purposes.
424 int (*fo_accept
[FLOW_MAX_ACCEPT
])(flow_tab_t
*,
429 * Generic flow table.
434 * Contains a list of functions (described above)
435 * specific to this table type.
440 * Indicates what types of flows are supported.
445 * An array of flow_entry_t * of size ft_size.
446 * Each element is the beginning of a hash chain.
448 flow_entry_t
**ft_table
;
452 * The number of flows inserted into ft_table.
454 uint_t ft_flow_count
;
455 struct mac_impl_s
*ft_mip
;
456 struct mac_client_impl_s
*ft_mcip
;
460 * This is used for describing what type of flow table can be created.
461 * mac_flow.c contains a list of these structures.
463 typedef struct flow_tab_info_s
{
465 flow_mask_t fti_mask
;
/*
 * True when there is nothing to look up: the table pointer is NULL or its
 * ft_flow_count is zero.
 */
#define	FLOW_TAB_EMPTY(ft)	(!((ft) != NULL && (ft)->ft_flow_count != 0))
/*
 * Add `c` (converted to uint64_t) to the mms_<s> counter in the
 * mci_misc_stat of the mac client `m`. `m` is cast to mac_client_impl_t *.
 */
#define	MCIP_STAT_UPDATE(m, s, c) {					\
	((mac_client_impl_t *)(m))->mci_misc_stat.mms_##s		\
	    += ((uint64_t)(c));						\
}
/*
 * Add `c` (converted to uint64_t) to the mrs_<s> counter in
 * srs_rx.sr_stat of the soft ring set `m`. `m` is cast to
 * mac_soft_ring_set_t *.
 */
#define	SRS_RX_STAT_UPDATE(m, s, c) {					\
	((mac_soft_ring_set_t *)(m))->srs_rx.sr_stat.mrs_##s		\
	    += ((uint64_t)(c));						\
}
/*
 * Add `c` (converted to uint64_t) to the mts_<s> counter in
 * srs_tx.st_stat of the soft ring set `m`. `m` is cast to
 * mac_soft_ring_set_t *.
 */
#define	SRS_TX_STAT_UPDATE(m, s, c) {					\
	((mac_soft_ring_set_t *)(m))->srs_tx.st_stat.mts_##s		\
	    += ((uint64_t)(c));						\
}

/*
 * Fold the opackets, obytes and oerrors counters of the stats structure
 * pointed to by `s` into the Tx stats of the soft ring set `m`.
 * Note that `s` is evaluated once per counter.
 */
#define	SRS_TX_STATS_UPDATE(m, s) {					\
	SRS_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets);		\
	SRS_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes);		\
	SRS_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors);		\
}
/*
 * Add `c` (converted to uint64_t) to the mts_<s> counter in s_st_stat of
 * the soft ring `m`. `m` is cast to mac_soft_ring_t *.
 */
#define	SOFTRING_TX_STAT_UPDATE(m, s, c) {				\
	((mac_soft_ring_t *)(m))->s_st_stat.mts_##s += ((uint64_t)(c));	\
}

/*
 * Fold the opackets, obytes and oerrors counters of the stats structure
 * pointed to by `s` into the Tx stats of the soft ring `m`.
 * Note that `s` is evaluated once per counter.
 */
#define	SOFTRING_TX_STATS_UPDATE(m, s) {				\
	SOFTRING_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets);	\
	SOFTRING_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes);		\
	SOFTRING_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors);	\
}
/*
 * Declared with (void) rather than an empty parameter list so these are
 * real prototypes: the compiler then rejects calls that pass arguments.
 */
extern void	mac_flow_init(void);
extern void	mac_flow_fini(void);
505 extern int mac_flow_create(flow_desc_t
*, mac_resource_props_t
*,
506 char *, void *, uint_t
, flow_entry_t
**);
508 extern int mac_flow_add(flow_tab_t
*, flow_entry_t
*);
509 extern int mac_flow_add_subflow(mac_client_handle_t
, flow_entry_t
*,
511 extern int mac_flow_hash_add(flow_entry_t
*);
512 extern int mac_flow_lookup_byname(char *, flow_entry_t
**);
513 extern int mac_flow_lookup(flow_tab_t
*, mblk_t
*, uint_t
,
516 extern int mac_flow_walk(flow_tab_t
*, int (*)(flow_entry_t
*, void *),
519 extern int mac_flow_walk_nolock(flow_tab_t
*,
520 int (*)(flow_entry_t
*, void *), void *);
522 extern void mac_flow_modify(flow_tab_t
*, flow_entry_t
*,
523 mac_resource_props_t
*);
525 extern void *mac_flow_get_client_cookie(flow_entry_t
*);
527 extern uint32_t mac_flow_modify_props(flow_entry_t
*, mac_resource_props_t
*);
529 extern int mac_flow_update(flow_tab_t
*, flow_entry_t
*, flow_desc_t
*);
530 extern void mac_flow_get_desc(flow_entry_t
*, flow_desc_t
*);
531 extern void mac_flow_set_desc(flow_entry_t
*, flow_desc_t
*);
533 extern void mac_flow_remove(flow_tab_t
*, flow_entry_t
*, boolean_t
);
534 extern void mac_flow_hash_remove(flow_entry_t
*);
535 extern void mac_flow_wait(flow_entry_t
*, mac_flow_state_t
);
536 extern void mac_flow_quiesce(flow_entry_t
*);
537 extern void mac_flow_restart(flow_entry_t
*);
538 extern void mac_flow_cleanup(flow_entry_t
*);
539 extern void mac_flow_destroy(flow_entry_t
*);
541 extern void mac_flow_tab_create(flow_ops_t
*, flow_mask_t
, uint_t
,
542 struct mac_impl_s
*, flow_tab_t
**);
543 extern void mac_flow_l2tab_create(struct mac_impl_s
*, flow_tab_t
**);
544 extern void mac_flow_tab_destroy(flow_tab_t
*);
545 extern void mac_flow_drop(void *, void *, mblk_t
*);
546 extern void flow_stat_destroy(flow_entry_t
*);
552 #endif /* _MAC_FLOW_IMPL_H */