1 /* 2 * Copyright 2021 Google LLC 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_ 18 #define PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_ 19 20 /* Nemora NC-SI (Finite) State Machine implementation */ 21 22 #include <stdint.h> 23 #include <stdbool.h> 24 25 #include "platforms/nemora/portable/ncsi.h" 26 #include "platforms/nemora/portable/net_types.h" 27 28 /* TODO put this into config somewhere? */ 29 #define NCSI_FSM_RESTART_DELAY_COUNT 100 30 #define NCSI_FSM_RETEST_DELAY_COUNT 100 31 32 /* The network state is defined as a combination of the NC-SI connection state 33 * and the network configuration. However the two cannot be decoupled: 34 * - we cannot DHCP unless the NC-SI connection is up 35 * - we cannot do the OEM L3/L4 NC-SI configuration unless we have a valid 36 * network configuration 37 * 38 * For additional complexity we cannot get DHCP/ARP responses after the host 39 * has loaded the Mellanox NIC driver but we want to be able to periodically 40 * test the NC-SI connection regardless of whether we have network configuration 41 * (so that flaky cables can be troubleshooted using the host interface). 42 * 43 * For this reason there are actually 3 NC-SI finite state machines: 44 * - L2 configuration (i.e. enabling all available NC-SI channel for passthrough 45 * RX and TX, although only TX will work after the host loads the NIC driver) 46 * - L3/L4 configuration (i.e. configuring flow steering for RX traffic that 47 * matches our IP address and dedicated Nemora port so that we can receive 48 * Nemora requests even after the host loaded the NIC driver) 49 * - Connection test (i.e. periodically doing a ping test between the EC and the 50 * NIC) and also ensuring that L3/L4 configuration parameters have not been 51 * wiped out) 52 * 53 * For good karma, try to keep the state machines as linear as possible (one 54 * step after the other). 55 */ 56 57 typedef enum { 58 // First 59 NCSI_STATE_L2_CONFIG_BEGIN, 60 // Actual sequence 61 NCSI_STATE_RESTART = NCSI_STATE_L2_CONFIG_BEGIN, 62 NCSI_STATE_CLEAR_0, 63 NCSI_STATE_CLEAR_0_RESPONSE, 64 NCSI_STATE_GET_VERSION, 65 NCSI_STATE_GET_VERSION_RESPONSE, 66 NCSI_STATE_GET_CAPABILITIES, 67 NCSI_STATE_GET_CAPABILITIES_RESPONSE, 68 NCSI_STATE_CLEAR_1, 69 NCSI_STATE_CLEAR_1_RESPONSE, 70 NCSI_STATE_RESET_CHANNEL_0, 71 NCSI_STATE_RESET_CHANNEL_0_RESPONSE, 72 NCSI_STATE_RESET_CHANNEL_1, 73 NCSI_STATE_RESET_CHANNEL_1_RESPONSE, 74 NCSI_STATE_STOPPED, 75 NCSI_STATE_GET_MAC, 76 NCSI_STATE_GET_MAC_RESPONSE, 77 NCSI_STATE_SET_MAC_FILTER_0, 78 NCSI_STATE_SET_MAC_FILTER_0_RESPONSE, 79 NCSI_STATE_SET_MAC_FILTER_1, 80 NCSI_STATE_SET_MAC_FILTER_1_RESPONSE, 81 NCSI_STATE_ENABLE_CHANNEL_0, 82 NCSI_STATE_ENABLE_CHANNEL_0_RESPONSE, 83 NCSI_STATE_ENABLE_CHANNEL_1, 84 NCSI_STATE_ENABLE_CHANNEL_1_RESPONSE, 85 NCSI_STATE_ENABLE_TX, 86 NCSI_STATE_ENABLE_TX_RESPONSE, 87 // Last 88 NCSI_STATE_L2_CONFIG_END 89 } ncsi_l2_config_state_t; 90 91 typedef enum { 92 // First 93 NCSI_STATE_L3L4_CONFIG_BEGIN, 94 // Actual sequence 95 NCSI_STATE_CONFIG_FILTERS, 96 // Last 97 NCSI_STATE_L3L4_CONFIG_END 98 } ncsi_l3l4_config_state_t; 99 100 typedef enum { 101 // First 102 NCSI_STATE_TEST_BEGIN, 103 // Actual sequence 104 NCSI_STATE_TEST_PARAMS = NCSI_STATE_TEST_BEGIN, 105 NCSI_STATE_ECHO, 106 NCSI_STATE_ECHO_RESPONSE, 107 NCSI_STATE_CHECK_FILTERS, 108 NCSI_STATE_CHECK_FILTERS_RESPONSE, 109 NCSI_STATE_GET_PT_STATS, 110 NCSI_STATE_GET_PT_STATS_RESPONSE, 111 NCSI_STATE_GET_LINK_STATUS, 112 NCSI_STATE_GET_LINK_STATUS_RESPONSE, 113 // Last 114 NCSI_STATE_TEST_END 115 } ncsi_test_state_t; 116 117 typedef struct { 118 ncsi_l2_config_state_t l2_config_state; 119 ncsi_l3l4_config_state_t l3l4_config_state; 120 ncsi_test_state_t test_state; 121 // Last (OEM) command that was sent. (L3L4 SM only) 122 // Valid only if l3l4_waiting_response is true. 123 uint8_t l3l4_command; 124 // Number of the channel we are currently operating on. (L3L4 SM only) 125 uint8_t l3l4_channel; 126 // If true, means the request was sent and we are waiting for response. 127 bool l3l4_waiting_response; 128 uint8_t channel_count; 129 // The re-start and re-test delays ensures that we can flush the DMA 130 // buffers of potential out-of-sequence NC-SI packets (e.g. from 131 // packet that may have been received shortly after we timed out on 132 // them). The re-test delays also reduce the effect of NC-SI 133 // testing on more useful traffic. 134 uint8_t restart_delay_count; 135 uint8_t retest_delay_count; 136 struct { 137 uint8_t flags; 138 uint8_t regid[8]; 139 } flowsteering[2]; 140 } ncsi_state_t; 141 142 // Debug variables. 143 // TODO - Change name to something more meaningful since the NC-SI test 144 // is not a debug-only feature. 145 typedef struct { 146 uint32_t task_count; 147 uint32_t host_ctrl_flags; 148 struct { 149 bool enabled; 150 bool pending_stop; 151 bool pending_restart; 152 bool oem_filter_disable; 153 bool loopback; 154 bool mlx_legacy; 155 uint32_t fail_count; 156 ncsi_state_t state_that_failed; 157 uint32_t tx_count; 158 uint32_t rx_count; 159 uint32_t tx_error_count; 160 struct { 161 uint32_t timeout_count; 162 uint32_t oversized_count; 163 uint32_t undersized_count; 164 uint32_t nack_count; 165 uint32_t unexpected_size_count; 166 uint32_t unexpected_type_count; 167 } rx_error; 168 struct { 169 uint32_t runs; 170 uint8_t ch_under_test; 171 uint8_t tries; 172 uint8_t max_tries; // 0 = skip test, 1 = restart on failure, > 1 = retry 173 struct { 174 uint8_t tx[NCSI_OEM_ECHO_PATTERN_SIZE]; 175 uint32_t tx_count; 176 uint32_t rx_count; 177 uint32_t bad_rx_count; 178 uint8_t last_bad_rx[NCSI_OEM_ECHO_PATTERN_SIZE]; 179 } ping; 180 } test; 181 ncsi_passthrough_stats_t pt_stats_be[2]; // big-endian as received from NIC 182 } ncsi; 183 } network_debug_t; 184 185 typedef struct { 186 uint8_t data[ETH_BUFFER_SIZE]; 187 uint32_t len; // Non-zero when there's a new NC-SI response. 188 } ncsi_buf_t; 189 190 191 #ifdef __cplusplus 192 extern "C" { 193 #endif 194 195 ncsi_response_type_t ncsi_fsm_poll_l2_config(ncsi_state_t* ncsi_state, 196 network_debug_t* network_debug, 197 ncsi_buf_t* ncsi_buf, 198 mac_addr_t* mac); 199 200 ncsi_response_type_t ncsi_fsm_poll_l3l4_config(ncsi_state_t* ncsi_state, 201 network_debug_t* network_debug, 202 ncsi_buf_t* ncsi_buf, 203 mac_addr_t* mac, 204 uint32_t ipv4_addr, 205 uint16_t rx_port); 206 207 ncsi_response_type_t ncsi_fsm_poll_test(ncsi_state_t* ncsi_state, 208 network_debug_t* network_debug, 209 ncsi_buf_t* ncsi_buf, mac_addr_t* mac, 210 uint32_t ipv4_addr, uint16_t rx_port); 211 212 /* 213 * Report a global state of the NC-SI connection as a function of the state 214 * of the 3 finite state machines. 215 * Note: Additionally for the case where the connection is down it reports 216 * whether a loopback is inferred. 217 */ 218 ncsi_connection_state_t ncsi_fsm_connection_state( 219 const ncsi_state_t* ncsi_state, const network_debug_t* network_debug); 220 221 /* 222 * Returns true if we have executed an NC-SI Get OEM Filter command for all 223 * channels and the flags indicate that it is running in hostless mode. 224 * This means that we can DHCP/ARP if needed. 225 * Otherwise returns false. 226 * 227 * NOTE: We default to false, if we cannot complete the L2 config state 228 * machine or the test sequence. 229 */ 230 bool ncsi_fsm_is_nic_hostless(const ncsi_state_t* ncsi_state); 231 232 #ifdef __cplusplus 233 } /* extern "C" */ 234 #endif 235 236 #endif // PLATFORMS_NEMORA_PORTABLE_NCSI_FSM_H_ 237