xref: /openbmc/openbmc-test-automation/lib/ras/host_utils.robot (revision 663c2da54c94ff23e986dd4c1ca3bd48891b2f57)
1*** Settings ***
2Documentation       Utility for error injection scenarios through HOST & BMC.
3Resource            ../../lib/rest_client.robot
4Resource            ../../lib/utils.robot
5Resource            ../../lib/common_utils.robot
6Variables           ../../lib/ras/variables.py
7Library             ../../lib/bmc_ssh_utils.py
8Library             ../../lib/gen_print.py
9Library             ../../lib/gen_robot_print.py
10
11Library             OperatingSystem
12
13*** Keywords ***
14
Getscom Operations On OS
    [Documentation]  Run the 'getscom' utility on the OS with the supplied
    ...              option string and return the command's standard output.
    [Arguments]      ${input_cmd}
    # Description of argument(s):
    # input_cmd      Text appended after 'getscom', for example:
    #                -l|--list-chips
    #                -c|--chip <chip-id> <addr>

    # Build the full command line first, then hand it to the OS.
    ${getscom_cmd}=  Catenate  getscom  ${input_cmd}
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${getscom_cmd}
    [Return]  ${stdout}
25
Gard Operations On OS
    [Documentation]  Run the 'opal-gard' utility on the OS with the supplied
    ...              sub-command and return the command's standard output.
    [Arguments]      ${input_cmd}
    # Description of argument(s):
    # input_cmd      Text appended after 'opal-gard', for example:
    #                list/clear all/show <gard_record_id>

    # Build the full command line first, then hand it to the OS.
    ${gard_cmd}=  Catenate  opal-gard  ${input_cmd}
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${gard_cmd}
    [Return]  ${stdout}
35
Putscom Operations On OS
    [Documentation]  Assemble a 'putscom' command from the given chip ID,
    ...              FIR address and value, and launch it with
    ...              'Start Command'. Nothing is returned to the caller.
    [Arguments]      ${proc_chip_id}  ${fir_address}  ${value}
    # Description of argument(s):
    # proc_chip_id        Processor ID (e.g. '0', '8').
    # fir_address         FIR (Fault isolation register) value (e.g. 2011400).
    # value               Value to write (e.g. 4000000000000000).

    # Each argument is given a '0x' prefix here, so callers pass bare hex
    # digits.
    ${putscom_cmd}=  Catenate  putscom  -c  0x${proc_chip_id}
    ...  0x${fir_address}  0x${value}
    Start Command  ${putscom_cmd}
47
Get ProcChipId From OS
    [Documentation]  Return one chip ID from the 'getscom -l' listing,
    ...              filtered by chip type and selected by the master flag.
    [Arguments]      ${chip_type}  ${master_proc_chip}
    # Description of argument(s):
    # chip_type         The chip type (Processor/Centaur).
    # master_proc_chip  Processor chip type ('True' or 'False').

    # Only the first eight characters of each matching line are kept.
    ${list_options}=  Set Variable  -l | grep -i ${chip_type} | cut -c1-8
    ${chip_id_text}=  Getscom Operations On OS  ${list_options}
    # Example output:
    # getscom -l | grep processor | cut -c1-8
    # 00000008     - False
    # 00000000     - True

    ${chip_ids}=  Split String  ${chip_id_text}

    # The second listed entry is used when 'True' is requested, otherwise
    # the first one.
    ${wanted_index}=  Set Variable If  '${master_proc_chip}' == 'True'
    ...  ${1}  ${0}
    ${proc_id}=  Get From List  ${chip_ids}  ${wanted_index}

    # Example output:
    # 00000008
    [Return]  ${proc_id}
70
Get Core IDs From OS
    [Documentation]  Return the list of core IDs that the CPU probe tool
    ...              reports for the given processor chip ID.
    [Arguments]      ${proc_chip_id}
    # Description of argument(s):
    # proc_chip_id        Processor ID (e.g '0', '8').

    # 'pipefail' makes the pipeline report a failure from any stage, not
    # just the last one. Columns 21-22 of each matching line are kept.
    ${probe_cmd}=  Catenate  set -o pipefail ;  ${probe_cpu_file_path}
    ...  | grep -i 'CHIP ID: ${proc_chip_id}'  | cut -c21-22
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${probe_cmd}

    # Whitespace-separated values become individual list items.
    # Example output:
    # ['2', '3', '4', '5', '6']
    ${core_ids}=  Split String  ${stdout}
    [Return]  ${core_ids}
84
FIR Address Translation Through HOST
    [Documentation]  Run the address translation tool on the host for the
    ...              given FIR address and core ID, pick the line matching
    ...              the target type and return the translated address.
    [Arguments]  ${fir_address}  ${core_id}  ${target_type}
    # Description of argument(s):
    # fir_address     FIR (Fault isolation register) value (e.g. 2011400).
    # core_id         Core ID (e.g. 9).
    # target_type     Target type (e.g. 'EQ', 'EX', 'C').

    # 'pipefail' makes the pipeline report a failure from any stage, not
    # just the final grep.
    ${translate_cmd}=  Catenate  set -o pipefail ;
    ...  ${addr_translation_file_path}  ${fir_address}  ${core_id}
    ...  | grep -i ${target_type}
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${translate_cmd}

    # The output is split on the ': 0x' separator, so the returned piece is
    # the text that follows it (the '0x' itself is consumed by the split).
    # Example return value, assuming tool output ending in ': 0x10010c00':
    # 10010c00
    ${pieces}=  Split String  ${stdout}  :${SPACE}0x
    [Return]  ${pieces[1]}
101
Inject Error Through HOST
    [Documentation]  Inject checkstop on multiple targets like
    ...              CPU/CME/OCC/NPU/CAPP/MCA etc. through HOST.
    ...              Steps performed by this keyword:
    ...              1. Delete error logs and log in to the OS host.
    ...              2. Set auto reboot and clear existing gard records.
    ...              3. Write the value to the FIR on the selected
    ...              processor chip 'threshold_limit' times.
    [Arguments]      ${fir_address}  ${value}  ${threshold_limit}
    ...  ${master_proc_chip}=True
    # Description of argument(s):
    # fir_address         FIR (Fault isolation register) value (e.g. 2011400).
    # value               Value to write (e.g. 2000000000000000).
    # threshold_limit     Threshold limit (e.g. 1, 5, 32).
    # master_proc_chip    Processor chip type ('True' or 'False').

    Delete Error Logs
    Login To OS Host
    Set Auto Reboot  1
    Gard Operations On OS  clear all

    # Look up the chip ID of the processor to inject on.
    ${target_chip_id}=  Get ProcChipId From OS  Processor  ${master_proc_chip}

    ${injection_total}=  Convert To Integer  ${threshold_limit}
    FOR  ${injection}  IN RANGE  ${injection_total}
        Putscom Operations On OS  ${target_chip_id}  ${fir_address}  ${value}
        # Pause between consecutive injections.
        Sleep  10s
    END

    # Give the system time to produce the error log after the injection.
    Sleep  120s
135
Code Update Unrecoverable Error Inject
    [Documentation]  Inject UE MCACALFIR checkstop on processor through
    ...              host during PNOR code update.

    # Single injection using the default processor selection.
    Inject Error Through HOST  fir_address=05010800
    ...  value=4000000000000000  threshold_limit=1
141
Disable CPU States Through HOST
    [Documentation]  Disable every cpuidle state on all CPUs through the
    ...              host. The number of states is taken from cpu0.

    # Count the cpuidle state entries present for cpu0.
    ${count_cmd}=  Set Variable
    ...  ls /sys/devices/system/cpu/cpu0/cpuidle|grep state|wc -l
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${count_cmd}
    ${state_total}=  Convert To Integer  ${stdout}

    # For each state index, write 1 to the 'disable' file of every cpu.
    FOR  ${state_index}  IN RANGE  ${state_total}
        ${disable_cmd}=  Catenate  SEPARATOR=
        ...  for file_path in /sys/devices/system/cpu/cpu*/cpuidle/
        ...  state${state_index}/disable; do echo 1 > $file_path; done
        ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${disable_cmd}
    END
156
Is Opal-PRD Service Enabled
    [Documentation]  Look up opal-prd in the systemd unit file listing and
    ...              return the second field of the matching line
    ...              (e.g. 'enabled' or 'disabled').

    ${stdout}  ${stderr}  ${rc}=  OS Execute Command
    ...  systemctl list-unit-files | grep opal-prd

    # Example output from prior command:
    # opal-prd.service enabled
    ${unit_fields}=  Split String  ${stdout}
    [Return]  ${unit_fields[1]}
168
Enable Opal-PRD Service On HOST
    [Documentation]  Enable and start the Opal-PRD service on host, then
    ...              verify that its unit file state is reported as
    ...              'enabled'.

    # Starting the service does not change the unit file state that
    # 'Is Opal-PRD Service Enabled' reads from 'systemctl list-unit-files',
    # so the unit has to be enabled explicitly for the check below to pass
    # when it was previously disabled.
    OS Execute Command  systemctl enable opal-prd
    OS Execute Command  service opal-prd start
    ${opal_prd_state}=  Is Opal-PRD Service Enabled
    Should Contain  ${opal_prd_state}  enabled
175
176
Inject Error Through BMC
    [Documentation]  Inject checkstop on multiple targets like
    ...              CPU/CME/OCC/NPU/CAPP/MCA etc. through BMC.
    ...              Steps performed by this keyword:
    ...              1. Delete error logs and log in to the OS host.
    ...              2. Set auto reboot and clear existing gard records.
    ...              3. Write the value to the FIR with pdbg
    ...              'threshold_limit' times.
    [Arguments]      ${fir_address}  ${value}  ${threshold_limit}
    ...  ${master_proc_chip}=True
    # Description of argument(s):
    # fir_address         FIR (Fault isolation register) value (e.g. '2011400').
    # value               Value to write (e.g. '2000000000000000').
    # threshold_limit     Recoverable error threshold limit (e.g. '1', '5', '32').
    # master_proc_chip    Accepted for signature parity with the HOST variant;
    #                     it is not referenced below, the pdbg target is
    #                     always '-p0'.

    Delete Error Logs
    Login To OS Host
    Set Auto Reboot  1

    Gard Operations On OS  clear all

    ${injection_total}=  Convert To Integer  ${threshold_limit}
    FOR  ${injection}  IN RANGE  ${injection_total}
        Pdbg  -p0 putscom 0x${fir_address} 0x${value}
        # Pause between consecutive injections.
        Sleep  10s
    END

    # Give the system time to produce the error log after the injection.
    Sleep  120s
206
207
Inject Error Through BMC At HOST Boot
    [Documentation]  Inject error on multiple targets like
    ...              CPU/CME/OCC/NPU/CAPP/MCA etc. through BMC at HOST Boot.
    ...              Test sequence:
    ...              1. Boot To HOST.
    ...              2. Clear any existing gard records.
    ...              3. Power off HOST and Boot.
    ...              4. Inject Error on processor through BMC.
    [Arguments]      ${fir_address}  ${value}
    # Description of argument(s):
    # fir_address    FIR (Fault isolation register) value (e.g. '2011400').
    # value          Value to write (e.g. '2000000000000000').

    Delete Error Logs

    REST Power On  stack_mode=skip

    Gard Operations On OS  clear all

    REST Power Off
    Set Auto Reboot  1
    # Kick off the boot without waiting so the injection can happen while
    # the host is still booting.
    Initiate Host Boot  wait=${0}

    Start SOL Console Logging   ${EXECDIR}/esol.log

    # Poll the console log until 'ISTEP 14' shows up (up to 5 minutes).
    Wait Until Keyword Succeeds  5 min  5 sec
    ...  Shell Cmd  grep 'ISTEP *14' ${EXECDIR}/esol.log  quiet=1
    ...  print_output=0  show_err=0  ignore_err=0

    Pdbg  -p0 putscom 0x${fir_address} 0x${value}
    # Adding delay to get error log after error injection.
    Sleep  10s

    # Stopping the console logging is done in the keyword teardown so that
    # it also happens when the wait or the injection above fails; otherwise
    # the logging started here would be left running.
    [Teardown]  Stop SOL Console Logging
242