xref: /openbmc/openbmc-test-automation/lib/ras/host_utils.robot (revision a68120981fbdc58c69f378758ae7f6527027f8ee)
1*** Settings ***
2Documentation       Utility for error injection scenarios through HOST & BMC.
3Resource            ../../lib/rest_client.robot
4Resource            ../../lib/utils.robot
5Resource            ../../lib/common_utils.robot
6Variables           ../../lib/ras/variables.py
7Library             ../../lib/bmc_ssh_utils.py
8Library             ../../lib/gen_print.py
9Library             ../../lib/gen_robot_print.py
10
11Library             OperatingSystem
12
13*** Keywords ***
14
Getscom Operations On OS
    [Documentation]  Run 'getscom' on the host OS with the supplied option
    ...              string and return the command output.
    [Arguments]      ${input_cmd}

    # Description of argument(s):
    # input_cmd  Option string appended to 'getscom', for example:
    #              -l|--list-chips
    #              -c|--chip <chip-id> <addr>

    # Catenate joins its items with a single space, giving
    # "getscom <input_cmd>".
    ${getscom_cmd}=  Catenate  getscom  ${input_cmd}

    # Only the first returned value (the command output) is handed back;
    # stderr and the return code are not used.
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${getscom_cmd}
    [Return]  ${stdout}
25
Gard Operations On OS
    [Documentation]  Run 'opal-gard' on the host OS with the supplied
    ...              sub-command and return the command output.
    [Arguments]      ${input_cmd}

    # Description of argument(s):
    # input_cmd  Sub-command appended to 'opal-gard', for example:
    #              list
    #              clear all
    #              show <gard_record_id>

    # Catenate joins its items with a single space, giving
    # "opal-gard <input_cmd>".
    ${gard_cmd}=  Catenate  opal-gard  ${input_cmd}

    # Only the first returned value (the command output) is handed back;
    # stderr and the return code are not used.
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${gard_cmd}
    [Return]  ${stdout}
35
Putscom Operations On OS
    [Documentation]  Issue a 'putscom' command on the host OS for the given
    ...              processor chip, FRU/FIR value and chip address.
    [Arguments]      ${proc_chip_id}  ${fru}  ${address}

    # Description of argument(s):
    # proc_chip_id  Processor ID (e.g. '0', '8').
    # fru           FRU value (e.g. 2011400).
    # address       Chip address (e.g. 4000000000000000).

    # Each value is passed without a '0x' prefix; the prefix is added here.
    # The command is launched with 'Start Command' and no output or return
    # code is collected by this keyword.
    # NOTE(review): 'Start Command' is presumably the SSHLibrary keyword that
    # does not wait for completion - confirm against the imported resources.
    Start Command  putscom -c 0x${proc_chip_id} 0x${fru} 0x${address}
47
Get ProcChipId From OS
    [Documentation]  Get a processor chip ID from the 'getscom -l' listing on
    ...              the host OS, selected by chip type and master flag.
    [Arguments]      ${chip_type}  ${master_proc_chip}

    # Description of argument(s):
    # chip_type         The chip type (Processor/Centaur), used as a
    #                   case-insensitive grep pattern on the chip listing.
    # master_proc_chip  Processor chip type ('True' or 'False').

    ${cmd}=  Catenate  -l | grep -i ${chip_type} | cut -c1-8
    ${proc_chip_id}=  Getscom Operations On OS  ${cmd}
    # Example output:
    # getscom -l | grep processor | cut -c1-8
    # 00000008     - False
    # 00000000     - True

    # One list entry per matching chip, in the order printed by getscom.
    ${proc_ids}=  Split String  ${proc_chip_id}

    # The second entry is used when master_proc_chip is 'True', otherwise the
    # first entry.  Continuation rows use a plain '...' marker (no escaped
    # '\' cell), which is the documented way to split a row outside a loop.
    # NOTE(review): index 1 requires at least two matching chips in the
    # listing - confirm behaviour expected on single-processor systems.
    ${proc_id}=  Run Keyword If  '${master_proc_chip}' == 'True'
    ...  Get From List  ${proc_ids}  1
    ...  ELSE  Get From List  ${proc_ids}  0

    # Example output:
    # 00000008
    [Return]  ${proc_id}
70
Get Core IDs From OS
    [Documentation]  Return the list of core IDs reported for the given
    ...              processor chip ID.
    [Arguments]      ${proc_chip_id}

    # Description of argument(s):
    # proc_chip_id  Processor ID (e.g. '0', '8').

    # Run the CPU probe script (path comes from ${probe_cpu_file_path}), keep
    # only the lines for the requested chip and cut out columns 21-22.
    # 'set -o pipefail' makes the pipeline report a failure of any stage.
    ${probe_cmd}=  Catenate
    ...  set -o pipefail ; ${probe_cpu_file_path}
    ...  | grep -i 'CHIP ID: ${proc_chip_id}' | cut -c21-22
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${probe_cmd}

    # Whitespace-separated values become individual list items.
    # Example output:
    # ['2', '3', '4', '5', '6']
    ${core_id_list}=  Split String  ${stdout}
    [Return]  ${core_id_list}
84
FIR Address Translation Through HOST
    [Documentation]  Translate a FIR address through the host for the given
    ...              FIR, core ID and target type.
    [Arguments]  ${fir}  ${core_id}  ${target_type}

    # Description of argument(s):
    # fir          FIR (Fault isolation register) value (e.g. 2011400).
    # core_id      Core ID (e.g. 9).
    # target_type  Target type (e.g. 'EQ', 'EX', 'C').

    # Run the translation script (path comes from
    # ${addr_translation_file_path}) and keep only the line matching the
    # requested target type.  'set -o pipefail' makes the pipeline report a
    # failure of any stage.
    ${translate_cmd}=  Catenate
    ...  set -o pipefail ;
    ...  ${addr_translation_file_path}
    ...  ${fir}
    ...  ${core_id}
    ...  | grep -i ${target_type}
    ${stdout}  ${stderr}  ${rc}=  OS Execute Command  ${translate_cmd}

    # Split on ': 0x' and return what follows it.  Because the separator
    # includes the '0x', the returned value carries no '0x' prefix.
    # Example address in the script output:
    # 0x10010c00
    ${addr_parts}=  Split String  ${stdout}  :${SPACE}0x
    [Return]  ${addr_parts[1]}
101
Inject Error Through HOST
    [Documentation]  Inject checkstop on multiple targets like
    ...              CPU/CME/OCC/NPU/CAPP/MCA etc. through HOST.
    ...              Test sequence:
    ...              1. Boot To HOST.
    ...              2. Clear any existing gard records.
    ...              3. Inject Error on processor.
    [Arguments]      ${fir}  ${chip_address}  ${threshold_limit}
    ...  ${master_proc_chip}=True

    # Description of argument(s):
    # fir               FIR (Fault isolation register) value (e.g. 2011400).
    # chip_address      Chip address (e.g. 2000000000000000).
    # threshold_limit   Threshold limit (e.g. 1, 5, 32); the number of times
    #                   the error is injected.
    # master_proc_chip  Processor chip type ('True' or 'False'), passed on to
    #                   'Get ProcChipId From OS'.

    # Start from a clean state: no error logs, auto reboot set to 1 and no
    # gard records.
    Delete Error Logs
    Login To OS Host
    Set Auto Reboot  1
    Gard Operations On OS  clear all

    # Fetch processor chip IDs.
    ${proc_chip_id}=  Get ProcChipId From OS  Processor  ${master_proc_chip}

    # Inject the error threshold_limit times, with a 10 second delay after
    # each injection.
    ${threshold_limit}=  Convert To Integer  ${threshold_limit}
    :FOR  ${count}  IN RANGE  ${threshold_limit}
    \  Putscom Operations On OS  ${proc_chip_id}  ${fir}  ${chip_address}
    \  Sleep  10s

    # Adding delay to get error log after error injection.
    Sleep  120s
133
Code Update Unrecoverable Error Inject
    [Documentation]  Inject UE MCACALFIR checkstop on processor through
    ...   host during PNOR code update.

    # A single injection (threshold limit 1) with fixed FIR and chip address
    # values; master_proc_chip is left at its default.
    Inject Error Through HOST  fir=05010800
    ...  chip_address=4000000000000000  threshold_limit=1
139
Disable CPU States Through HOST
    [Documentation]  Disable CPU states through host.

    # Fetch number of states present for cpu0.  The same count is applied to
    # every CPU below.
    ${cmd}=  Catenate  ls /sys/devices/system/cpu/cpu0/cpuidle|grep state|wc -l
    ${output}  ${stderr}  ${rc}=  OS Execute Command  ${cmd}
    ${no_of_states}=  Convert To Integer  ${output}

    # Disable state for all cpus: for each state index, write 1 to the
    # 'disable' file of that state under every cpu* directory.  The loop
    # variable must be the one referenced in the path (state${i}); the shell
    # variable $file_path has no braces and is therefore passed through
    # literally by Robot Framework.
    :FOR  ${i}  IN RANGE  ${no_of_states}
    \  ${cmd}=  Catenate  SEPARATOR=  for file_path in /sys/devices/system/cpu/
    ...  cpu*/cpuidle/state${i}/disable; do echo 1 > $file_path; done
    \  OS Execute Command  ${cmd}
153
Is Opal-PRD Service Enabled
    [Documentation]  Look up the opal-prd entry in the systemd unit-file
    ...              listing on the host and return its state, either
    ...              'enabled' or 'disabled'.

    ${stdout}  ${stderr}  ${rc}=  OS Execute Command
    ...  systemctl list-unit-files | grep opal-prd

    # Example output from prior command:
    # opal-prd.service enabled
    # The second whitespace-separated field is the state.
    ${unit_fields}=  Split String  ${stdout}
    [Return]  ${unit_fields[1]}
165
Enable Opal-PRD Service On HOST
    [Documentation]  Start the Opal-PRD service on host and verify that it
    ...              is reported as enabled.

    OS Execute Command  service opal-prd start

    # NOTE(review): the command above starts the service, while the check
    # below looks at the unit-file state ('enabled'/'disabled') - confirm
    # that this is the intended verification.
    ${prd_unit_state}=  Is Opal-PRD Service Enabled
    Should Contain  ${prd_unit_state}  enabled
172
BMC Putscom
    [Documentation]  Issue a putscom through the BMC using pdbg.
    [Arguments]      ${proc_chip_id}  ${fru}  ${chip_address}

    # Description of argument(s):
    # proc_chip_id  Processor ID (e.g '0', '8'), passed to pdbg's -p option.
    # fru           FRU (field replaceable unit) (e.g. '2011400').
    # chip_address  Chip address (e.g. '4000000000000000').

    # Both fru and chip_address are given without a '0x' prefix; the prefix
    # is added here.
    BMC Execute Command
    ...  pdbg -d p9w -p${proc_chip_id} putscom 0x${fru} 0x${chip_address}
186
Inject Error Through BMC
    [Documentation]  Inject checkstop on multiple targets like
    ...              CPU/CME/OCC/NPU/CAPP/MCA etc. through BMC.
    ...              Test sequence:
    ...              1. Boot To HOST.
    ...              2. Clear any existing gard records.
    ...              3. Inject Error on processor.
    [Arguments]      ${fir}  ${chip_address}  ${threshold_limit}
    ...  ${master_proc_chip}=True

    # Description of argument(s):
    # fir               FIR (Fault isolation register) value (e.g. '2011400').
    # chip_address      Chip address (e.g. '2000000000000000').
    # threshold_limit   Recoverable error threshold limit (e.g. '1', '5',
    #                   '32'); the number of times the error is injected.
    # master_proc_chip  Accepted but not used by this keyword; the injection
    #                   below always targets processor 0.

    # Start from a clean state: no error logs, auto reboot set to 1 and no
    # gard records.
    Delete Error Logs
    Login To OS Host
    Set Auto Reboot  1
    Gard Operations On OS  clear all

    # Inject the error threshold_limit times, with a 10 second delay after
    # each injection.
    ${threshold_limit}=  Convert To Integer  ${threshold_limit}
    :FOR  ${count}  IN RANGE  ${threshold_limit}
    \  BMC Putscom  0  ${fir}  ${chip_address}
    \  Sleep  10s

    # Adding delay to get error log after error injection.
    Sleep  120s
215
216
Inject Error Through BMC At HOST Boot
    [Documentation]  Inject error on multiple targets like
    ...              CPU/CME/OCC/NPU/CAPP/MCA etc. through BMC at HOST Boot.
    ...              Test sequence:
    ...              1. Boot To HOST.
    ...              2. Clear any existing gard records.
    ...              3. Power off HOST and Boot.
    ...              4. Inject Error on processor through BMC.
    [Arguments]      ${fir}  ${chip_address}

    # Description of argument(s):
    # fir           FIR (Fault isolation register) value (e.g. '2011400').
    # chip_address  Chip address (e.g. '2000000000000000').

    # SOL console output is captured to this file and polled below.
    ${sol_log}=  Set Variable  ${EXECDIR}/esol.log

    Delete Error Logs

    # Power on so that existing gard records can be cleared from the OS.
    REST Power On  stack_mode=skip
    Gard Operations On OS  clear all

    # Power off, then kick off a fresh boot (wait=${0}) so that the error
    # can be injected while the boot is in progress.
    REST Power Off
    Set Auto Reboot  1
    Initiate Host Boot  wait=${0}

    Start SOL Console Logging   ${sol_log}

    # Poll the console log every 5 seconds, for up to 5 minutes, until a
    # line matching 'ISTEP *14' appears.
    Wait Until Keyword Succeeds  5 min  5 sec  Shell Cmd
    ...  grep 'ISTEP *14' ${sol_log}
    ...  quiet=1  print_output=0  show_err=0  ignore_err=0

    # Injection always targets processor 0.
    BMC Putscom  0  ${fir}  ${chip_address}

    # Adding delay to get error log after error injection.
    Sleep  10s

    Stop SOL Console Logging
250    Stop SOL Console Logging
251