xref: /openbmc/phosphor-pid-control/pid/pidloop.cpp (revision 9f9a06aa9c4d49749c2d94164ed00c6ef706e04f)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "pidloop.hpp"
18 
19 #include "pid/pidcontroller.hpp"
20 #include "pid/tuning.hpp"
21 #include "pid/zone_interface.hpp"
22 #include "sensors/sensor.hpp"
23 
24 #include <boost/asio/steady_timer.hpp>
25 
26 #include <chrono>
27 #include <map>
28 #include <memory>
29 #include <sstream>
30 #include <thread>
31 #include <vector>
32 
33 namespace pid_control
34 {
35 
processThermals(std::shared_ptr<ZoneInterface> zone)36 static void processThermals(std::shared_ptr<ZoneInterface> zone)
37 {
38     // Get the latest margins.
39     zone->updateSensors();
40     // Zero out the set point goals.
41     zone->clearSetPoints();
42     zone->clearRPMCeilings();
43     // Run the margin PIDs.
44     zone->processThermals();
45     // Get the maximum RPM setpoint.
46     zone->determineMaxSetPointRequest();
47 }
48 
pidControlLoop(std::shared_ptr<ZoneInterface> zone,std::shared_ptr<boost::asio::steady_timer> timer,const bool * isCanceling,bool first,uint64_t cycleCnt)49 void pidControlLoop(std::shared_ptr<ZoneInterface> zone,
50                     std::shared_ptr<boost::asio::steady_timer> timer,
51                     const bool* isCanceling, bool first, uint64_t cycleCnt)
52 {
53     if (*isCanceling)
54         return;
55 
56     std::chrono::steady_clock::time_point nextTime;
57 
58     if (first)
59     {
60         if (loggingEnabled)
61         {
62             zone->initializeLog();
63         }
64 
65         zone->initializeCache();
66         processThermals(zone);
67 
68         nextTime = std::chrono::steady_clock::now();
69     }
70     else
71     {
72         nextTime = timer->expiry();
73     }
74 
75     uint64_t msPerFanCycle = zone->getCycleIntervalTime();
76 
77     // Push forward the original expiration time of timer, instead of just
78     // resetting it with expires_after() from now, to make sure the interval
79     // is of the expected duration, and not stretched out by CPU time taken.
80     nextTime += std::chrono::milliseconds(msPerFanCycle);
81     timer->expires_at(nextTime);
82     timer->async_wait([zone, timer, cycleCnt, isCanceling, msPerFanCycle](
83                           const boost::system::error_code& ec) mutable {
84         if (ec == boost::asio::error::operation_aborted)
85         {
86             return; // timer being canceled, stop loop
87         }
88 
89         /*
90          * This should sleep on the conditional wait for the listen thread
91          * to tell us it's in sync.  But then we also need a timeout option
92          * in case phosphor-hwmon is down, we can go into some weird failure
93          * more.
94          *
95          * Another approach would be to start all sensors in worst-case
96          * values, and fail-safe mode and then clear out of fail-safe mode
97          * once we start getting values.  Which I think it is a solid
98          * approach.
99          *
100          * For now this runs before it necessarily has any sensor values.
101          * For the host sensors they start out in fail-safe mode.  For the
102          * fans, they start out as 0 as input and then are adjusted once
103          * they have values.
104          *
105          * If a fan has failed, it's value will be whatever we're told or
106          * however we retrieve it.  This program disregards fan values of 0,
107          * so any code providing a fan speed can set to 0 on failure and
108          * that fan value will be effectively ignored.  The PID algorithm
109          * will be unhappy but nothing bad will happen.
110          *
111          * TODO(venture): If the fan value is 0 should that loop just be
112          * skipped? Right now, a 0 value is ignored in
113          * FanController::inputProc()
114          */
115 
116         // Check if we should just go back to sleep.
117         if (zone->getManualMode())
118         {
119             pidControlLoop(zone, timer, isCanceling, false, cycleCnt);
120             return;
121         }
122 
123         // Get the latest fan speeds.
124         zone->updateFanTelemetry();
125 
126         uint64_t msPerThermalCycle = zone->getUpdateThermalsCycle();
127 
128         // Process thermal cycles at a rate that is less often than fan
129         // cycles. If thermal time is not an exact multiple of fan time,
130         // there will be some remainder left over, to keep the timing
131         // correct, as the intervals are staggered into one another.
132         if (cycleCnt >= msPerThermalCycle)
133         {
134             cycleCnt -= msPerThermalCycle;
135 
136             processThermals(zone);
137         }
138 
139         // Run the fan PIDs every iteration.
140         zone->processFans();
141 
142         if (loggingEnabled)
143         {
144             std::ostringstream out;
145             out << "," << zone->getFailSafeMode() << std::endl;
146             zone->writeLog(out.str());
147         }
148 
149         // Count how many milliseconds have elapsed, so we can know when
150         // to perform thermal cycles, in proper ratio with fan cycles.
151         cycleCnt += msPerFanCycle;
152 
153         pidControlLoop(zone, timer, isCanceling, false, cycleCnt);
154     });
155 }
156 
157 } // namespace pid_control
158