xref: /openbmc/phosphor-pid-control/pid/pidloop.cpp (revision f8b6e55147148c3cfb42327ff267197a460b411c)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "pidloop.hpp"
18 
19 #include "pid/pidcontroller.hpp"
20 #include "pid/tuning.hpp"
21 #include "pid/zone_interface.hpp"
22 
23 #include <boost/asio/error.hpp>
24 #include <boost/asio/steady_timer.hpp>
25 
26 #include <chrono>
27 #include <cstdint>
28 #include <memory>
29 #include <ostream>
30 #include <sstream>
31 
32 namespace pid_control
33 {
34 
processThermals(const std::shared_ptr<ZoneInterface> & zone)35 static void processThermals(const std::shared_ptr<ZoneInterface>& zone)
36 {
37     // Get the latest margins.
38     zone->updateSensors();
39     // Zero out the set point goals.
40     zone->clearSetPoints();
41     zone->clearRPMCeilings();
42     // Run the margin PIDs.
43     zone->processThermals();
44     // Get the maximum RPM setpoint.
45     zone->determineMaxSetPointRequest();
46 }
47 
pidControlLoop(const std::shared_ptr<ZoneInterface> & zone,const std::shared_ptr<boost::asio::steady_timer> & timer,const bool * isCanceling,bool first,uint64_t cycleCnt)48 void pidControlLoop(const std::shared_ptr<ZoneInterface>& zone,
49                     const std::shared_ptr<boost::asio::steady_timer>& timer,
50                     const bool* isCanceling, bool first, uint64_t cycleCnt)
51 {
52     if (*isCanceling)
53     {
54         return;
55     }
56 
57     std::chrono::steady_clock::time_point nextTime;
58 
59     if (first)
60     {
61         if (loggingEnabled)
62         {
63             zone->initializeLog();
64         }
65 
66         zone->initializeCache();
67         processThermals(zone);
68 
69         nextTime = std::chrono::steady_clock::now();
70     }
71     else
72     {
73         nextTime = timer->expiry();
74     }
75 
76     uint64_t msPerFanCycle = zone->getCycleIntervalTime();
77 
78     // Push forward the original expiration time of timer, instead of just
79     // resetting it with expires_after() from now, to make sure the interval
80     // is of the expected duration, and not stretched out by CPU time taken.
81     nextTime += std::chrono::milliseconds(msPerFanCycle);
82     timer->expires_at(nextTime);
83     timer->async_wait([zone, timer, cycleCnt, isCanceling, msPerFanCycle](
84                           const boost::system::error_code& ec) mutable {
85         if (ec == boost::asio::error::operation_aborted)
86         {
87             return; // timer being canceled, stop loop
88         }
89 
90         /*
91          * This should sleep on the conditional wait for the listen thread
92          * to tell us it's in sync.  But then we also need a timeout option
93          * in case phosphor-hwmon is down, we can go into some weird failure
94          * more.
95          *
96          * Another approach would be to start all sensors in worst-case
97          * values, and fail-safe mode and then clear out of fail-safe mode
98          * once we start getting values.  Which I think it is a solid
99          * approach.
100          *
101          * For now this runs before it necessarily has any sensor values.
102          * For the host sensors they start out in fail-safe mode.  For the
103          * fans, they start out as 0 as input and then are adjusted once
104          * they have values.
105          *
106          * If a fan has failed, it's value will be whatever we're told or
107          * however we retrieve it.  This program disregards fan values of 0,
108          * so any code providing a fan speed can set to 0 on failure and
109          * that fan value will be effectively ignored.  The PID algorithm
110          * will be unhappy but nothing bad will happen.
111          *
112          * TODO(venture): If the fan value is 0 should that loop just be
113          * skipped? Right now, a 0 value is ignored in
114          * FanController::inputProc()
115          */
116 
117         // Check if we should just go back to sleep.
118         if (zone->getManualMode())
119         {
120             pidControlLoop(zone, timer, isCanceling, false, cycleCnt);
121             return;
122         }
123 
124         // Get the latest fan speeds.
125         zone->updateFanTelemetry();
126 
127         uint64_t msPerThermalCycle = zone->getUpdateThermalsCycle();
128 
129         // Process thermal cycles at a rate that is less often than fan
130         // cycles. If thermal time is not an exact multiple of fan time,
131         // there will be some remainder left over, to keep the timing
132         // correct, as the intervals are staggered into one another.
133         if (cycleCnt >= msPerThermalCycle)
134         {
135             cycleCnt -= msPerThermalCycle;
136 
137             processThermals(zone);
138         }
139 
140         // Run the fan PIDs every iteration.
141         zone->processFans();
142 
143         if (loggingEnabled)
144         {
145             std::ostringstream out;
146             out << "," << zone->getFailSafeMode() << std::endl;
147             zone->writeLog(out.str());
148         }
149 
150         // Count how many milliseconds have elapsed, so we can know when
151         // to perform thermal cycles, in proper ratio with fan cycles.
152         cycleCnt += msPerFanCycle;
153 
154         pidControlLoop(zone, timer, isCanceling, false, cycleCnt);
155     });
156 }
157 
158 } // namespace pid_control
159