xref: /openbmc/phosphor-pid-control/pid/zone.cpp (revision b300575e)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* Configuration. */
18 #include "zone.hpp"
19 
20 #include "conf.hpp"
21 #include "pid/controller.hpp"
22 #include "pid/ec/pid.hpp"
23 #include "pid/fancontroller.hpp"
24 #include "pid/stepwisecontroller.hpp"
25 #include "pid/thermalcontroller.hpp"
26 #include "pid/tuning.hpp"
27 
28 #include <algorithm>
29 #include <chrono>
30 #include <cstring>
31 #include <fstream>
32 #include <iostream>
33 #include <memory>
34 #include <sstream>
35 #include <string>
36 
using tstamp = std::chrono::high_resolution_clock::time_point;
using namespace std::literals::chrono_literals;

/**
 * Rate limiter: enforces a minimum duration between allowed events.
 *
 * The very first call is always allowed. Afterwards an event is allowed only
 * when at least `pace` has elapsed since the previously allowed event.
 * Disallowed calls do not move the reference time.
 *
 * The reference time is kept in function-local statics, so every caller
 * shares the same throttle.
 *
 * @param now   timestamp of the event being considered
 * @param pace  minimum spacing between allowed events
 * @return true if the event should be allowed, false if disallowed
 */
bool allowThrottle(const tstamp& now, const std::chrono::seconds& pace)
{
    static bool primed = false;
    static tstamp lastAllowed;

    // Allowed on first use, or once enough time has passed since the last
    // allowed event.
    const bool permit = !primed || ((now - lastAllowed) >= pace);

    if (permit)
    {
        // Only allowed events restart the pacing window.
        lastAllowed = now;
        primed = true;
    }

    return permit;
}
68 
69 namespace pid_control
70 {
71 
72 double DbusPidZone::getMaxSetPointRequest(void) const
73 {
74     return _maximumSetPoint;
75 }
76 
77 bool DbusPidZone::getManualMode(void) const
78 {
79     return _manualMode;
80 }
81 
82 void DbusPidZone::setManualMode(bool mode)
83 {
84     _manualMode = mode;
85 
86     // If returning to automatic mode, need to restore PWM from PID loop
87     if (!mode)
88     {
89         _redundantWrite = true;
90     }
91 }
92 
93 bool DbusPidZone::getFailSafeMode(void) const
94 {
95     // If any keys are present at least one sensor is in fail safe mode.
96     return !_failSafeSensors.empty();
97 }
98 
99 int64_t DbusPidZone::getZoneID(void) const
100 {
101     return _zoneId;
102 }
103 
104 void DbusPidZone::addSetPoint(double setPoint, const std::string& name)
105 {
106     _SetPoints.push_back(setPoint);
107     /*
108      * if there are multiple thermal controllers with the same
109      * value, pick the first one in the iterator
110      */
111     if (_maximumSetPoint < setPoint)
112     {
113         _maximumSetPoint = setPoint;
114         _maximumSetPointName = name;
115     }
116 }
117 
118 void DbusPidZone::addRPMCeiling(double ceiling)
119 {
120     _RPMCeilings.push_back(ceiling);
121 }
122 
123 void DbusPidZone::clearRPMCeilings(void)
124 {
125     _RPMCeilings.clear();
126 }
127 
128 void DbusPidZone::clearSetPoints(void)
129 {
130     _SetPoints.clear();
131     _maximumSetPoint = 0;
132 }
133 
134 double DbusPidZone::getFailSafePercent(void) const
135 {
136     return _failSafePercent;
137 }
138 
139 double DbusPidZone::getMinThermalSetPoint(void) const
140 {
141     return _minThermalOutputSetPt;
142 }
143 
144 void DbusPidZone::addFanPID(std::unique_ptr<Controller> pid)
145 {
146     _fans.push_back(std::move(pid));
147 }
148 
149 void DbusPidZone::addThermalPID(std::unique_ptr<Controller> pid)
150 {
151     _thermals.push_back(std::move(pid));
152 }
153 
154 double DbusPidZone::getCachedValue(const std::string& name)
155 {
156     return _cachedValuesByName.at(name).scaled;
157 }
158 
159 ValueCacheEntry DbusPidZone::getCachedValues(const std::string& name)
160 {
161     return _cachedValuesByName.at(name);
162 }
163 
164 void DbusPidZone::setOutputCache(std::string_view name,
165                                  const ValueCacheEntry& values)
166 {
167     _cachedFanOutputs[std::string{name}] = values;
168 }
169 
170 void DbusPidZone::addFanInput(const std::string& fan)
171 {
172     _fanInputs.push_back(fan);
173 }
174 
175 void DbusPidZone::addThermalInput(const std::string& therm)
176 {
177     _thermalInputs.push_back(therm);
178 }
179 
180 // Updates desired RPM setpoint from optional text file
181 // Returns true if rpmValue updated, false if left unchanged
182 static bool fileParseRpm(const std::string& fileName, double& rpmValue)
183 {
184     static constexpr std::chrono::seconds throttlePace{3};
185 
186     std::string errText;
187 
188     try
189     {
190         std::ifstream ifs;
191         ifs.open(fileName);
192         if (ifs)
193         {
194             int value;
195             ifs >> value;
196 
197             if (value <= 0)
198             {
199                 errText = "File content could not be parsed to a number";
200             }
201             else if (value <= 100)
202             {
203                 errText = "File must contain RPM value, not PWM value";
204             }
205             else
206             {
207                 rpmValue = static_cast<double>(value);
208                 return true;
209             }
210         }
211     }
212     catch (const std::exception& e)
213     {
214         errText = "Exception: ";
215         errText += e.what();
216     }
217 
218     // The file is optional, intentionally not an error if file not found
219     if (!(errText.empty()))
220     {
221         tstamp now = std::chrono::high_resolution_clock::now();
222         if (allowThrottle(now, throttlePace))
223         {
224             std::cerr << "Unable to read from '" << fileName << "': " << errText
225                       << "\n";
226         }
227     }
228 
229     return false;
230 }
231 
232 void DbusPidZone::determineMaxSetPointRequest(void)
233 {
234     std::vector<double>::iterator result;
235     double minThermalThreshold = getMinThermalSetPoint();
236 
237     if (_RPMCeilings.size() > 0)
238     {
239         result = std::min_element(_RPMCeilings.begin(), _RPMCeilings.end());
240         // if Max set point is larger than the lowest ceiling, reset to lowest
241         // ceiling.
242         if (*result < _maximumSetPoint)
243         {
244             _maximumSetPoint = *result;
245             // When using lowest ceiling, controller name is ceiling.
246             _maximumSetPointName = "Ceiling";
247         }
248     }
249 
250     /*
251      * If the maximum RPM setpoint output is below the minimum RPM
252      * setpoint, set it to the minimum.
253      */
254     if (minThermalThreshold >= _maximumSetPoint)
255     {
256         _maximumSetPoint = minThermalThreshold;
257         _maximumSetPointName = "";
258     }
259     else if (_maximumSetPointName.compare(_maximumSetPointNamePrev))
260     {
261         std::cerr << "PID Zone " << _zoneId << " max SetPoint "
262                   << _maximumSetPoint << " requested by "
263                   << _maximumSetPointName;
264         for (const auto& sensor : _failSafeSensors)
265         {
266             if (sensor.find("Fan") == std::string::npos)
267             {
268                 std::cerr << " " << sensor;
269             }
270         }
271         std::cerr << "\n";
272         _maximumSetPointNamePrev.assign(_maximumSetPointName);
273     }
274     if (tuningEnabled)
275     {
276         /*
277          * We received no setpoints from thermal sensors.
278          * This is a case experienced during tuning where they only specify
279          * fan sensors and one large fan PID for all the fans.
280          */
281         static constexpr auto setpointpath = "/etc/thermal.d/setpoint";
282 
283         fileParseRpm(setpointpath, _maximumSetPoint);
284 
285         // Allow per-zone setpoint files to override overall setpoint file
286         std::ostringstream zoneSuffix;
287         zoneSuffix << ".zone" << _zoneId;
288         std::string zoneSetpointPath = setpointpath + zoneSuffix.str();
289 
290         fileParseRpm(zoneSetpointPath, _maximumSetPoint);
291     }
292     return;
293 }
294 
295 void DbusPidZone::initializeLog(void)
296 {
297     /* Print header for log file:
298      * epoch_ms,setpt,fan1,fan1_raw,fan1_pwm,fan1_pwm_raw,fan2,fan2_raw,fan2_pwm,fan2_pwm_raw,fanN,fanN_raw,fanN_pwm,fanN_pwm_raw,sensor1,sensor1_raw,sensor2,sensor2_raw,sensorN,sensorN_raw,failsafe
299      */
300 
301     _log << "epoch_ms,setpt,requester";
302 
303     for (const auto& f : _fanInputs)
304     {
305         _log << "," << f << "," << f << "_raw";
306         _log << "," << f << "_pwm," << f << "_pwm_raw";
307     }
308     for (const auto& t : _thermalInputs)
309     {
310         _log << "," << t << "," << t << "_raw";
311     }
312 
313     _log << ",failsafe";
314     _log << std::endl;
315 }
316 
// Appends the given text verbatim to the zone log stream. No separator or
// newline is added and the stream is not explicitly flushed here.
void DbusPidZone::writeLog(const std::string& value)
{
    _log << value;
}
321 
322 /*
323  * TODO(venture) This is effectively updating the cache and should check if the
324  * values they're using to update it are new or old, or whatnot.  For instance,
325  * if we haven't heard from the host in X time we need to detect this failure.
326  *
327  * I haven't decided if the Sensor should have a lastUpdated method or whether
328  * that should be for the ReadInterface or etc...
329  */
330 
331 /**
332  * We want the PID loop to run with values cached, so this will get all the
333  * fan tachs for the loop.
334  */
335 void DbusPidZone::updateFanTelemetry(void)
336 {
337     /* TODO(venture): Should I just make _log point to /dev/null when logging
338      * is disabled?  I think it's a waste to try and log things even if the
339      * data is just being dropped though.
340      */
341     tstamp now = std::chrono::high_resolution_clock::now();
342     if (loggingEnabled)
343     {
344         _log << std::chrono::duration_cast<std::chrono::milliseconds>(
345                     now.time_since_epoch())
346                     .count();
347         _log << "," << _maximumSetPoint;
348         _log << "," << _maximumSetPointName;
349     }
350 
351     for (const auto& f : _fanInputs)
352     {
353         auto sensor = _mgr.getSensor(f);
354         ReadReturn r = sensor->read();
355         _cachedValuesByName[f] = {r.value, r.unscaled};
356         int64_t timeout = sensor->getTimeout();
357         tstamp then = r.updated;
358 
359         auto duration =
360             std::chrono::duration_cast<std::chrono::seconds>(now - then)
361                 .count();
362         auto period = std::chrono::seconds(timeout).count();
363         /*
364          * TODO(venture): We should check when these were last read.
365          * However, these are the fans, so if I'm not getting updated values
366          * for them... what should I do?
367          */
368         if (loggingEnabled)
369         {
370             const auto& v = _cachedValuesByName[f];
371             _log << "," << v.scaled << "," << v.unscaled;
372             const auto& p = _cachedFanOutputs[f];
373             _log << "," << p.scaled << "," << p.unscaled;
374         }
375 
376         // check if fan fail.
377         if (sensor->getFailed())
378         {
379             _failSafeSensors.insert(f);
380         }
381         else if (timeout != 0 && duration >= period)
382         {
383             _failSafeSensors.insert(f);
384         }
385         else
386         {
387             // Check if it's in there: remove it.
388             auto kt = _failSafeSensors.find(f);
389             if (kt != _failSafeSensors.end())
390             {
391                 _failSafeSensors.erase(kt);
392             }
393         }
394     }
395 
396     if (loggingEnabled)
397     {
398         for (const auto& t : _thermalInputs)
399         {
400             const auto& v = _cachedValuesByName[t];
401             _log << "," << v.scaled << "," << v.unscaled;
402         }
403     }
404 
405     return;
406 }
407 
408 void DbusPidZone::updateSensors(void)
409 {
410     using namespace std::chrono;
411     /* margin and temp are stored as temp */
412     tstamp now = high_resolution_clock::now();
413 
414     for (const auto& t : _thermalInputs)
415     {
416         auto sensor = _mgr.getSensor(t);
417         ReadReturn r = sensor->read();
418         int64_t timeout = sensor->getTimeout();
419 
420         _cachedValuesByName[t] = {r.value, r.unscaled};
421         tstamp then = r.updated;
422 
423         auto duration = duration_cast<std::chrono::seconds>(now - then).count();
424         auto period = std::chrono::seconds(timeout).count();
425 
426         if (sensor->getFailed())
427         {
428             _failSafeSensors.insert(t);
429         }
430         else if (timeout != 0 && duration >= period)
431         {
432             // std::cerr << "Entering fail safe mode.\n";
433             _failSafeSensors.insert(t);
434         }
435         else
436         {
437             // Check if it's in there: remove it.
438             auto kt = _failSafeSensors.find(t);
439             if (kt != _failSafeSensors.end())
440             {
441                 _failSafeSensors.erase(kt);
442             }
443         }
444     }
445 
446     return;
447 }
448 
449 void DbusPidZone::initializeCache(void)
450 {
451     auto nan = std::numeric_limits<double>::quiet_NaN();
452 
453     for (const auto& f : _fanInputs)
454     {
455         _cachedValuesByName[f] = {nan, nan};
456         _cachedFanOutputs[f] = {nan, nan};
457 
458         // Start all fans in fail-safe mode.
459         _failSafeSensors.insert(f);
460     }
461 
462     for (const auto& t : _thermalInputs)
463     {
464         _cachedValuesByName[t] = {nan, nan};
465 
466         // Start all sensors in fail-safe mode.
467         _failSafeSensors.insert(t);
468     }
469 }
470 
471 void DbusPidZone::dumpCache(void)
472 {
473     std::cerr << "Cache values now: \n";
474     for (const auto& [name, value] : _cachedValuesByName)
475     {
476         std::cerr << name << ": " << value.scaled << " " << value.unscaled
477                   << "\n";
478     }
479 
480     std::cerr << "Fan outputs now: \n";
481     for (const auto& [name, value] : _cachedFanOutputs)
482     {
483         std::cerr << name << ": " << value.scaled << " " << value.unscaled
484                   << "\n";
485     }
486 }
487 
488 void DbusPidZone::processFans(void)
489 {
490     for (auto& p : _fans)
491     {
492         p->process();
493     }
494 
495     if (_redundantWrite)
496     {
497         // This is only needed once
498         _redundantWrite = false;
499     }
500 }
501 
502 void DbusPidZone::processThermals(void)
503 {
504     for (auto& p : _thermals)
505     {
506         p->process();
507     }
508 }
509 
510 Sensor* DbusPidZone::getSensor(const std::string& name)
511 {
512     return _mgr.getSensor(name);
513 }
514 
515 bool DbusPidZone::getRedundantWrite(void) const
516 {
517     return _redundantWrite;
518 }
519 
520 bool DbusPidZone::manual(bool value)
521 {
522     std::cerr << "manual: " << value << std::endl;
523     setManualMode(value);
524     return ModeObject::manual(value);
525 }
526 
527 bool DbusPidZone::failSafe() const
528 {
529     return getFailSafeMode();
530 }
531 
532 } // namespace pid_control
533