xref: /openbmc/phosphor-pid-control/pid/zone.cpp (revision de74542c)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* Configuration. */
18 #include "zone.hpp"
19 
20 #include "conf.hpp"
21 #include "pid/controller.hpp"
22 #include "pid/ec/pid.hpp"
23 #include "pid/fancontroller.hpp"
24 #include "pid/stepwisecontroller.hpp"
25 #include "pid/thermalcontroller.hpp"
26 #include "pid/tuning.hpp"
27 
28 #include <algorithm>
29 #include <chrono>
30 #include <cstring>
31 #include <fstream>
32 #include <iostream>
33 #include <memory>
34 #include <sstream>
35 #include <string>
36 
using tstamp = std::chrono::high_resolution_clock::time_point;
using namespace std::literals::chrono_literals;

/**
 * Rate limiter: enforces a minimum duration between allowed events.
 *
 * The bookkeeping lives in function-local statics, so every caller of this
 * function shares one single throttle window.
 *
 * @param[in] now - timestamp of the event being considered
 * @param[in] pace - minimum time that must separate two allowed events
 * @return true if the event should be allowed, false if disallowed
 */
bool allowThrottle(const tstamp& now, const std::chrono::seconds& pace)
{
    static bool primed = false;
    static tstamp lastAllowed;

    // The very first call always passes. Afterwards an event passes only
    // once at least `pace` has gone by since the last event that passed.
    const bool permitted = !primed || ((now - lastAllowed) >= pace);
    if (permitted)
    {
        primed = true;
        lastAllowed = now;
    }

    return permitted;
}
68 
69 namespace pid_control
70 {
71 
72 double DbusPidZone::getMaxSetPointRequest(void) const
73 {
74     return _maximumSetPoint;
75 }
76 
77 bool DbusPidZone::getManualMode(void) const
78 {
79     return _manualMode;
80 }
81 
82 void DbusPidZone::setManualMode(bool mode)
83 {
84     _manualMode = mode;
85 
86     // If returning to automatic mode, need to restore PWM from PID loop
87     if (!mode)
88     {
89         _redundantWrite = true;
90     }
91 }
92 
93 bool DbusPidZone::getFailSafeMode(void) const
94 {
95     // If any keys are present at least one sensor is in fail safe mode.
96     return !_failSafeSensors.empty();
97 }
98 
99 int64_t DbusPidZone::getZoneID(void) const
100 {
101     return _zoneId;
102 }
103 
104 void DbusPidZone::addSetPoint(double setPoint, const std::string& name)
105 {
106     _SetPoints.push_back(setPoint);
107     /*
108      * if there are multiple thermal controllers with the same
109      * value, pick the first one in the iterator
110      */
111     if (_maximumSetPoint < setPoint)
112     {
113         _maximumSetPoint = setPoint;
114         _maximumSetPointName = name;
115     }
116 }
117 
118 void DbusPidZone::addRPMCeiling(double ceiling)
119 {
120     _RPMCeilings.push_back(ceiling);
121 }
122 
123 void DbusPidZone::clearRPMCeilings(void)
124 {
125     _RPMCeilings.clear();
126 }
127 
128 void DbusPidZone::clearSetPoints(void)
129 {
130     _SetPoints.clear();
131     _maximumSetPoint = 0;
132 }
133 
134 double DbusPidZone::getFailSafePercent(void) const
135 {
136     return _failSafePercent;
137 }
138 
139 double DbusPidZone::getMinThermalSetPoint(void) const
140 {
141     return _minThermalOutputSetPt;
142 }
143 
144 uint64_t DbusPidZone::getCycleIntervalTime(void) const
145 {
146     return _cycleTime.cycleIntervalTimeMS;
147 }
148 
149 uint64_t DbusPidZone::getUpdateThermalsCycle(void) const
150 {
151     return _cycleTime.updateThermalsTimeMS;
152 }
153 
154 void DbusPidZone::addFanPID(std::unique_ptr<Controller> pid)
155 {
156     _fans.push_back(std::move(pid));
157 }
158 
159 void DbusPidZone::addThermalPID(std::unique_ptr<Controller> pid)
160 {
161     _thermals.push_back(std::move(pid));
162 }
163 
164 double DbusPidZone::getCachedValue(const std::string& name)
165 {
166     return _cachedValuesByName.at(name).scaled;
167 }
168 
169 ValueCacheEntry DbusPidZone::getCachedValues(const std::string& name)
170 {
171     return _cachedValuesByName.at(name);
172 }
173 
174 void DbusPidZone::setOutputCache(std::string_view name,
175                                  const ValueCacheEntry& values)
176 {
177     _cachedFanOutputs[std::string{name}] = values;
178 }
179 
180 void DbusPidZone::addFanInput(const std::string& fan)
181 {
182     _fanInputs.push_back(fan);
183 }
184 
185 void DbusPidZone::addThermalInput(const std::string& therm)
186 {
187     _thermalInputs.push_back(therm);
188 }
189 
190 // Updates desired RPM setpoint from optional text file
191 // Returns true if rpmValue updated, false if left unchanged
192 static bool fileParseRpm(const std::string& fileName, double& rpmValue)
193 {
194     static constexpr std::chrono::seconds throttlePace{3};
195 
196     std::string errText;
197 
198     try
199     {
200         std::ifstream ifs;
201         ifs.open(fileName);
202         if (ifs)
203         {
204             int value;
205             ifs >> value;
206 
207             if (value <= 0)
208             {
209                 errText = "File content could not be parsed to a number";
210             }
211             else if (value <= 100)
212             {
213                 errText = "File must contain RPM value, not PWM value";
214             }
215             else
216             {
217                 rpmValue = static_cast<double>(value);
218                 return true;
219             }
220         }
221     }
222     catch (const std::exception& e)
223     {
224         errText = "Exception: ";
225         errText += e.what();
226     }
227 
228     // The file is optional, intentionally not an error if file not found
229     if (!(errText.empty()))
230     {
231         tstamp now = std::chrono::high_resolution_clock::now();
232         if (allowThrottle(now, throttlePace))
233         {
234             std::cerr << "Unable to read from '" << fileName << "': " << errText
235                       << "\n";
236         }
237     }
238 
239     return false;
240 }
241 
242 void DbusPidZone::determineMaxSetPointRequest(void)
243 {
244     std::vector<double>::iterator result;
245     double minThermalThreshold = getMinThermalSetPoint();
246 
247     if (_RPMCeilings.size() > 0)
248     {
249         result = std::min_element(_RPMCeilings.begin(), _RPMCeilings.end());
250         // if Max set point is larger than the lowest ceiling, reset to lowest
251         // ceiling.
252         if (*result < _maximumSetPoint)
253         {
254             _maximumSetPoint = *result;
255             // When using lowest ceiling, controller name is ceiling.
256             _maximumSetPointName = "Ceiling";
257         }
258     }
259 
260     /*
261      * If the maximum RPM setpoint output is below the minimum RPM
262      * setpoint, set it to the minimum.
263      */
264     if (minThermalThreshold >= _maximumSetPoint)
265     {
266         _maximumSetPoint = minThermalThreshold;
267         _maximumSetPointName = "";
268     }
269     else if (_maximumSetPointName.compare(_maximumSetPointNamePrev))
270     {
271         std::cerr << "PID Zone " << _zoneId << " max SetPoint "
272                   << _maximumSetPoint << " requested by "
273                   << _maximumSetPointName;
274         for (const auto& sensor : _failSafeSensors)
275         {
276             if (sensor.find("Fan") == std::string::npos)
277             {
278                 std::cerr << " " << sensor;
279             }
280         }
281         std::cerr << "\n";
282         _maximumSetPointNamePrev.assign(_maximumSetPointName);
283     }
284     if (tuningEnabled)
285     {
286         /*
287          * We received no setpoints from thermal sensors.
288          * This is a case experienced during tuning where they only specify
289          * fan sensors and one large fan PID for all the fans.
290          */
291         static constexpr auto setpointpath = "/etc/thermal.d/setpoint";
292 
293         fileParseRpm(setpointpath, _maximumSetPoint);
294 
295         // Allow per-zone setpoint files to override overall setpoint file
296         std::ostringstream zoneSuffix;
297         zoneSuffix << ".zone" << _zoneId;
298         std::string zoneSetpointPath = setpointpath + zoneSuffix.str();
299 
300         fileParseRpm(zoneSetpointPath, _maximumSetPoint);
301     }
302     return;
303 }
304 
305 void DbusPidZone::initializeLog(void)
306 {
307     /* Print header for log file:
308      * epoch_ms,setpt,fan1,fan1_raw,fan1_pwm,fan1_pwm_raw,fan2,fan2_raw,fan2_pwm,fan2_pwm_raw,fanN,fanN_raw,fanN_pwm,fanN_pwm_raw,sensor1,sensor1_raw,sensor2,sensor2_raw,sensorN,sensorN_raw,failsafe
309      */
310 
311     _log << "epoch_ms,setpt,requester";
312 
313     for (const auto& f : _fanInputs)
314     {
315         _log << "," << f << "," << f << "_raw";
316         _log << "," << f << "_pwm," << f << "_pwm_raw";
317     }
318     for (const auto& t : _thermalInputs)
319     {
320         _log << "," << t << "," << t << "_raw";
321     }
322 
323     _log << ",failsafe";
324     _log << std::endl;
325 }
326 
327 void DbusPidZone::writeLog(const std::string& value)
328 {
329     _log << value;
330 }
331 
332 /*
333  * TODO(venture) This is effectively updating the cache and should check if the
334  * values they're using to update it are new or old, or whatnot.  For instance,
335  * if we haven't heard from the host in X time we need to detect this failure.
336  *
337  * I haven't decided if the Sensor should have a lastUpdated method or whether
338  * that should be for the ReadInterface or etc...
339  */
340 
341 /**
342  * We want the PID loop to run with values cached, so this will get all the
343  * fan tachs for the loop.
344  */
345 void DbusPidZone::updateFanTelemetry(void)
346 {
347     /* TODO(venture): Should I just make _log point to /dev/null when logging
348      * is disabled?  I think it's a waste to try and log things even if the
349      * data is just being dropped though.
350      */
351     tstamp now = std::chrono::high_resolution_clock::now();
352     if (loggingEnabled)
353     {
354         _log << std::chrono::duration_cast<std::chrono::milliseconds>(
355                     now.time_since_epoch())
356                     .count();
357         _log << "," << _maximumSetPoint;
358         _log << "," << _maximumSetPointName;
359     }
360 
361     for (const auto& f : _fanInputs)
362     {
363         auto sensor = _mgr.getSensor(f);
364         ReadReturn r = sensor->read();
365         _cachedValuesByName[f] = {r.value, r.unscaled};
366         int64_t timeout = sensor->getTimeout();
367         tstamp then = r.updated;
368 
369         auto duration =
370             std::chrono::duration_cast<std::chrono::seconds>(now - then)
371                 .count();
372         auto period = std::chrono::seconds(timeout).count();
373         /*
374          * TODO(venture): We should check when these were last read.
375          * However, these are the fans, so if I'm not getting updated values
376          * for them... what should I do?
377          */
378         if (loggingEnabled)
379         {
380             const auto& v = _cachedValuesByName[f];
381             _log << "," << v.scaled << "," << v.unscaled;
382             const auto& p = _cachedFanOutputs[f];
383             _log << "," << p.scaled << "," << p.unscaled;
384         }
385 
386         if (debugEnabled)
387         {
388             std::cerr << f << " fan sensor reading: " << r.value << "\n";
389         }
390 
391         // check if fan fail.
392         if (sensor->getFailed())
393         {
394             _failSafeSensors.insert(f);
395             if (debugEnabled)
396             {
397                 std::cerr << f << " fan sensor get failed\n";
398             }
399         }
400         else if (timeout != 0 && duration >= period)
401         {
402             _failSafeSensors.insert(f);
403             if (debugEnabled)
404             {
405                 std::cerr << f << " fan sensor timeout\n";
406             }
407         }
408         else
409         {
410             // Check if it's in there: remove it.
411             auto kt = _failSafeSensors.find(f);
412             if (kt != _failSafeSensors.end())
413             {
414                 if (debugEnabled)
415                 {
416                     std::cerr << f << " is erased from failsafe sensor set\n";
417                 }
418                 _failSafeSensors.erase(kt);
419             }
420         }
421     }
422 
423     if (loggingEnabled)
424     {
425         for (const auto& t : _thermalInputs)
426         {
427             const auto& v = _cachedValuesByName[t];
428             _log << "," << v.scaled << "," << v.unscaled;
429         }
430     }
431 
432     return;
433 }
434 
435 void DbusPidZone::updateSensors(void)
436 {
437     using namespace std::chrono;
438     /* margin and temp are stored as temp */
439     tstamp now = high_resolution_clock::now();
440 
441     for (const auto& t : _thermalInputs)
442     {
443         auto sensor = _mgr.getSensor(t);
444         ReadReturn r = sensor->read();
445         int64_t timeout = sensor->getTimeout();
446 
447         _cachedValuesByName[t] = {r.value, r.unscaled};
448         tstamp then = r.updated;
449 
450         auto duration = duration_cast<std::chrono::seconds>(now - then).count();
451         auto period = std::chrono::seconds(timeout).count();
452 
453         if (debugEnabled)
454         {
455             std::cerr << t << " temperature sensor reading: " << r.value
456                       << "\n";
457         }
458 
459         if (sensor->getFailed())
460         {
461             _failSafeSensors.insert(t);
462             if (debugEnabled)
463             {
464                 std::cerr << t << " temperature sensor get failed\n";
465             }
466         }
467         else if (timeout != 0 && duration >= period)
468         {
469             // std::cerr << "Entering fail safe mode.\n";
470             _failSafeSensors.insert(t);
471             if (debugEnabled)
472             {
473                 std::cerr << t << " temperature sensor get timeout\n";
474             }
475         }
476         else
477         {
478             // Check if it's in there: remove it.
479             auto kt = _failSafeSensors.find(t);
480             if (kt != _failSafeSensors.end())
481             {
482                 if (debugEnabled)
483                 {
484                     std::cerr << t << " is erased from failsafe sensor set\n";
485                 }
486                 _failSafeSensors.erase(kt);
487             }
488         }
489     }
490 
491     return;
492 }
493 
494 void DbusPidZone::initializeCache(void)
495 {
496     auto nan = std::numeric_limits<double>::quiet_NaN();
497 
498     for (const auto& f : _fanInputs)
499     {
500         _cachedValuesByName[f] = {nan, nan};
501         _cachedFanOutputs[f] = {nan, nan};
502 
503         // Start all fans in fail-safe mode.
504         _failSafeSensors.insert(f);
505     }
506 
507     for (const auto& t : _thermalInputs)
508     {
509         _cachedValuesByName[t] = {nan, nan};
510 
511         // Start all sensors in fail-safe mode.
512         _failSafeSensors.insert(t);
513     }
514 }
515 
516 void DbusPidZone::dumpCache(void)
517 {
518     std::cerr << "Cache values now: \n";
519     for (const auto& [name, value] : _cachedValuesByName)
520     {
521         std::cerr << name << ": " << value.scaled << " " << value.unscaled
522                   << "\n";
523     }
524 
525     std::cerr << "Fan outputs now: \n";
526     for (const auto& [name, value] : _cachedFanOutputs)
527     {
528         std::cerr << name << ": " << value.scaled << " " << value.unscaled
529                   << "\n";
530     }
531 }
532 
533 void DbusPidZone::processFans(void)
534 {
535     for (auto& p : _fans)
536     {
537         p->process();
538     }
539 
540     if (_redundantWrite)
541     {
542         // This is only needed once
543         _redundantWrite = false;
544     }
545 }
546 
547 void DbusPidZone::processThermals(void)
548 {
549     for (auto& p : _thermals)
550     {
551         p->process();
552     }
553 }
554 
555 Sensor* DbusPidZone::getSensor(const std::string& name)
556 {
557     return _mgr.getSensor(name);
558 }
559 
560 bool DbusPidZone::getRedundantWrite(void) const
561 {
562     return _redundantWrite;
563 }
564 
565 bool DbusPidZone::manual(bool value)
566 {
567     std::cerr << "manual: " << value << std::endl;
568     setManualMode(value);
569     return ModeObject::manual(value);
570 }
571 
572 bool DbusPidZone::failSafe() const
573 {
574     return getFailSafeMode();
575 }
576 
577 } // namespace pid_control
578