xref: /openbmc/phosphor-pid-control/pid/zone.cpp (revision 6df8bb5086b29c43217596b194dda7fbc4e3ec4a)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* Configuration. */
18 #include "zone.hpp"
19 
20 #include "conf.hpp"
21 #include "failsafeloggers/failsafe_logger_utility.hpp"
22 #include "pid/controller.hpp"
23 #include "pid/ec/pid.hpp"
24 #include "pid/fancontroller.hpp"
25 #include "pid/stepwisecontroller.hpp"
26 #include "pid/thermalcontroller.hpp"
27 #include "pid/tuning.hpp"
28 
29 #include <algorithm>
30 #include <chrono>
31 #include <cstring>
32 #include <fstream>
33 #include <iostream>
34 #include <memory>
35 #include <sstream>
36 #include <string>
37 
38 using tstamp = std::chrono::high_resolution_clock::time_point;
39 using namespace std::literals::chrono_literals;
40 
41 // Enforces minimum duration between events
42 // Rreturns true if event should be allowed, false if disallowed
allowThrottle(const tstamp & now,const std::chrono::seconds & pace)43 bool allowThrottle(const tstamp& now, const std::chrono::seconds& pace)
44 {
45     static tstamp then;
46     static bool first = true;
47 
48     if (first)
49     {
50         // Special case initialization
51         then = now;
52         first = false;
53 
54         // Initialization, always allow
55         return true;
56     }
57 
58     auto elapsed = now - then;
59     if (elapsed < pace)
60     {
61         // Too soon since last time, disallow
62         return false;
63     }
64 
65     // It has been long enough, allow
66     then = now;
67     return true;
68 }
69 
70 namespace pid_control
71 {
72 
getMaxSetPointRequest(void) const73 double DbusPidZone::getMaxSetPointRequest(void) const
74 {
75     return _maximumSetPoint;
76 }
77 
getManualMode(void) const78 bool DbusPidZone::getManualMode(void) const
79 {
80     return _manualMode;
81 }
82 
setManualMode(bool mode)83 void DbusPidZone::setManualMode(bool mode)
84 {
85     _manualMode = mode;
86 
87     // If returning to automatic mode, need to restore PWM from PID loop
88     if (!mode)
89     {
90         _redundantWrite = true;
91     }
92 }
93 
getFailSafeMode(void) const94 bool DbusPidZone::getFailSafeMode(void) const
95 {
96     // If any keys are present at least one sensor is in fail safe mode.
97     return !_failSafeSensors.empty();
98 }
99 
markSensorMissing(const std::string & name)100 void DbusPidZone::markSensorMissing(const std::string& name)
101 {
102     if (_missingAcceptable.find(name) != _missingAcceptable.end())
103     {
104         // Disallow sensors in MissingIsAcceptable list from causing failsafe
105         outputFailsafeLogWithZone(_zoneId, this->getFailSafeMode(), name,
106                                   "The sensor is missing but is acceptable.");
107         return;
108     }
109 
110     if (_sensorFailSafePercent[name] == 0)
111     {
112         _failSafeSensors[name] = _zoneFailSafePercent;
113     }
114     else
115     {
116         _failSafeSensors[name] = _sensorFailSafePercent[name];
117     }
118 
119     if (debugEnabled)
120     {
121         std::cerr << "Sensor " << name << " marked missing\n";
122     }
123 }
124 
getZoneID(void) const125 int64_t DbusPidZone::getZoneID(void) const
126 {
127     return _zoneId;
128 }
129 
addSetPoint(double setPoint,const std::string & name)130 void DbusPidZone::addSetPoint(double setPoint, const std::string& name)
131 {
132     /* exclude disabled pidloop from _maximumSetPoint calculation*/
133     if (!isPidProcessEnabled(name))
134     {
135         return;
136     }
137 
138     auto profileName = name;
139     if (getAccSetPoint())
140     {
141         /*
142          * If the name of controller is Linear_Temp_CPU0.
143          * The profile name will be Temp_CPU0.
144          */
145         profileName = name.substr(name.find("_") + 1);
146         _SetPoints[profileName] += setPoint;
147     }
148     else
149     {
150         if (_SetPoints[profileName] < setPoint)
151         {
152             _SetPoints[profileName] = setPoint;
153         }
154     }
155 
156     /*
157      * if there are multiple thermal controllers with the same
158      * value, pick the first one in the iterator
159      */
160     if (_maximumSetPoint < _SetPoints[profileName])
161     {
162         _maximumSetPoint = _SetPoints[profileName];
163         _maximumSetPointName = profileName;
164     }
165 }
166 
addRPMCeiling(double ceiling)167 void DbusPidZone::addRPMCeiling(double ceiling)
168 {
169     _RPMCeilings.push_back(ceiling);
170 }
171 
clearRPMCeilings(void)172 void DbusPidZone::clearRPMCeilings(void)
173 {
174     _RPMCeilings.clear();
175 }
176 
clearSetPoints(void)177 void DbusPidZone::clearSetPoints(void)
178 {
179     _SetPoints.clear();
180     _maximumSetPoint = 0;
181     _maximumSetPointName.clear();
182 }
183 
getFailSafePercent(void)184 double DbusPidZone::getFailSafePercent(void)
185 {
186     std::map<std::string, double>::iterator maxData = std::max_element(
187         _failSafeSensors.begin(), _failSafeSensors.end(),
188         [](const std::pair<std::string, double> firstData,
189            const std::pair<std::string, double> secondData) {
190             return firstData.second < secondData.second;
191         });
192 
193     // In dbus/dbusconfiguration.cpp, the default sensor failsafepercent is 0 if
194     // there is no setting in json.
195     // Therfore, if the max failsafe duty in _failSafeSensors is 0, set final
196     // failsafe duty to _zoneFailSafePercent.
197     if ((*maxData).second == 0)
198     {
199         return _zoneFailSafePercent;
200     }
201     else
202     {
203         return (*maxData).second;
204     }
205 }
206 
getMinThermalSetPoint(void) const207 double DbusPidZone::getMinThermalSetPoint(void) const
208 {
209     return _minThermalOutputSetPt;
210 }
211 
getCycleIntervalTime(void) const212 uint64_t DbusPidZone::getCycleIntervalTime(void) const
213 {
214     return _cycleTime.cycleIntervalTimeMS;
215 }
216 
getUpdateThermalsCycle(void) const217 uint64_t DbusPidZone::getUpdateThermalsCycle(void) const
218 {
219     return _cycleTime.updateThermalsTimeMS;
220 }
221 
addFanPID(std::unique_ptr<Controller> pid)222 void DbusPidZone::addFanPID(std::unique_ptr<Controller> pid)
223 {
224     _fans.push_back(std::move(pid));
225 }
226 
addThermalPID(std::unique_ptr<Controller> pid)227 void DbusPidZone::addThermalPID(std::unique_ptr<Controller> pid)
228 {
229     _thermals.push_back(std::move(pid));
230 }
231 
getCachedValue(const std::string & name)232 double DbusPidZone::getCachedValue(const std::string& name)
233 {
234     return _cachedValuesByName.at(name).scaled;
235 }
236 
getCachedValues(const std::string & name)237 ValueCacheEntry DbusPidZone::getCachedValues(const std::string& name)
238 {
239     return _cachedValuesByName.at(name);
240 }
241 
setOutputCache(std::string_view name,const ValueCacheEntry & values)242 void DbusPidZone::setOutputCache(std::string_view name,
243                                  const ValueCacheEntry& values)
244 {
245     _cachedFanOutputs[std::string{name}] = values;
246 }
247 
addFanInput(const std::string & fan,bool missingAcceptable)248 void DbusPidZone::addFanInput(const std::string& fan, bool missingAcceptable)
249 {
250     _fanInputs.push_back(fan);
251 
252     if (missingAcceptable)
253     {
254         _missingAcceptable.emplace(fan);
255     }
256 }
257 
addThermalInput(const std::string & therm,bool missingAcceptable)258 void DbusPidZone::addThermalInput(const std::string& therm,
259                                   bool missingAcceptable)
260 {
261     /*
262      * One sensor may have stepwise and PID at the same time.
263      * Searching the sensor name before inserting it to avoid duplicated sensor
264      * names.
265      */
266     if (std::find(_thermalInputs.begin(), _thermalInputs.end(), therm) ==
267         _thermalInputs.end())
268     {
269         _thermalInputs.push_back(therm);
270     }
271 
272     if (missingAcceptable)
273     {
274         _missingAcceptable.emplace(therm);
275     }
276 }
277 
278 // Updates desired RPM setpoint from optional text file
279 // Returns true if rpmValue updated, false if left unchanged
fileParseRpm(const std::string & fileName,double & rpmValue)280 static bool fileParseRpm(const std::string& fileName, double& rpmValue)
281 {
282     static constexpr std::chrono::seconds throttlePace{3};
283 
284     std::string errText;
285 
286     try
287     {
288         std::ifstream ifs;
289         ifs.open(fileName);
290         if (ifs)
291         {
292             int value;
293             ifs >> value;
294 
295             if (value <= 0)
296             {
297                 errText = "File content could not be parsed to a number";
298             }
299             else if (value <= 100)
300             {
301                 errText = "File must contain RPM value, not PWM value";
302             }
303             else
304             {
305                 rpmValue = static_cast<double>(value);
306                 return true;
307             }
308         }
309     }
310     catch (const std::exception& e)
311     {
312         errText = "Exception: ";
313         errText += e.what();
314     }
315 
316     // The file is optional, intentionally not an error if file not found
317     if (!(errText.empty()))
318     {
319         tstamp now = std::chrono::high_resolution_clock::now();
320         if (allowThrottle(now, throttlePace))
321         {
322             std::cerr << "Unable to read from '" << fileName << "': " << errText
323                       << "\n";
324         }
325     }
326 
327     return false;
328 }
329 
determineMaxSetPointRequest(void)330 void DbusPidZone::determineMaxSetPointRequest(void)
331 {
332     std::vector<double>::iterator result;
333     double minThermalThreshold = getMinThermalSetPoint();
334 
335     if (_RPMCeilings.size() > 0)
336     {
337         result = std::min_element(_RPMCeilings.begin(), _RPMCeilings.end());
338         // if Max set point is larger than the lowest ceiling, reset to lowest
339         // ceiling.
340         if (*result < _maximumSetPoint)
341         {
342             _maximumSetPoint = *result;
343             // When using lowest ceiling, controller name is ceiling.
344             _maximumSetPointName = "Ceiling";
345         }
346     }
347 
348     /*
349      * Combine the maximum SetPoint Name if the controllers have same profile
350      * name. e.g., PID_BB_INLET_TEMP_C + Stepwise_BB_INLET_TEMP_C.
351      */
352     if (getAccSetPoint())
353     {
354         auto profileName = _maximumSetPointName;
355         _maximumSetPointName = "";
356 
357         for (auto& p : _thermals)
358         {
359             auto controllerID = p->getID();
360             auto found = controllerID.find(profileName);
361             if (found != std::string::npos)
362             {
363                 if (_maximumSetPointName.empty())
364                 {
365                     _maximumSetPointName = controllerID;
366                 }
367                 else
368                 {
369                     _maximumSetPointName += " + " + controllerID;
370                 }
371             }
372         }
373     }
374 
375     /*
376      * If the maximum RPM setpoint output is below the minimum RPM
377      * setpoint, set it to the minimum.
378      */
379     if (minThermalThreshold >= _maximumSetPoint)
380     {
381         _maximumSetPoint = minThermalThreshold;
382         _maximumSetPointName = "Minimum";
383     }
384     else if (_maximumSetPointName.compare(_maximumSetPointNamePrev))
385     {
386         std::cerr << "PID Zone " << _zoneId << " max SetPoint "
387                   << _maximumSetPoint << " requested by "
388                   << _maximumSetPointName;
389         for (const auto& sensor : _failSafeSensors)
390         {
391             if (sensor.first.find("Fan") == std::string::npos)
392             {
393                 std::cerr << " " << sensor.first;
394             }
395         }
396         std::cerr << "\n";
397         _maximumSetPointNamePrev.assign(_maximumSetPointName);
398     }
399     if (tuningEnabled)
400     {
401         /*
402          * We received no setpoints from thermal sensors.
403          * This is a case experienced during tuning where they only specify
404          * fan sensors and one large fan PID for all the fans.
405          */
406         static constexpr auto setpointpath = "/etc/thermal.d/setpoint";
407 
408         fileParseRpm(setpointpath, _maximumSetPoint);
409 
410         // Allow per-zone setpoint files to override overall setpoint file
411         std::ostringstream zoneSuffix;
412         zoneSuffix << ".zone" << _zoneId;
413         std::string zoneSetpointPath = setpointpath + zoneSuffix.str();
414 
415         fileParseRpm(zoneSetpointPath, _maximumSetPoint);
416     }
417     return;
418 }
419 
initializeLog(void)420 void DbusPidZone::initializeLog(void)
421 {
422     /* Print header for log file:
423      * epoch_ms,setpt,fan1,fan1_raw,fan1_pwm,fan1_pwm_raw,fan2,fan2_raw,fan2_pwm,fan2_pwm_raw,fanN,fanN_raw,fanN_pwm,fanN_pwm_raw,sensor1,sensor1_raw,sensor2,sensor2_raw,sensorN,sensorN_raw,failsafe
424      */
425 
426     _log << "epoch_ms,setpt,requester";
427 
428     for (const auto& f : _fanInputs)
429     {
430         _log << "," << f << "," << f << "_raw";
431         _log << "," << f << "_pwm," << f << "_pwm_raw";
432     }
433     for (const auto& t : _thermalInputs)
434     {
435         _log << "," << t << "," << t << "_raw";
436     }
437 
438     _log << ",failsafe";
439     _log << std::endl;
440 }
441 
writeLog(const std::string & value)442 void DbusPidZone::writeLog(const std::string& value)
443 {
444     _log << value;
445 }
446 
447 /*
448  * TODO(venture) This is effectively updating the cache and should check if the
449  * values they're using to update it are new or old, or whatnot.  For instance,
450  * if we haven't heard from the host in X time we need to detect this failure.
451  *
452  * I haven't decided if the Sensor should have a lastUpdated method or whether
453  * that should be for the ReadInterface or etc...
454  */
455 
456 /**
457  * We want the PID loop to run with values cached, so this will get all the
458  * fan tachs for the loop.
459  */
updateFanTelemetry(void)460 void DbusPidZone::updateFanTelemetry(void)
461 {
462     /* TODO(venture): Should I just make _log point to /dev/null when logging
463      * is disabled?  I think it's a waste to try and log things even if the
464      * data is just being dropped though.
465      */
466     const auto now = std::chrono::high_resolution_clock::now();
467     if (loggingEnabled)
468     {
469         _log << std::chrono::duration_cast<std::chrono::milliseconds>(
470                     now.time_since_epoch())
471                     .count();
472         _log << "," << _maximumSetPoint;
473         _log << "," << _maximumSetPointName;
474     }
475 
476     processSensorInputs</* fanSensorLogging */ true>(_fanInputs, now);
477 
478     if (loggingEnabled)
479     {
480         for (const auto& t : _thermalInputs)
481         {
482             const auto& v = _cachedValuesByName[t];
483             _log << "," << v.scaled << "," << v.unscaled;
484         }
485     }
486 
487     return;
488 }
489 
updateSensors(void)490 void DbusPidZone::updateSensors(void)
491 {
492     processSensorInputs</* fanSensorLogging */ false>(
493         _thermalInputs, std::chrono::high_resolution_clock::now());
494 
495     return;
496 }
497 
initializeCache(void)498 void DbusPidZone::initializeCache(void)
499 {
500     auto nan = std::numeric_limits<double>::quiet_NaN();
501 
502     for (const auto& f : _fanInputs)
503     {
504         _cachedValuesByName[f] = {nan, nan};
505         _cachedFanOutputs[f] = {nan, nan};
506 
507         // Start all fans in fail-safe mode.
508         markSensorMissing(f);
509     }
510 
511     for (const auto& t : _thermalInputs)
512     {
513         _cachedValuesByName[t] = {nan, nan};
514 
515         // Start all sensors in fail-safe mode.
516         markSensorMissing(t);
517     }
518 }
519 
dumpCache(void)520 void DbusPidZone::dumpCache(void)
521 {
522     std::cerr << "Cache values now: \n";
523     for (const auto& [name, value] : _cachedValuesByName)
524     {
525         std::cerr << name << ": " << value.scaled << " " << value.unscaled
526                   << "\n";
527     }
528 
529     std::cerr << "Fan outputs now: \n";
530     for (const auto& [name, value] : _cachedFanOutputs)
531     {
532         std::cerr << name << ": " << value.scaled << " " << value.unscaled
533                   << "\n";
534     }
535 }
536 
processFans(void)537 void DbusPidZone::processFans(void)
538 {
539     for (auto& p : _fans)
540     {
541         p->process();
542     }
543 
544     if (_redundantWrite)
545     {
546         // This is only needed once
547         _redundantWrite = false;
548     }
549 }
550 
processThermals(void)551 void DbusPidZone::processThermals(void)
552 {
553     for (auto& p : _thermals)
554     {
555         p->process();
556     }
557 }
558 
getSensor(const std::string & name)559 Sensor* DbusPidZone::getSensor(const std::string& name)
560 {
561     return _mgr.getSensor(name);
562 }
563 
getSensorNames(void)564 std::vector<std::string> DbusPidZone::getSensorNames(void)
565 {
566     return _thermalInputs;
567 }
568 
getRedundantWrite(void) const569 bool DbusPidZone::getRedundantWrite(void) const
570 {
571     return _redundantWrite;
572 }
573 
manual(bool value)574 bool DbusPidZone::manual(bool value)
575 {
576     std::cerr << "manual: " << value << std::endl;
577     setManualMode(value);
578     return ModeObject::manual(value);
579 }
580 
failSafe() const581 bool DbusPidZone::failSafe() const
582 {
583     return getFailSafeMode();
584 }
585 
addPidControlProcess(std::string name,std::string type,double setpoint,sdbusplus::bus_t & bus,std::string objPath,bool defer)586 void DbusPidZone::addPidControlProcess(std::string name, std::string type,
587                                        double setpoint, sdbusplus::bus_t& bus,
588                                        std::string objPath, bool defer)
589 {
590     _pidsControlProcess[name] = std::make_unique<ProcessObject>(
591         bus, objPath.c_str(),
592         defer ? ProcessObject::action::defer_emit
593               : ProcessObject::action::emit_object_added);
594     // Default enable setting = true
595     _pidsControlProcess[name]->enabled(true);
596     _pidsControlProcess[name]->setpoint(setpoint);
597 
598     if (type == "temp")
599     {
600         _pidsControlProcess[name]->classType("Temperature");
601     }
602     else if (type == "margin")
603     {
604         _pidsControlProcess[name]->classType("Margin");
605     }
606     else if (type == "power")
607     {
608         _pidsControlProcess[name]->classType("Power");
609     }
610     else if (type == "powersum")
611     {
612         _pidsControlProcess[name]->classType("PowerSum");
613     }
614 }
615 
isPidProcessEnabled(std::string name)616 bool DbusPidZone::isPidProcessEnabled(std::string name)
617 {
618     return _pidsControlProcess[name]->enabled();
619 }
620 
addPidFailSafePercent(std::vector<std::string> inputs,double percent)621 void DbusPidZone::addPidFailSafePercent(std::vector<std::string> inputs,
622                                         double percent)
623 {
624     for (const auto& sensorName : inputs)
625     {
626         if (_sensorFailSafePercent.find(sensorName) !=
627             _sensorFailSafePercent.end())
628         {
629             _sensorFailSafePercent[sensorName] =
630                 std::max(_sensorFailSafePercent[sensorName], percent);
631             if (debugEnabled)
632             {
633                 std::cerr << "Sensor " << sensorName
634                           << " failsafe percent updated to "
635                           << _sensorFailSafePercent[sensorName] << "\n";
636             }
637         }
638         else
639         {
640             _sensorFailSafePercent[sensorName] = percent;
641             if (debugEnabled)
642             {
643                 std::cerr << "Sensor " << sensorName
644                           << " failsafe percent set to " << percent << "\n";
645             }
646         }
647     }
648 }
649 
leader() const650 std::string DbusPidZone::leader() const
651 {
652     return _maximumSetPointName;
653 }
654 
updateThermalPowerDebugInterface(std::string pidName,std::string leader,double input,double output)655 void DbusPidZone::updateThermalPowerDebugInterface(
656     std::string pidName, std::string leader, double input, double output)
657 {
658     if (leader.empty())
659     {
660         _pidsControlProcess[pidName]->output(output);
661     }
662     else
663     {
664         _pidsControlProcess[pidName]->leader(leader);
665         _pidsControlProcess[pidName]->input(input);
666     }
667 }
668 
getAccSetPoint(void) const669 bool DbusPidZone::getAccSetPoint(void) const
670 {
671     return _accumulateSetPoint;
672 }
673 
674 } // namespace pid_control
675