xref: /openbmc/phosphor-pid-control/pid/zone.cpp (revision d2768c5703832aa06eaca57020c212dc8161c787)
1 /**
2  * Copyright 2017 Google Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /* Configuration. */
18 #include "zone.hpp"
19 
20 #include "conf.hpp"
21 #include "failsafeloggers/failsafe_logger_utility.hpp"
22 #include "pid/controller.hpp"
23 #include "pid/ec/pid.hpp"
24 #include "pid/fancontroller.hpp"
25 #include "pid/stepwisecontroller.hpp"
26 #include "pid/thermalcontroller.hpp"
27 #include "pid/tuning.hpp"
28 
29 #include <algorithm>
30 #include <chrono>
31 #include <cstring>
32 #include <fstream>
33 #include <iostream>
34 #include <memory>
35 #include <sstream>
36 #include <string>
37 
using tstamp = std::chrono::high_resolution_clock::time_point;
using namespace std::literals::chrono_literals;

// Enforces a minimum duration between events.
// Returns true if the event should be allowed, false if it should be dropped.
//
// The very first call is always allowed. Afterwards a call is allowed only
// when at least `pace` has elapsed since the last allowed call. The state is
// kept in function-local statics, so it is shared by every caller.
bool allowThrottle(const tstamp& now, const std::chrono::seconds& pace)
{
    static bool primed = false;
    static tstamp lastAllowed;

    // Not primed yet: first event, let it through unconditionally.
    // Otherwise: let it through unless it arrived too soon.
    const bool permitted = !primed || !((now - lastAllowed) < pace);

    if (permitted)
    {
        // Restart the pacing window from this event.
        primed = true;
        lastAllowed = now;
    }

    return permitted;
}
69 
70 namespace pid_control
71 {
72 
73 double DbusPidZone::getMaxSetPointRequest(void) const
74 {
75     return _maximumSetPoint;
76 }
77 
78 bool DbusPidZone::getManualMode(void) const
79 {
80     return _manualMode;
81 }
82 
83 void DbusPidZone::setManualMode(bool mode)
84 {
85     _manualMode = mode;
86 
87     // If returning to automatic mode, need to restore PWM from PID loop
88     if (!mode)
89     {
90         _redundantWrite = true;
91     }
92 }
93 
94 bool DbusPidZone::getFailSafeMode(void) const
95 {
96     // If any keys are present at least one sensor is in fail safe mode.
97     return !_failSafeSensors.empty();
98 }
99 
100 FailSafeSensorsMap DbusPidZone::getFailSafeSensors(void) const
101 {
102     return _failSafeSensors;
103 }
104 
105 void DbusPidZone::markSensorMissing(const std::string& name,
106                                     const std::string& failReason)
107 {
108     if (_missingAcceptable.find(name) != _missingAcceptable.end())
109     {
110         // Disallow sensors in MissingIsAcceptable list from causing failsafe
111         outputFailsafeLogWithZone(_zoneId, this->getFailSafeMode(), name,
112                                   "The sensor is missing but is acceptable.");
113         return;
114     }
115 
116     if (_sensorFailSafePercent[name] == 0)
117     {
118         _failSafeSensors[name] = std::pair(failReason, _zoneFailSafePercent);
119     }
120     else
121     {
122         _failSafeSensors[name] =
123             std::pair(failReason, _sensorFailSafePercent[name]);
124     }
125 
126     if (debugEnabled)
127     {
128         std::cerr << "Sensor " << name << " marked missing\n";
129     }
130 }
131 
132 int64_t DbusPidZone::getZoneID(void) const
133 {
134     return _zoneId;
135 }
136 
137 void DbusPidZone::addSetPoint(double setPoint, const std::string& name)
138 {
139     /* exclude disabled pidloop from _maximumSetPoint calculation*/
140     if (!isPidProcessEnabled(name))
141     {
142         return;
143     }
144 
145     auto profileName = name;
146     if (getAccSetPoint())
147     {
148         /*
149          * If the name of controller is Linear_Temp_CPU0.
150          * The profile name will be Temp_CPU0.
151          */
152         profileName = name.substr(name.find('_') + 1);
153         setPoints[profileName] += setPoint;
154     }
155     else
156     {
157         if (setPoints[profileName] < setPoint)
158         {
159             setPoints[profileName] = setPoint;
160         }
161     }
162 
163     /*
164      * if there are multiple thermal controllers with the same
165      * value, pick the first one in the iterator
166      */
167     if (_maximumSetPoint < setPoints[profileName])
168     {
169         _maximumSetPoint = setPoints[profileName];
170         _maximumSetPointName = profileName;
171     }
172 }
173 
174 void DbusPidZone::addRPMCeiling(double ceiling)
175 {
176     rpmCeilings.push_back(ceiling);
177 }
178 
179 void DbusPidZone::clearRPMCeilings(void)
180 {
181     rpmCeilings.clear();
182 }
183 
184 void DbusPidZone::clearSetPoints(void)
185 {
186     setPoints.clear();
187     _maximumSetPoint = 0;
188     _maximumSetPointName.clear();
189 }
190 
191 double DbusPidZone::getFailSafePercent(void)
192 {
193     if (_failSafeSensors.empty())
194     {
195         return _zoneFailSafePercent;
196     }
197 
198     FailSafeSensorsMap::iterator maxData = std::max_element(
199         _failSafeSensors.begin(), _failSafeSensors.end(),
200         [](const FailSafeSensorPair& firstData,
201            const FailSafeSensorPair& secondData) {
202             return firstData.second.second < secondData.second.second;
203         });
204 
205     // In dbus/dbusconfiguration.cpp, the default sensor failsafepercent is 0 if
206     // there is no setting in json.
207     // Therfore, if the max failsafe duty in _failSafeSensors is 0, set final
208     // failsafe duty to _zoneFailSafePercent.
209     if ((*maxData).second.second == 0)
210     {
211         return _zoneFailSafePercent;
212     }
213 
214     return (*maxData).second.second;
215 }
216 
217 double DbusPidZone::getMinThermalSetPoint(void) const
218 {
219     return _minThermalOutputSetPt;
220 }
221 
222 uint64_t DbusPidZone::getCycleIntervalTime(void) const
223 {
224     return _cycleTime.cycleIntervalTimeMS;
225 }
226 
227 uint64_t DbusPidZone::getUpdateThermalsCycle(void) const
228 {
229     return _cycleTime.updateThermalsTimeMS;
230 }
231 
232 void DbusPidZone::addFanPID(std::unique_ptr<Controller> pid)
233 {
234     _fans.push_back(std::move(pid));
235 }
236 
237 void DbusPidZone::addThermalPID(std::unique_ptr<Controller> pid)
238 {
239     _thermals.push_back(std::move(pid));
240 }
241 
242 double DbusPidZone::getCachedValue(const std::string& name)
243 {
244     return _cachedValuesByName.at(name).scaled;
245 }
246 
247 ValueCacheEntry DbusPidZone::getCachedValues(const std::string& name)
248 {
249     return _cachedValuesByName.at(name);
250 }
251 
252 void DbusPidZone::setOutputCache(std::string_view name,
253                                  const ValueCacheEntry& values)
254 {
255     _cachedFanOutputs[std::string{name}] = values;
256 }
257 
258 void DbusPidZone::addFanInput(const std::string& fan, bool missingAcceptable)
259 {
260     _fanInputs.push_back(fan);
261 
262     if (missingAcceptable)
263     {
264         _missingAcceptable.emplace(fan);
265     }
266 }
267 
268 void DbusPidZone::addThermalInput(const std::string& therm,
269                                   bool missingAcceptable)
270 {
271     /*
272      * One sensor may have stepwise and PID at the same time.
273      * Searching the sensor name before inserting it to avoid duplicated sensor
274      * names.
275      */
276     if (std::find(_thermalInputs.begin(), _thermalInputs.end(), therm) ==
277         _thermalInputs.end())
278     {
279         _thermalInputs.push_back(therm);
280     }
281 
282     if (missingAcceptable)
283     {
284         _missingAcceptable.emplace(therm);
285     }
286 }
287 
288 // Updates desired RPM setpoint from optional text file
289 // Returns true if rpmValue updated, false if left unchanged
290 static bool fileParseRpm(const std::string& fileName, double& rpmValue)
291 {
292     static constexpr std::chrono::seconds throttlePace{3};
293 
294     std::string errText;
295 
296     try
297     {
298         std::ifstream ifs;
299         ifs.open(fileName);
300         if (ifs)
301         {
302             int value;
303             ifs >> value;
304 
305             if (value <= 0)
306             {
307                 errText = "File content could not be parsed to a number";
308             }
309             else if (value <= 100)
310             {
311                 errText = "File must contain RPM value, not PWM value";
312             }
313             else
314             {
315                 rpmValue = static_cast<double>(value);
316                 return true;
317             }
318         }
319     }
320     catch (const std::exception& e)
321     {
322         errText = "Exception: ";
323         errText += e.what();
324     }
325 
326     // The file is optional, intentionally not an error if file not found
327     if (!(errText.empty()))
328     {
329         tstamp now = std::chrono::high_resolution_clock::now();
330         if (allowThrottle(now, throttlePace))
331         {
332             std::cerr << "Unable to read from '" << fileName << "': " << errText
333                       << "\n";
334         }
335     }
336 
337     return false;
338 }
339 
340 void DbusPidZone::determineMaxSetPointRequest(void)
341 {
342     std::vector<double>::iterator result;
343     double minThermalThreshold = getMinThermalSetPoint();
344 
345     if (rpmCeilings.size() > 0)
346     {
347         result = std::min_element(rpmCeilings.begin(), rpmCeilings.end());
348         // if Max set point is larger than the lowest ceiling, reset to lowest
349         // ceiling.
350         if (*result < _maximumSetPoint)
351         {
352             _maximumSetPoint = *result;
353             // When using lowest ceiling, controller name is ceiling.
354             _maximumSetPointName = "Ceiling";
355         }
356     }
357 
358     /*
359      * Combine the maximum SetPoint Name if the controllers have same profile
360      * name. e.g., PID_BB_INLET_TEMP_C + Stepwise_BB_INLET_TEMP_C.
361      */
362     if (getAccSetPoint())
363     {
364         auto profileName = _maximumSetPointName;
365         _maximumSetPointName = "";
366 
367         for (auto& p : _thermals)
368         {
369             auto controllerID = p->getID();
370             auto found = controllerID.find(profileName);
371             if (found != std::string::npos)
372             {
373                 if (_maximumSetPointName.empty())
374                 {
375                     _maximumSetPointName = controllerID;
376                 }
377                 else
378                 {
379                     _maximumSetPointName += " + " + controllerID;
380                 }
381             }
382         }
383     }
384 
385     /*
386      * If the maximum RPM setpoint output is below the minimum RPM
387      * setpoint, set it to the minimum.
388      */
389     if (minThermalThreshold >= _maximumSetPoint)
390     {
391         _maximumSetPoint = minThermalThreshold;
392         _maximumSetPointName = "Minimum";
393     }
394     else if (_maximumSetPointName.compare(_maximumSetPointNamePrev))
395     {
396         std::cerr << "PID Zone " << _zoneId << " max SetPoint "
397                   << _maximumSetPoint << " requested by "
398                   << _maximumSetPointName;
399         for (const auto& sensor : _failSafeSensors)
400         {
401             if (sensor.first.find("Fan") == std::string::npos)
402             {
403                 std::cerr << " " << sensor.first;
404             }
405         }
406         std::cerr << "\n";
407         _maximumSetPointNamePrev.assign(_maximumSetPointName);
408     }
409     if (tuningEnabled)
410     {
411         /*
412          * We received no setpoints from thermal sensors.
413          * This is a case experienced during tuning where they only specify
414          * fan sensors and one large fan PID for all the fans.
415          */
416         static constexpr auto setpointpath = "/etc/thermal.d/setpoint";
417 
418         fileParseRpm(setpointpath, _maximumSetPoint);
419 
420         // Allow per-zone setpoint files to override overall setpoint file
421         std::ostringstream zoneSuffix;
422         zoneSuffix << ".zone" << _zoneId;
423         std::string zoneSetpointPath = setpointpath + zoneSuffix.str();
424 
425         fileParseRpm(zoneSetpointPath, _maximumSetPoint);
426     }
427     return;
428 }
429 
430 void DbusPidZone::initializeLog(void)
431 {
432     /* Print header for log file:
433      * epoch_ms,setpt,fan1,fan1_raw,fan1_pwm,fan1_pwm_raw,fan2,fan2_raw,fan2_pwm,fan2_pwm_raw,fanN,fanN_raw,fanN_pwm,fanN_pwm_raw,sensor1,sensor1_raw,sensor2,sensor2_raw,sensorN,sensorN_raw,failsafe
434      */
435 
436     _log << "epoch_ms,setpt,requester";
437 
438     for (const auto& f : _fanInputs)
439     {
440         _log << "," << f << "," << f << "_raw";
441         _log << "," << f << "_pwm," << f << "_pwm_raw";
442     }
443     for (const auto& t : _thermalInputs)
444     {
445         _log << "," << t << "," << t << "_raw";
446     }
447 
448     _log << ",failsafe";
449     _log << std::endl;
450 }
451 
452 void DbusPidZone::writeLog(const std::string& value)
453 {
454     _log << value;
455 }
456 
457 /*
458  * TODO(venture) This is effectively updating the cache and should check if the
459  * values they're using to update it are new or old, or whatnot.  For instance,
460  * if we haven't heard from the host in X time we need to detect this failure.
461  *
462  * I haven't decided if the Sensor should have a lastUpdated method or whether
463  * that should be for the ReadInterface or etc...
464  */
465 
466 /**
467  * We want the PID loop to run with values cached, so this will get all the
468  * fan tachs for the loop.
469  */
470 void DbusPidZone::updateFanTelemetry(void)
471 {
472     /* TODO(venture): Should I just make _log point to /dev/null when logging
473      * is disabled?  I think it's a waste to try and log things even if the
474      * data is just being dropped though.
475      */
476     const auto now = std::chrono::high_resolution_clock::now();
477     if (loggingEnabled)
478     {
479         _log << std::chrono::duration_cast<std::chrono::milliseconds>(
480                     now.time_since_epoch())
481                     .count();
482         _log << "," << _maximumSetPoint;
483         _log << "," << _maximumSetPointName;
484     }
485 
486     processSensorInputs</* fanSensorLogging */ true>(_fanInputs, now);
487 
488     if (loggingEnabled)
489     {
490         for (const auto& t : _thermalInputs)
491         {
492             const auto& v = _cachedValuesByName[t];
493             _log << "," << v.scaled << "," << v.unscaled;
494         }
495     }
496 
497     return;
498 }
499 
500 void DbusPidZone::updateSensors(void)
501 {
502     processSensorInputs</* fanSensorLogging */ false>(
503         _thermalInputs, std::chrono::high_resolution_clock::now());
504 
505     return;
506 }
507 
508 void DbusPidZone::initializeCache(void)
509 {
510     auto nan = std::numeric_limits<double>::quiet_NaN();
511 
512     for (const auto& f : _fanInputs)
513     {
514         _cachedValuesByName[f] = {nan, nan};
515         _cachedFanOutputs[f] = {nan, nan};
516 
517         // Start all fans in fail-safe mode.
518         markSensorMissing(f, "");
519     }
520 
521     for (const auto& t : _thermalInputs)
522     {
523         _cachedValuesByName[t] = {nan, nan};
524 
525         // Start all sensors in fail-safe mode.
526         markSensorMissing(t, "");
527     }
528 }
529 
530 void DbusPidZone::dumpCache(void)
531 {
532     std::cerr << "Cache values now: \n";
533     for (const auto& [name, value] : _cachedValuesByName)
534     {
535         std::cerr << name << ": " << value.scaled << " " << value.unscaled
536                   << "\n";
537     }
538 
539     std::cerr << "Fan outputs now: \n";
540     for (const auto& [name, value] : _cachedFanOutputs)
541     {
542         std::cerr << name << ": " << value.scaled << " " << value.unscaled
543                   << "\n";
544     }
545 }
546 
547 void DbusPidZone::processFans(void)
548 {
549     for (auto& p : _fans)
550     {
551         p->process();
552     }
553 
554     if (_redundantWrite)
555     {
556         // This is only needed once
557         _redundantWrite = false;
558     }
559 }
560 
561 void DbusPidZone::processThermals(void)
562 {
563     for (auto& p : _thermals)
564     {
565         p->process();
566     }
567 }
568 
569 Sensor* DbusPidZone::getSensor(const std::string& name)
570 {
571     return _mgr.getSensor(name);
572 }
573 
574 std::vector<std::string> DbusPidZone::getSensorNames(void)
575 {
576     return _thermalInputs;
577 }
578 
579 bool DbusPidZone::getRedundantWrite(void) const
580 {
581     return _redundantWrite;
582 }
583 
584 bool DbusPidZone::manual(bool value)
585 {
586     std::cerr << "manual: " << value << std::endl;
587     setManualMode(value);
588     return ModeObject::manual(value);
589 }
590 
591 bool DbusPidZone::failSafe() const
592 {
593     return getFailSafeMode();
594 }
595 
596 void DbusPidZone::addPidControlProcess(
597     const std::string& name, const std::string& type, double setpoint,
598     sdbusplus::bus_t& bus, const std::string& objPath, bool defer)
599 {
600     _pidsControlProcess[name] = std::make_unique<ProcessObject>(
601         bus, objPath.c_str(),
602         defer ? ProcessObject::action::defer_emit
603               : ProcessObject::action::emit_object_added);
604     // Default enable setting = true
605     _pidsControlProcess[name]->enabled(true);
606     _pidsControlProcess[name]->setpoint(setpoint);
607 
608     if (type == "temp")
609     {
610         _pidsControlProcess[name]->classType("Temperature");
611     }
612     else if (type == "margin")
613     {
614         _pidsControlProcess[name]->classType("Margin");
615     }
616     else if (type == "power")
617     {
618         _pidsControlProcess[name]->classType("Power");
619     }
620     else if (type == "powersum")
621     {
622         _pidsControlProcess[name]->classType("PowerSum");
623     }
624 }
625 
626 bool DbusPidZone::isPidProcessEnabled(const std::string& name)
627 {
628     return _pidsControlProcess[name]->enabled();
629 }
630 
631 void DbusPidZone::addPidFailSafePercent(const std::vector<std::string>& inputs,
632                                         double percent)
633 {
634     for (const auto& sensorName : inputs)
635     {
636         if (_sensorFailSafePercent.find(sensorName) !=
637             _sensorFailSafePercent.end())
638         {
639             _sensorFailSafePercent[sensorName] =
640                 std::max(_sensorFailSafePercent[sensorName], percent);
641             if (debugEnabled)
642             {
643                 std::cerr << "Sensor " << sensorName
644                           << " failsafe percent updated to "
645                           << _sensorFailSafePercent[sensorName] << "\n";
646             }
647         }
648         else
649         {
650             _sensorFailSafePercent[sensorName] = percent;
651             if (debugEnabled)
652             {
653                 std::cerr << "Sensor " << sensorName
654                           << " failsafe percent set to " << percent << "\n";
655             }
656         }
657     }
658 }
659 
660 std::string DbusPidZone::leader() const
661 {
662     return _maximumSetPointName;
663 }
664 
665 void DbusPidZone::updateThermalPowerDebugInterface(
666     std::string pidName, std::string leader, double input, double output)
667 {
668     if (leader.empty())
669     {
670         _pidsControlProcess[pidName]->output(output);
671     }
672     else
673     {
674         _pidsControlProcess[pidName]->leader(leader);
675         _pidsControlProcess[pidName]->input(input);
676     }
677 }
678 
679 bool DbusPidZone::getAccSetPoint(void) const
680 {
681     return _accumulateSetPoint;
682 }
683 
684 } // namespace pid_control
685