1 // Copyright 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "metric.hpp"
16 
17 #include "metricblob.pb.n.h"
18 
19 #include "util.hpp"
20 
21 #include <pb_encode.h>
22 #include <sys/statvfs.h>
23 
24 #include <phosphor-logging/log.hpp>
25 
#include <algorithm>
#include <cstdint>
#include <filesystem>
#include <format>
#include <sstream>
#include <string>
#include <string_view>
#include <system_error>
#include <tuple>
#include <utility>
#include <vector>
31 
32 namespace metric_blob
33 {
34 
35 using phosphor::logging::entry;
36 using phosphor::logging::log;
37 using level = phosphor::logging::level;
38 
BmcHealthSnapshot()39 BmcHealthSnapshot::BmcHealthSnapshot() :
40     done(false), stringId(0), ticksPerSec(0)
41 {}
42 
43 template <typename T>
44 static constexpr auto pbEncodeStr =
45     [](pb_ostream_t* stream, const pb_field_iter_t* field,
__anon3af598100102(pb_ostream_t* stream, const pb_field_iter_t* field, void* const* arg) 46        void* const* arg) noexcept {
47         static_assert(sizeof(*std::declval<T>().data()) == sizeof(pb_byte_t));
48         const auto& s = *reinterpret_cast<const T*>(*arg);
49         return pb_encode_tag_for_field(stream, field) &&
50                pb_encode_string(stream,
51                                 reinterpret_cast<const pb_byte_t*>(s.data()),
52                                 s.size());
53     };
54 
55 template <typename T>
pbStrEncoder(const T & t)56 static pb_callback_t pbStrEncoder(const T& t) noexcept
57 {
58     return {{.encode = pbEncodeStr<T>}, const_cast<T*>(&t)};
59 }
60 
61 template <auto fields, typename T>
62 static constexpr auto pbEncodeSubs =
63     [](pb_ostream_t* stream, const pb_field_iter_t* field,
__anon3af598100202(pb_ostream_t* stream, const pb_field_iter_t* field, void* const* arg) 64        void* const* arg) noexcept {
65         for (const auto& sub : *reinterpret_cast<const std::vector<T>*>(*arg))
66         {
67             if (!pb_encode_tag_for_field(stream, field) ||
68                 !pb_encode_submessage(stream, fields, &sub))
69             {
70                 return false;
71             }
72         }
73         return true;
74     };
75 
76 template <auto fields, typename T>
pbSubsEncoder(const std::vector<T> & t)77 static pb_callback_t pbSubsEncoder(const std::vector<T>& t)
78 {
79     return {{.encode = pbEncodeSubs<fields, T>},
80             const_cast<std::vector<T>*>(&t)};
81 }
82 
// CPU usage of one process, used to rank processes before they are reported.
struct ProcStatEntry
{
    std::string cmdline;
    std::string tcomm;
    // User / system CPU time as reported by getTcommUtimeStime(). Both start
    // at zero so that a default-constructed entry is fully initialized.
    float utime = 0;
    float stime = 0;

    // Processes with the longest utime + stime are ranked first.
    // Tie breaking is done with cmdline then tcomm.
    bool operator<(const ProcStatEntry& other) const
    {
        const float total = utime + stime;
        const float otherTotal = other.utime + other.stime;
        // The totals are swapped between the two sides so that the larger
        // total sorts first, while the strings keep their ascending order.
        return std::tie(otherTotal, cmdline, tcomm) <
               std::tie(total, other.cmdline, other.tcomm);
    }
};
100 
getProcStatMetric(BmcHealthSnapshot & obj,long ticksPerSec,std::vector<bmcmetrics_metricproto_BmcProcStatMetric_BmcProcStat> & procs,bool & use)101 static bmcmetrics_metricproto_BmcProcStatMetric getProcStatMetric(
102     BmcHealthSnapshot& obj, long ticksPerSec,
103     std::vector<bmcmetrics_metricproto_BmcProcStatMetric_BmcProcStat>& procs,
104     bool& use) noexcept
105 {
106     if (ticksPerSec == 0)
107     {
108         return {};
109     }
110     constexpr std::string_view procPath = "/proc/";
111 
112     std::vector<ProcStatEntry> entries;
113 
114     for (const auto& procEntry : std::filesystem::directory_iterator(procPath))
115     {
116         const std::string& path = procEntry.path();
117         int pid = -1;
118         if (isNumericPath(path, pid))
119         {
120             ProcStatEntry entry;
121 
122             try
123             {
124                 entry.cmdline = getCmdLine(pid);
125                 TcommUtimeStime t = getTcommUtimeStime(pid, ticksPerSec);
126                 entry.tcomm = t.tcomm;
127                 entry.utime = t.utime;
128                 entry.stime = t.stime;
129 
130                 entries.push_back(entry);
131             }
132             catch (const std::exception& e)
133             {
134                 log<level::ERR>("Could not obtain process stats");
135             }
136         }
137     }
138 
139     std::sort(entries.begin(), entries.end());
140 
141     bool isOthers = false;
142     ProcStatEntry others;
143     others.cmdline = "(Others)";
144     others.utime = others.stime = 0;
145 
146     // Only show this many processes and aggregate all remaining ones into
147     // "others" in order to keep the size of the snapshot reasonably small.
148     // With 10 process stat entries and 10 FD count entries, the size of the
149     // snapshot reaches around 1.5KiB. This is non-trivial, and we have to set
150     // the collection interval long enough so as not to over-stress the IPMI
151     // interface and the data collection service. The value of 10 is chosen
152     // empirically, it might be subject to adjustments when the system is
153     // launched later.
154     constexpr int topN = 10;
155 
156     for (size_t i = 0; i < entries.size(); ++i)
157     {
158         if (i >= topN)
159         {
160             isOthers = true;
161         }
162 
163         const ProcStatEntry& entry = entries[i];
164 
165         if (isOthers)
166         {
167             others.utime += entry.utime;
168             others.stime += entry.stime;
169         }
170         else
171         {
172             std::string fullCmdline = entry.cmdline;
173             if (entry.tcomm.size() > 0)
174             {
175                 fullCmdline += " ";
176                 fullCmdline += entry.tcomm;
177             }
178             procs.emplace_back(
179                 bmcmetrics_metricproto_BmcProcStatMetric_BmcProcStat{
180                     .sidx_cmdline = obj.getStringID(fullCmdline),
181                     .utime = entry.utime,
182                     .stime = entry.stime,
183                 });
184         }
185     }
186 
187     if (isOthers)
188     {
189         procs.emplace_back(bmcmetrics_metricproto_BmcProcStatMetric_BmcProcStat{
190             .sidx_cmdline = obj.getStringID(others.cmdline),
191             .utime = others.utime,
192             .stime = others.stime,
193 
194         });
195     }
196 
197     use = true;
198     return bmcmetrics_metricproto_BmcProcStatMetric{
199         .stats = pbSubsEncoder<
200             bmcmetrics_metricproto_BmcProcStatMetric_BmcProcStat_fields>(procs),
201     };
202 }
203 
// Counts the entries of /proc/<pid>/fd.
// Errors reported by the directory iterator (for instance when the directory
// does not exist) propagate to the caller as exceptions.
int getFdCount(int pid)
{
    std::string fdDir = "/proc/";
    fdDir += std::to_string(pid);
    fdDir += "/fd";

    int count = 0;
    const std::filesystem::directory_iterator end;
    for (std::filesystem::directory_iterator it(fdDir); it != end; ++it)
    {
        ++count;
    }
    return count;
}
210 
// Number of open file descriptors of one process, used to rank processes
// before they are reported.
struct FdStatEntry
{
    // Starts at zero so that a default-constructed entry is fully initialized.
    int fdCount = 0;
    std::string cmdline;
    std::string tcomm;

    // Processes with the largest fdCount go first.
    // Tie-breaking using cmdline then tcomm.
    bool operator<(const FdStatEntry& other) const
    {
        // The counts are swapped between the two sides instead of being
        // negated: same descending order, but no signed overflow when a
        // count equals INT_MIN.
        return std::tie(other.fdCount, cmdline, tcomm) <
               std::tie(fdCount, other.cmdline, other.tcomm);
    }
};
227 
getFdStatMetric(BmcHealthSnapshot & obj,long ticksPerSec,std::vector<bmcmetrics_metricproto_BmcFdStatMetric_BmcFdStat> & fds,bool & use)228 static bmcmetrics_metricproto_BmcFdStatMetric getFdStatMetric(
229     BmcHealthSnapshot& obj, long ticksPerSec,
230     std::vector<bmcmetrics_metricproto_BmcFdStatMetric_BmcFdStat>& fds,
231     bool& use) noexcept
232 {
233     if (ticksPerSec == 0)
234     {
235         return {};
236     }
237 
238     // Sort by fd count, no tie-breaking
239     std::vector<FdStatEntry> entries;
240 
241     const std::string_view procPath = "/proc/";
242     for (const auto& procEntry : std::filesystem::directory_iterator(procPath))
243     {
244         const std::string& path = procEntry.path();
245         int pid = 0;
246         FdStatEntry entry;
247         if (isNumericPath(path, pid))
248         {
249             try
250             {
251                 entry.fdCount = getFdCount(pid);
252                 TcommUtimeStime t = getTcommUtimeStime(pid, ticksPerSec);
253                 entry.cmdline = getCmdLine(pid);
254                 entry.tcomm = t.tcomm;
255                 entries.push_back(entry);
256             }
257             catch (const std::exception& e)
258             {
259                 log<level::ERR>("Could not get file descriptor stats");
260             }
261         }
262     }
263 
264     std::sort(entries.begin(), entries.end());
265 
266     bool isOthers = false;
267 
268     // Only report the detailed fd count and cmdline for the top 10 entries,
269     // and collapse all others into "others".
270     constexpr int topN = 10;
271 
272     FdStatEntry others;
273     others.cmdline = "(Others)";
274     others.fdCount = 0;
275 
276     for (size_t i = 0; i < entries.size(); ++i)
277     {
278         if (i >= topN)
279         {
280             isOthers = true;
281         }
282 
283         const FdStatEntry& entry = entries[i];
284         if (isOthers)
285         {
286             others.fdCount += entry.fdCount;
287         }
288         else
289         {
290             std::string fullCmdline = entry.cmdline;
291             if (entry.tcomm.size() > 0)
292             {
293                 fullCmdline += " ";
294                 fullCmdline += entry.tcomm;
295             }
296             fds.emplace_back(bmcmetrics_metricproto_BmcFdStatMetric_BmcFdStat{
297                 .sidx_cmdline = obj.getStringID(fullCmdline),
298                 .fd_count = entry.fdCount,
299             });
300         }
301     }
302 
303     if (isOthers)
304     {
305         fds.emplace_back(bmcmetrics_metricproto_BmcFdStatMetric_BmcFdStat{
306             .sidx_cmdline = obj.getStringID(others.cmdline),
307             .fd_count = others.fdCount,
308         });
309     }
310 
311     use = true;
312     return bmcmetrics_metricproto_BmcFdStatMetric{
313         .stats = pbSubsEncoder<
314             bmcmetrics_metricproto_BmcFdStatMetric_BmcFdStat_fields>(fds),
315     };
316 }
317 
getECCMetric(bool & use)318 static bmcmetrics_metricproto_BmcECCMetric getECCMetric(bool& use) noexcept
319 {
320     EccCounts eccCounts;
321     use = getECCErrorCounts(eccCounts);
322     if (!use)
323     {
324         return {};
325     }
326     return bmcmetrics_metricproto_BmcECCMetric{
327         .correctable_error_count = eccCounts.correctableErrCount,
328         .uncorrectable_error_count = eccCounts.uncorrectableErrCount,
329     };
330 }
331 
getMemMetric()332 static bmcmetrics_metricproto_BmcMemoryMetric getMemMetric() noexcept
333 {
334     bmcmetrics_metricproto_BmcMemoryMetric ret = {};
335     auto data = readFileThenGrepIntoString("/proc/meminfo");
336     int value;
337     if (parseMeminfoValue(data, "MemAvailable:", value))
338     {
339         ret.mem_available = value;
340     }
341     if (parseMeminfoValue(data, "Slab:", value))
342     {
343         ret.slab = value;
344     }
345 
346     if (parseMeminfoValue(data, "KernelStack:", value))
347     {
348         ret.kernel_stack = value;
349     }
350     return ret;
351 }
352 
353 static bmcmetrics_metricproto_BmcUptimeMetric
getUptimeMetric(bool & use)354     getUptimeMetric(bool& use) noexcept
355 {
356     bmcmetrics_metricproto_BmcUptimeMetric ret = {};
357 
358     double uptime = 0;
359     {
360         auto data = readFileThenGrepIntoString("/proc/uptime");
361         double idleProcessTime = 0;
362         if (!parseProcUptime(data, uptime, idleProcessTime))
363         {
364             log<level::ERR>("Error parsing /proc/uptime");
365             return ret;
366         }
367         ret.uptime = uptime;
368         ret.idle_process_time = idleProcessTime;
369     }
370 
371     BootTimesMonotonic btm;
372     if (!getBootTimesMonotonic(btm))
373     {
374         log<level::ERR>("Could not get boot time");
375         return ret;
376     }
377     if (btm.firmwareTime == 0 && btm.powerOnSecCounterTime != 0)
378     {
379         ret.firmware_boot_time_sec =
380             static_cast<double>(btm.powerOnSecCounterTime) - uptime;
381     }
382     else
383     {
384         ret.firmware_boot_time_sec =
385             static_cast<double>(btm.firmwareTime - btm.loaderTime) / 1e6;
386     }
387     ret.loader_boot_time_sec = static_cast<double>(btm.loaderTime) / 1e6;
388     if (btm.initrdTime != 0)
389     {
390         ret.kernel_boot_time_sec = static_cast<double>(btm.initrdTime) / 1e6;
391         ret.initrd_boot_time_sec =
392             static_cast<double>(btm.userspaceTime - btm.initrdTime) / 1e6;
393         ret.userspace_boot_time_sec =
394             static_cast<double>(btm.finishTime - btm.userspaceTime) / 1e6;
395     }
396     else
397     {
398         ret.kernel_boot_time_sec = static_cast<double>(btm.userspaceTime) / 1e6;
399         ret.initrd_boot_time_sec = 0;
400         ret.userspace_boot_time_sec =
401             static_cast<double>(btm.finishTime - btm.userspaceTime) / 1e6;
402     }
403 
404     use = true;
405     return ret;
406 }
407 
408 static bmcmetrics_metricproto_BmcDiskSpaceMetric
getStorageMetric(bool & use)409     getStorageMetric(bool& use) noexcept
410 {
411     bmcmetrics_metricproto_BmcDiskSpaceMetric ret = {};
412     struct statvfs rwFiData, tmpFiData;
413     if (statvfs("/", &rwFiData) < 0)
414     {
415         log<level::ERR>("Could not call statvfs");
416     }
417     else
418     {
419         ret.rwfs_kib_available = (rwFiData.f_bsize * rwFiData.f_bfree) / 1024;
420         use = true;
421     }
422     if (statvfs("/tmp", &tmpFiData) < 0)
423     {
424         log<level::ERR>("Could not call statvfs");
425     }
426     else
427     {
428         ret.tmpfs_kib_available =
429             (tmpFiData.f_bsize * tmpFiData.f_bfree) / 1024;
430         use = true;
431     }
432     return ret;
433 }
434 
// Collects all metrics, serializes them into `pbDump` and marks the snapshot
// as done. When encoding fails the error is logged and `done` stays false.
void BmcHealthSnapshot::doWork()
{
    // The process related metrics require a sane ticks_per_sec value,
    // typically 100 on the BMC. In the very rare circumstance when it's 0,
    // getProcStatMetric() and getFdStatMetric() return early and the snapshot
    // is produced without them.
    ticksPerSec = getTicksPerSec();

    // Encodes the string table: entry i of the output is the string that was
    // assigned ID i by getStringID().
    static constexpr auto stcb = [](pb_ostream_t* stream,
                                    const pb_field_t* field,
                                    void* const* arg) noexcept {
        auto& self = *reinterpret_cast<BmcHealthSnapshot*>(*arg);
        std::vector<std::string_view> strs(self.stringTable.size());
        for (const auto& [str, i] : self.stringTable)
        {
            strs[i] = str;
        }
        for (const auto& str : strs)
        {
            bmcmetrics_metricproto_BmcStringTable_StringEntry msg = {
                .value = pbStrEncoder(str),
            };
            if (!pb_encode_tag_for_field(stream, field) ||
                !pb_encode_submessage(
                    stream,
                    bmcmetrics_metricproto_BmcStringTable_StringEntry_fields,
                    &msg))
            {
                return false;
            }
        }
        return true;
    };

    // Backing storage for the repeated fields; the metrics below only keep
    // pointers to these vectors, so they must live until encoding is done.
    std::vector<bmcmetrics_metricproto_BmcProcStatMetric_BmcProcStat> procs;
    std::vector<bmcmetrics_metricproto_BmcFdStatMetric_BmcFdStat> fds;

    // Start from an all-zero message (every has_* flag false) and fill it in
    // field by field. The collectors receive a reference to their has_* flag,
    // so `snapshot` has to be a fully initialized object before they run
    // rather than being referenced from inside its own initializer.
    bmcmetrics_metricproto_BmcMetricSnapshot snapshot = {};
    snapshot.has_string_table = true;
    snapshot.string_table.entries = pb_callback_t{{.encode = stcb}, this};
    snapshot.has_memory_metric = true;
    snapshot.memory_metric = getMemMetric();
    snapshot.uptime_metric = getUptimeMetric(snapshot.has_uptime_metric);
    snapshot.storage_space_metric =
        getStorageMetric(snapshot.has_storage_space_metric);
    snapshot.procstat_metric = getProcStatMetric(
        *this, ticksPerSec, procs, snapshot.has_procstat_metric);
    snapshot.fdstat_metric =
        getFdStatMetric(*this, ticksPerSec, fds, snapshot.has_fdstat_metric);
    snapshot.ecc_metric = getECCMetric(snapshot.has_ecc_metric);

    // First pass: a stream without a buffer only counts the bytes.
    pb_ostream_t nost = {};
    if (!pb_encode(&nost, bmcmetrics_metricproto_BmcMetricSnapshot_fields,
                   &snapshot))
    {
        auto msg = std::format("Getting pb size: {}", PB_GET_ERROR(&nost));
        log<level::ERR>(msg.c_str());
        return;
    }

    // Second pass: encode into a buffer of exactly that size.
    pbDump.resize(nost.bytes_written);
    auto ost = pb_ostream_from_buffer(
        reinterpret_cast<pb_byte_t*>(pbDump.data()), pbDump.size());
    if (!pb_encode(&ost, bmcmetrics_metricproto_BmcMetricSnapshot_fields,
                   &snapshot))
    {
        auto msg = std::format("Writing pb msg: {}", PB_GET_ERROR(&ost));
        log<level::ERR>(msg.c_str());
        return;
    }
    done = true;
}
511 
512 // BmcBlobSessionStat (9) but passing meta as reference instead of pointer,
513 // since the metadata must not be null at this point.
stat(blobs::BlobMeta & meta)514 bool BmcHealthSnapshot::stat(blobs::BlobMeta& meta)
515 {
516     if (!done)
517     {
518         // Bits 8~15 are blob-specific state flags.
519         // For this blob, bit 8 is set when metric collection is still in
520         // progress.
521         meta.blobState |= (1 << 8);
522     }
523     else
524     {
525         meta.blobState = 0;
526         meta.blobState = blobs::StateFlags::open_read;
527         meta.size = pbDump.size();
528     }
529     return true;
530 }
531 
read(uint32_t offset,uint32_t requestedSize)532 std::string_view BmcHealthSnapshot::read(uint32_t offset,
533                                          uint32_t requestedSize)
534 {
535     uint32_t size = static_cast<uint32_t>(pbDump.size());
536     if (offset >= size)
537     {
538         return {};
539     }
540     return std::string_view(pbDump.data() + offset,
541                             std::min(requestedSize, size - offset));
542 }
543 
getStringID(const std::string_view s)544 int BmcHealthSnapshot::getStringID(const std::string_view s)
545 {
546     int ret = 0;
547     auto itr = stringTable.find(s.data());
548     if (itr == stringTable.end())
549     {
550         stringTable[s.data()] = stringId;
551         ret = stringId;
552         ++stringId;
553     }
554     else
555     {
556         ret = itr->second;
557     }
558     return ret;
559 }
560 
561 } // namespace metric_blob
562