// xref: /openbmc/bmcweb/include/multipart_parser.hpp (revision 64fe8020cfd6f2b6fb25249cc42daa9550c595b9)
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright OpenBMC Authors
3 #pragma once
4 
5 #include "http_request.hpp"
6 
7 #include <boost/beast/http/fields.hpp>
8 
9 #include <algorithm>
10 #include <array>
11 #include <cstddef>
12 #include <ranges>
13 #include <string>
14 #include <string_view>
15 #include <vector>
16 
// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    // The whole body was consumed and the final boundary was seen.
    PARSER_SUCCESS,
    // The content-type header does not start with
    // "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    // The opening boundary is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    // The carriage return after the opening boundary is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF,
    // The body does not begin with "--" followed by the boundary text.
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER,
    // A header name contains a character that is neither a letter nor '-'.
    ERROR_HEADER_NAME,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE,
    // A carriage return found where a header name is expected is not followed
    // by a line feed.
    ERROR_HEADER_ENDING,
    // A header line ended after some name characters but before any ':'.
    ERROR_UNEXPECTED_END_OF_HEADER,
    // The body ended before the final boundary was seen.
    ERROR_UNEXPECTED_END_OF_INPUT,
    // Something other than a single CRLF follows the final boundary.
    ERROR_DATA_AFTER_FINAL_BOUNDARY,
    // Internal guard: the boundary match index ran past the lookbehind buffer.
    ERROR_OUT_OF_RANGE
};
33 
// States of the MultipartParser::parse() state machine. Each body byte is
// dispatched on the current state.
enum class State
{
    // Initial state; resets the match index and moves to START_BOUNDARY.
    START,
    // Matching the opening "--<boundary>" line and its trailing CRLF.
    START_BOUNDARY,
    // About to read the first character of a header line.
    HEADER_FIELD_START,
    // Reading a header name up to ':'.
    HEADER_FIELD,
    // Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    // Reading a header value up to the carriage return.
    HEADER_VALUE,
    // Saw the carriage return ending a header value; expecting a line feed.
    HEADER_VALUE_ALMOST_DONE,
    // Saw the carriage return of the blank line ending the headers;
    // expecting a line feed.
    HEADERS_ALMOST_DONE,
    // About to read the first byte of a part's content.
    PART_DATA_START,
    // Reading part content while searching for the next boundary.
    PART_DATA,
    // The final boundary was seen; only a single CRLF may follow.
    END
};
48 
// What the character right after a fully matched boundary string announced.
enum class Boundary
{
    // No boundary is currently being completed.
    NON_BOUNDARY,
    // Boundary followed by a carriage return: another part follows.
    PART_BOUNDARY,
    // Boundary followed by a hyphen: this may be the final boundary.
    END_BOUNDARY,
};
55 
56 struct FormPart
57 {
58     boost::beast::http::fields fields;
59     std::string content;
60 };
61 
62 class MultipartParser
63 {
64   public:
65     MultipartParser() = default;
66 
parse(const crow::Request & req)67     [[nodiscard]] ParserError parse(const crow::Request& req)
68     {
69         std::string_view contentType = req.getHeaderValue("content-type");
70 
71         const std::string boundaryFormat = "multipart/form-data; boundary=";
72         if (!contentType.starts_with(boundaryFormat))
73         {
74             return ParserError::ERROR_BOUNDARY_FORMAT;
75         }
76 
77         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
78 
79         boundary = "\r\n--";
80         boundary += ctBoundary;
81         indexBoundary();
82         lookbehind.resize(boundary.size() + 8);
83         state = State::START;
84 
85         const std::string& buffer = req.body();
86         size_t len = buffer.size();
87         char cl = 0;
88 
89         for (size_t i = 0; i < len; i++)
90         {
91             char c = buffer[i];
92             switch (state)
93             {
94                 case State::START:
95                     index = 0;
96                     state = State::START_BOUNDARY;
97                     [[fallthrough]];
98                 case State::START_BOUNDARY:
99                     if (index == boundary.size() - 2)
100                     {
101                         if (c != cr)
102                         {
103                             return ParserError::ERROR_BOUNDARY_CR;
104                         }
105                         index++;
106                         break;
107                     }
108                     else if (index - 1 == boundary.size() - 2)
109                     {
110                         if (c != lf)
111                         {
112                             return ParserError::ERROR_BOUNDARY_LF;
113                         }
114                         index = 0;
115                         mime_fields.emplace_back();
116                         state = State::HEADER_FIELD_START;
117                         break;
118                     }
119                     if (c != boundary[index + 2])
120                     {
121                         return ParserError::ERROR_BOUNDARY_DATA;
122                     }
123                     index++;
124                     break;
125                 case State::HEADER_FIELD_START:
126                     currentHeaderName.resize(0);
127                     state = State::HEADER_FIELD;
128                     headerFieldMark = i;
129                     index = 0;
130                     [[fallthrough]];
131                 case State::HEADER_FIELD:
132                     if (c == cr)
133                     {
134                         headerFieldMark = 0;
135                         state = State::HEADERS_ALMOST_DONE;
136                         break;
137                     }
138 
139                     index++;
140                     if (c == hyphen)
141                     {
142                         break;
143                     }
144 
145                     if (c == colon)
146                     {
147                         if (index == 1)
148                         {
149                             return ParserError::ERROR_EMPTY_HEADER;
150                         }
151 
152                         currentHeaderName.append(&buffer[headerFieldMark],
153                                                  i - headerFieldMark);
154                         state = State::HEADER_VALUE_START;
155                         break;
156                     }
157                     cl = lower(c);
158                     if (cl < 'a' || cl > 'z')
159                     {
160                         return ParserError::ERROR_HEADER_NAME;
161                     }
162                     break;
163                 case State::HEADER_VALUE_START:
164                     if (c == space)
165                     {
166                         break;
167                     }
168                     headerValueMark = i;
169                     state = State::HEADER_VALUE;
170                     [[fallthrough]];
171                 case State::HEADER_VALUE:
172                     if (c == cr)
173                     {
174                         std::string_view value(&buffer[headerValueMark],
175                                                i - headerValueMark);
176                         mime_fields.rbegin()->fields.set(currentHeaderName,
177                                                          value);
178                         state = State::HEADER_VALUE_ALMOST_DONE;
179                     }
180                     break;
181                 case State::HEADER_VALUE_ALMOST_DONE:
182                     if (c != lf)
183                     {
184                         return ParserError::ERROR_HEADER_VALUE;
185                     }
186                     state = State::HEADER_FIELD_START;
187                     break;
188                 case State::HEADERS_ALMOST_DONE:
189                     if (c != lf)
190                     {
191                         return ParserError::ERROR_HEADER_ENDING;
192                     }
193                     if (index > 0)
194                     {
195                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
196                     }
197                     state = State::PART_DATA_START;
198                     break;
199                 case State::PART_DATA_START:
200                     state = State::PART_DATA;
201                     partDataMark = i;
202                     [[fallthrough]];
203                 case State::PART_DATA:
204                 {
205                     if (index == 0)
206                     {
207                         skipNonBoundary(buffer, boundary.size() - 1, i);
208                         c = buffer[i];
209                     }
210                     if (auto ec = processPartData(buffer, i, c);
211                         ec != ParserError::PARSER_SUCCESS)
212                     {
213                         return ec;
214                     }
215                     break;
216                 }
217                 case State::END:
218                     switch (index)
219                     {
220                         case 0:
221                             if (c != cr)
222                             {
223                                 return ParserError::
224                                     ERROR_DATA_AFTER_FINAL_BOUNDARY;
225                             }
226                             index++;
227                             break;
228                         case 1:
229                             if (c != lf)
230                             {
231                                 return ParserError::
232                                     ERROR_DATA_AFTER_FINAL_BOUNDARY;
233                             }
234                             index++;
235                             break;
236                         default:
237                             return ParserError::ERROR_DATA_AFTER_FINAL_BOUNDARY;
238                     }
239                     break;
240                 default:
241                     return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
242             }
243         }
244 
245         if (state != State::END)
246         {
247             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
248         }
249 
250         return ParserError::PARSER_SUCCESS;
251     }
252     std::vector<FormPart> mime_fields;
253     std::string boundary;
254 
255   private:
indexBoundary()256     void indexBoundary()
257     {
258         std::ranges::fill(boundaryIndex, 0);
259         for (const char current : boundary)
260         {
261             boundaryIndex[static_cast<unsigned char>(current)] = true;
262         }
263     }
264 
lower(char c)265     static char lower(char c)
266     {
267         return static_cast<char>(c | 0x20);
268     }
269 
isBoundaryChar(char c) const270     bool isBoundaryChar(char c) const
271     {
272         return boundaryIndex[static_cast<unsigned char>(c)];
273     }
274 
skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)275     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
276                          size_t& i)
277     {
278         // boyer-moore derived algorithm to safely skip non-boundary data
279         while (i + boundary.size() <= buffer.length())
280         {
281             if (isBoundaryChar(buffer[i + boundaryEnd]))
282             {
283                 break;
284             }
285             i += boundary.size();
286         }
287     }
288 
processPartData(const std::string & buffer,size_t & i,char c)289     ParserError processPartData(const std::string& buffer, size_t& i, char c)
290     {
291         size_t prevIndex = index;
292 
293         if (index < boundary.size())
294         {
295             if (boundary[index] == c)
296             {
297                 if (index == 0)
298                 {
299                     const char* start = &buffer[partDataMark];
300                     size_t size = i - partDataMark;
301                     mime_fields.rbegin()->content +=
302                         std::string_view(start, size);
303                 }
304                 index++;
305             }
306             else
307             {
308                 index = 0;
309             }
310         }
311         else if (index == boundary.size())
312         {
313             index++;
314             if (c == cr)
315             {
316                 // cr = part boundary
317                 flags = Boundary::PART_BOUNDARY;
318             }
319             else if (c == hyphen)
320             {
321                 // hyphen = end boundary
322                 flags = Boundary::END_BOUNDARY;
323             }
324             else
325             {
326                 index = 0;
327             }
328         }
329         else
330         {
331             if (flags == Boundary::PART_BOUNDARY)
332             {
333                 index = 0;
334                 if (c == lf)
335                 {
336                     // unset the PART_BOUNDARY flag
337                     flags = Boundary::NON_BOUNDARY;
338                     mime_fields.emplace_back();
339                     state = State::HEADER_FIELD_START;
340                     return ParserError::PARSER_SUCCESS;
341                 }
342             }
343             if (flags == Boundary::END_BOUNDARY)
344             {
345                 if (c == hyphen)
346                 {
347                     state = State::END;
348                 }
349                 else
350                 {
351                     flags = Boundary::NON_BOUNDARY;
352                     index = 0;
353                 }
354             }
355         }
356 
357         if (index > 0)
358         {
359             if ((index - 1) >= lookbehind.size())
360             {
361                 // Should never happen, but when it does it won't cause crash
362                 return ParserError::ERROR_OUT_OF_RANGE;
363             }
364             lookbehind[index - 1] = c;
365         }
366         else if (prevIndex > 0)
367         {
368             // if our boundary turned out to be rubbish, the captured
369             // lookbehind belongs to partData
370 
371             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
372             partDataMark = i;
373 
374             // reconsider the current character even so it interrupted
375             // the sequence it could be the beginning of a new sequence
376             i--;
377         }
378         if (state == State::END)
379         {
380             index = 0;
381         }
382         return ParserError::PARSER_SUCCESS;
383     }
384 
385     std::string currentHeaderName;
386     std::string currentHeaderValue;
387 
388     static constexpr char cr = '\r';
389     static constexpr char lf = '\n';
390     static constexpr char space = ' ';
391     static constexpr char hyphen = '-';
392     static constexpr char colon = ':';
393 
394     std::array<bool, 256> boundaryIndex{};
395     std::string lookbehind;
396     State state{State::START};
397     Boundary flags{Boundary::NON_BOUNDARY};
398     size_t index = 0;
399     size_t partDataMark = 0;
400     size_t headerFieldMark = 0;
401     size_t headerValueMark = 0;
402 };
403