xref: /openbmc/bmcweb/include/multipart_parser.hpp (revision d78572018fc2022091ff8b8eb5a7fef2172ba3d6)
1 // SPDX-License-Identifier: Apache-2.0
2 // SPDX-FileCopyrightText: Copyright OpenBMC Authors
3 #pragma once
4 
5 #include "http_request.hpp"
6 
7 #include <boost/beast/http/fields.hpp>
8 
9 #include <algorithm>
10 #include <array>
11 #include <cstddef>
12 #include <ranges>
13 #include <string>
14 #include <string_view>
15 #include <vector>
16 
// Outcome of MultipartParser::parse(). Enumerator order is significant: the
// implicit values start at zero and increase by one.
enum class ParserError
{
    // The whole body was consumed and the closing boundary was reached.
    PARSER_SUCCESS,
    // The Content-Type header does not carry a usable
    // "multipart/form-data; boundary=" prefix.
    ERROR_BOUNDARY_FORMAT,
    // The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    // The carriage return after the opening boundary is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF,
    // A byte of the opening boundary line differs from the expected boundary.
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (the header name is empty).
    ERROR_EMPTY_HEADER,
    // A header name contains a byte that is neither an ASCII letter nor '-'.
    ERROR_HEADER_NAME,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE,
    // The carriage return seen while reading a header name is not followed
    // by a line feed.
    ERROR_HEADER_ENDING,
    // A header line ended after some name bytes but before any ':'.
    ERROR_UNEXPECTED_END_OF_HEADER,
    // The body ended before the closing boundary was reached.
    ERROR_UNEXPECTED_END_OF_INPUT,
    // The boundary look-behind buffer would have been indexed out of range.
    ERROR_OUT_OF_RANGE
};
32 
// Positions of the multipart state machine driven by MultipartParser::parse().
enum class State
{
    // Nothing consumed yet; the next byte begins the opening boundary.
    START,
    // Matching the opening "--<boundary>" line and its trailing CR LF.
    START_BOUNDARY,
    // About to read the first byte of a header line.
    HEADER_FIELD_START,
    // Inside a header name, up to the ':' separator.
    HEADER_FIELD,
    // After the ':'; leading spaces of the value are being skipped.
    HEADER_VALUE_START,
    // Inside a header value, up to the terminating CR.
    HEADER_VALUE,
    // CR seen after a header value; LF expected next.
    HEADER_VALUE_ALMOST_DONE,
    // CR seen while reading a header name; LF expected next.
    HEADERS_ALMOST_DONE,
    // About to read the first byte of a part's content.
    PART_DATA_START,
    // Inside a part's content, scanning for the next boundary.
    PART_DATA,
    // The closing boundary has been seen.
    END
};
47 
// Kind of boundary candidate currently being confirmed inside part data.
enum class Boundary
{
    // No boundary terminator is pending.
    NON_BOUNDARY,
    // The boundary string was followed by CR: another part may follow.
    PART_BOUNDARY,
    // The boundary string was followed by '-': this may be the closing
    // boundary.
    END_BOUNDARY,
};
54 
55 struct FormPart
56 {
57     boost::beast::http::fields fields;
58     std::string content;
59 };
60 
61 class MultipartParser
62 {
63   public:
64     MultipartParser() = default;
65 
parse(const crow::Request & req)66     [[nodiscard]] ParserError parse(const crow::Request& req)
67     {
68         std::string_view contentType = req.getHeaderValue("content-type");
69 
70         const std::string boundaryFormat = "multipart/form-data; boundary=";
71         if (!contentType.starts_with(boundaryFormat))
72         {
73             return ParserError::ERROR_BOUNDARY_FORMAT;
74         }
75 
76         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
77 
78         boundary = "\r\n--";
79         boundary += ctBoundary;
80         indexBoundary();
81         lookbehind.resize(boundary.size() + 8);
82         state = State::START;
83 
84         const std::string& buffer = req.body();
85         size_t len = buffer.size();
86         char cl = 0;
87 
88         for (size_t i = 0; i < len; i++)
89         {
90             char c = buffer[i];
91             switch (state)
92             {
93                 case State::START:
94                     index = 0;
95                     state = State::START_BOUNDARY;
96                     [[fallthrough]];
97                 case State::START_BOUNDARY:
98                     if (index == boundary.size() - 2)
99                     {
100                         if (c != cr)
101                         {
102                             return ParserError::ERROR_BOUNDARY_CR;
103                         }
104                         index++;
105                         break;
106                     }
107                     else if (index - 1 == boundary.size() - 2)
108                     {
109                         if (c != lf)
110                         {
111                             return ParserError::ERROR_BOUNDARY_LF;
112                         }
113                         index = 0;
114                         mime_fields.emplace_back();
115                         state = State::HEADER_FIELD_START;
116                         break;
117                     }
118                     if (c != boundary[index + 2])
119                     {
120                         return ParserError::ERROR_BOUNDARY_DATA;
121                     }
122                     index++;
123                     break;
124                 case State::HEADER_FIELD_START:
125                     currentHeaderName.resize(0);
126                     state = State::HEADER_FIELD;
127                     headerFieldMark = i;
128                     index = 0;
129                     [[fallthrough]];
130                 case State::HEADER_FIELD:
131                     if (c == cr)
132                     {
133                         headerFieldMark = 0;
134                         state = State::HEADERS_ALMOST_DONE;
135                         break;
136                     }
137 
138                     index++;
139                     if (c == hyphen)
140                     {
141                         break;
142                     }
143 
144                     if (c == colon)
145                     {
146                         if (index == 1)
147                         {
148                             return ParserError::ERROR_EMPTY_HEADER;
149                         }
150 
151                         currentHeaderName.append(&buffer[headerFieldMark],
152                                                  i - headerFieldMark);
153                         state = State::HEADER_VALUE_START;
154                         break;
155                     }
156                     cl = lower(c);
157                     if (cl < 'a' || cl > 'z')
158                     {
159                         return ParserError::ERROR_HEADER_NAME;
160                     }
161                     break;
162                 case State::HEADER_VALUE_START:
163                     if (c == space)
164                     {
165                         break;
166                     }
167                     headerValueMark = i;
168                     state = State::HEADER_VALUE;
169                     [[fallthrough]];
170                 case State::HEADER_VALUE:
171                     if (c == cr)
172                     {
173                         std::string_view value(&buffer[headerValueMark],
174                                                i - headerValueMark);
175                         mime_fields.rbegin()->fields.set(currentHeaderName,
176                                                          value);
177                         state = State::HEADER_VALUE_ALMOST_DONE;
178                     }
179                     break;
180                 case State::HEADER_VALUE_ALMOST_DONE:
181                     if (c != lf)
182                     {
183                         return ParserError::ERROR_HEADER_VALUE;
184                     }
185                     state = State::HEADER_FIELD_START;
186                     break;
187                 case State::HEADERS_ALMOST_DONE:
188                     if (c != lf)
189                     {
190                         return ParserError::ERROR_HEADER_ENDING;
191                     }
192                     if (index > 0)
193                     {
194                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
195                     }
196                     state = State::PART_DATA_START;
197                     break;
198                 case State::PART_DATA_START:
199                     state = State::PART_DATA;
200                     partDataMark = i;
201                     [[fallthrough]];
202                 case State::PART_DATA:
203                 {
204                     if (index == 0)
205                     {
206                         skipNonBoundary(buffer, boundary.size() - 1, i);
207                         c = buffer[i];
208                     }
209                     if (auto ec = processPartData(buffer, i, c);
210                         ec != ParserError::PARSER_SUCCESS)
211                     {
212                         return ec;
213                     }
214                     break;
215                 }
216                 case State::END:
217                     break;
218                 default:
219                     return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
220             }
221         }
222 
223         if (state != State::END)
224         {
225             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
226         }
227 
228         return ParserError::PARSER_SUCCESS;
229     }
230     std::vector<FormPart> mime_fields;
231     std::string boundary;
232 
233   private:
indexBoundary()234     void indexBoundary()
235     {
236         std::ranges::fill(boundaryIndex, 0);
237         for (const char current : boundary)
238         {
239             boundaryIndex[static_cast<unsigned char>(current)] = true;
240         }
241     }
242 
lower(char c)243     static char lower(char c)
244     {
245         return static_cast<char>(c | 0x20);
246     }
247 
isBoundaryChar(char c) const248     bool isBoundaryChar(char c) const
249     {
250         return boundaryIndex[static_cast<unsigned char>(c)];
251     }
252 
skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)253     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
254                          size_t& i)
255     {
256         // boyer-moore derived algorithm to safely skip non-boundary data
257         while (i + boundary.size() <= buffer.length())
258         {
259             if (isBoundaryChar(buffer[i + boundaryEnd]))
260             {
261                 break;
262             }
263             i += boundary.size();
264         }
265     }
266 
processPartData(const std::string & buffer,size_t & i,char c)267     ParserError processPartData(const std::string& buffer, size_t& i, char c)
268     {
269         size_t prevIndex = index;
270 
271         if (index < boundary.size())
272         {
273             if (boundary[index] == c)
274             {
275                 if (index == 0)
276                 {
277                     const char* start = &buffer[partDataMark];
278                     size_t size = i - partDataMark;
279                     mime_fields.rbegin()->content +=
280                         std::string_view(start, size);
281                 }
282                 index++;
283             }
284             else
285             {
286                 index = 0;
287             }
288         }
289         else if (index == boundary.size())
290         {
291             index++;
292             if (c == cr)
293             {
294                 // cr = part boundary
295                 flags = Boundary::PART_BOUNDARY;
296             }
297             else if (c == hyphen)
298             {
299                 // hyphen = end boundary
300                 flags = Boundary::END_BOUNDARY;
301             }
302             else
303             {
304                 index = 0;
305             }
306         }
307         else
308         {
309             if (flags == Boundary::PART_BOUNDARY)
310             {
311                 index = 0;
312                 if (c == lf)
313                 {
314                     // unset the PART_BOUNDARY flag
315                     flags = Boundary::NON_BOUNDARY;
316                     mime_fields.emplace_back();
317                     state = State::HEADER_FIELD_START;
318                     return ParserError::PARSER_SUCCESS;
319                 }
320             }
321             if (flags == Boundary::END_BOUNDARY)
322             {
323                 if (c == hyphen)
324                 {
325                     state = State::END;
326                 }
327                 else
328                 {
329                     flags = Boundary::NON_BOUNDARY;
330                     index = 0;
331                 }
332             }
333         }
334 
335         if (index > 0)
336         {
337             if ((index - 1) >= lookbehind.size())
338             {
339                 // Should never happen, but when it does it won't cause crash
340                 return ParserError::ERROR_OUT_OF_RANGE;
341             }
342             lookbehind[index - 1] = c;
343         }
344         else if (prevIndex > 0)
345         {
346             // if our boundary turned out to be rubbish, the captured
347             // lookbehind belongs to partData
348 
349             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
350             partDataMark = i;
351 
352             // reconsider the current character even so it interrupted
353             // the sequence it could be the beginning of a new sequence
354             i--;
355         }
356         return ParserError::PARSER_SUCCESS;
357     }
358 
359     std::string currentHeaderName;
360     std::string currentHeaderValue;
361 
362     static constexpr char cr = '\r';
363     static constexpr char lf = '\n';
364     static constexpr char space = ' ';
365     static constexpr char hyphen = '-';
366     static constexpr char colon = ':';
367 
368     std::array<bool, 256> boundaryIndex{};
369     std::string lookbehind;
370     State state{State::START};
371     Boundary flags{Boundary::NON_BOUNDARY};
372     size_t index = 0;
373     size_t partDataMark = 0;
374     size_t headerFieldMark = 0;
375     size_t headerValueMark = 0;
376 };
377