xref: /openbmc/bmcweb/include/multipart_parser.hpp (revision 40e9b92ec19acffb46f83a6e55b18974da5d708e)
1 // SPDX-License-Identifier: Apache-2.0
2 // SPDX-FileCopyrightText: Copyright OpenBMC Authors
3 #pragma once
4 
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>
12 
/// Result codes reported by MultipartParser::parse().
enum class ParserError
{
    /// The body was parsed up to and including the closing delimiter.
    PARSER_SUCCESS,
    /// Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening delimiter is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening delimiter is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with "--" followed by the boundary token.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER,
    /// A header name contains a character other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return seen while reading a header name is not followed
    /// by a line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended after some name characters but before any ':'.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing delimiter was seen.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal guard: the look-behind buffer would have been overrun.
    ERROR_OUT_OF_RANGE,
};
28 
/// Positions of the MultipartParser state machine within the request body.
enum class State
{
    /// Nothing consumed yet; resets the match index and falls through to
    /// START_BOUNDARY.
    START,
    /// Matching the opening "--<token>" delimiter and the CRLF after it.
    START_BOUNDARY,
    /// First byte of a header line; records where the header name begins.
    HEADER_FIELD_START,
    /// Inside a header name, up to the ':' (or a CR that ends the headers).
    HEADER_FIELD,
    /// After the ':'; skipping spaces in front of the header value.
    HEADER_VALUE_START,
    /// Inside a header value, up to the terminating CR.
    HEADER_VALUE,
    /// CR of a header value seen; LF expected next.
    HEADER_VALUE_ALMOST_DONE,
    /// CR of the blank line ending the headers seen; LF expected next.
    HEADERS_ALMOST_DONE,
    /// First byte of a part's payload; records where the payload begins.
    PART_DATA_START,
    /// Inside a part's payload, watching for the next delimiter.
    PART_DATA,
    /// Closing delimiter seen; remaining input is ignored.
    END,
};
43 
/// What the byte directly after a fully matched delimiter suggested it is.
enum class Boundary
{
    /// No delimiter is currently being classified.
    NON_BOUNDARY,
    /// Delimiter followed by CR: another part is expected after the LF.
    PART_BOUNDARY,
    /// Delimiter followed by '-': a second '-' makes it the closing delimiter.
    END_BOUNDARY,
};
50 
51 struct FormPart
52 {
53     boost::beast::http::fields fields;
54     std::string content;
55 };
56 
57 class MultipartParser
58 {
59   public:
60     MultipartParser() = default;
61 
parse(const crow::Request & req)62     [[nodiscard]] ParserError parse(const crow::Request& req)
63     {
64         std::string_view contentType = req.getHeaderValue("content-type");
65 
66         const std::string boundaryFormat = "multipart/form-data; boundary=";
67         if (!contentType.starts_with(boundaryFormat))
68         {
69             return ParserError::ERROR_BOUNDARY_FORMAT;
70         }
71 
72         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
73 
74         boundary = "\r\n--";
75         boundary += ctBoundary;
76         indexBoundary();
77         lookbehind.resize(boundary.size() + 8);
78         state = State::START;
79 
80         const std::string& buffer = req.body();
81         size_t len = buffer.size();
82         char cl = 0;
83 
84         for (size_t i = 0; i < len; i++)
85         {
86             char c = buffer[i];
87             switch (state)
88             {
89                 case State::START:
90                     index = 0;
91                     state = State::START_BOUNDARY;
92                     [[fallthrough]];
93                 case State::START_BOUNDARY:
94                     if (index == boundary.size() - 2)
95                     {
96                         if (c != cr)
97                         {
98                             return ParserError::ERROR_BOUNDARY_CR;
99                         }
100                         index++;
101                         break;
102                     }
103                     else if (index - 1 == boundary.size() - 2)
104                     {
105                         if (c != lf)
106                         {
107                             return ParserError::ERROR_BOUNDARY_LF;
108                         }
109                         index = 0;
110                         mime_fields.emplace_back();
111                         state = State::HEADER_FIELD_START;
112                         break;
113                     }
114                     if (c != boundary[index + 2])
115                     {
116                         return ParserError::ERROR_BOUNDARY_DATA;
117                     }
118                     index++;
119                     break;
120                 case State::HEADER_FIELD_START:
121                     currentHeaderName.resize(0);
122                     state = State::HEADER_FIELD;
123                     headerFieldMark = i;
124                     index = 0;
125                     [[fallthrough]];
126                 case State::HEADER_FIELD:
127                     if (c == cr)
128                     {
129                         headerFieldMark = 0;
130                         state = State::HEADERS_ALMOST_DONE;
131                         break;
132                     }
133 
134                     index++;
135                     if (c == hyphen)
136                     {
137                         break;
138                     }
139 
140                     if (c == colon)
141                     {
142                         if (index == 1)
143                         {
144                             return ParserError::ERROR_EMPTY_HEADER;
145                         }
146 
147                         currentHeaderName.append(&buffer[headerFieldMark],
148                                                  i - headerFieldMark);
149                         state = State::HEADER_VALUE_START;
150                         break;
151                     }
152                     cl = lower(c);
153                     if (cl < 'a' || cl > 'z')
154                     {
155                         return ParserError::ERROR_HEADER_NAME;
156                     }
157                     break;
158                 case State::HEADER_VALUE_START:
159                     if (c == space)
160                     {
161                         break;
162                     }
163                     headerValueMark = i;
164                     state = State::HEADER_VALUE;
165                     [[fallthrough]];
166                 case State::HEADER_VALUE:
167                     if (c == cr)
168                     {
169                         std::string_view value(&buffer[headerValueMark],
170                                                i - headerValueMark);
171                         mime_fields.rbegin()->fields.set(currentHeaderName,
172                                                          value);
173                         state = State::HEADER_VALUE_ALMOST_DONE;
174                     }
175                     break;
176                 case State::HEADER_VALUE_ALMOST_DONE:
177                     if (c != lf)
178                     {
179                         return ParserError::ERROR_HEADER_VALUE;
180                     }
181                     state = State::HEADER_FIELD_START;
182                     break;
183                 case State::HEADERS_ALMOST_DONE:
184                     if (c != lf)
185                     {
186                         return ParserError::ERROR_HEADER_ENDING;
187                     }
188                     if (index > 0)
189                     {
190                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
191                     }
192                     state = State::PART_DATA_START;
193                     break;
194                 case State::PART_DATA_START:
195                     state = State::PART_DATA;
196                     partDataMark = i;
197                     [[fallthrough]];
198                 case State::PART_DATA:
199                 {
200                     if (index == 0)
201                     {
202                         skipNonBoundary(buffer, boundary.size() - 1, i);
203                         c = buffer[i];
204                     }
205                     if (auto ec = processPartData(buffer, i, c);
206                         ec != ParserError::PARSER_SUCCESS)
207                     {
208                         return ec;
209                     }
210                     break;
211                 }
212                 case State::END:
213                     break;
214                 default:
215                     return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
216             }
217         }
218 
219         if (state != State::END)
220         {
221             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
222         }
223 
224         return ParserError::PARSER_SUCCESS;
225     }
226     std::vector<FormPart> mime_fields;
227     std::string boundary;
228 
229   private:
indexBoundary()230     void indexBoundary()
231     {
232         std::ranges::fill(boundaryIndex, 0);
233         for (const char current : boundary)
234         {
235             boundaryIndex[static_cast<unsigned char>(current)] = true;
236         }
237     }
238 
lower(char c)239     static char lower(char c)
240     {
241         return static_cast<char>(c | 0x20);
242     }
243 
isBoundaryChar(char c) const244     bool isBoundaryChar(char c) const
245     {
246         return boundaryIndex[static_cast<unsigned char>(c)];
247     }
248 
skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)249     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
250                          size_t& i)
251     {
252         // boyer-moore derived algorithm to safely skip non-boundary data
253         while (i + boundary.size() <= buffer.length())
254         {
255             if (isBoundaryChar(buffer[i + boundaryEnd]))
256             {
257                 break;
258             }
259             i += boundary.size();
260         }
261     }
262 
processPartData(const std::string & buffer,size_t & i,char c)263     ParserError processPartData(const std::string& buffer, size_t& i, char c)
264     {
265         size_t prevIndex = index;
266 
267         if (index < boundary.size())
268         {
269             if (boundary[index] == c)
270             {
271                 if (index == 0)
272                 {
273                     const char* start = &buffer[partDataMark];
274                     size_t size = i - partDataMark;
275                     mime_fields.rbegin()->content +=
276                         std::string_view(start, size);
277                 }
278                 index++;
279             }
280             else
281             {
282                 index = 0;
283             }
284         }
285         else if (index == boundary.size())
286         {
287             index++;
288             if (c == cr)
289             {
290                 // cr = part boundary
291                 flags = Boundary::PART_BOUNDARY;
292             }
293             else if (c == hyphen)
294             {
295                 // hyphen = end boundary
296                 flags = Boundary::END_BOUNDARY;
297             }
298             else
299             {
300                 index = 0;
301             }
302         }
303         else
304         {
305             if (flags == Boundary::PART_BOUNDARY)
306             {
307                 index = 0;
308                 if (c == lf)
309                 {
310                     // unset the PART_BOUNDARY flag
311                     flags = Boundary::NON_BOUNDARY;
312                     mime_fields.emplace_back();
313                     state = State::HEADER_FIELD_START;
314                     return ParserError::PARSER_SUCCESS;
315                 }
316             }
317             if (flags == Boundary::END_BOUNDARY)
318             {
319                 if (c == hyphen)
320                 {
321                     state = State::END;
322                 }
323                 else
324                 {
325                     flags = Boundary::NON_BOUNDARY;
326                     index = 0;
327                 }
328             }
329         }
330 
331         if (index > 0)
332         {
333             if ((index - 1) >= lookbehind.size())
334             {
335                 // Should never happen, but when it does it won't cause crash
336                 return ParserError::ERROR_OUT_OF_RANGE;
337             }
338             lookbehind[index - 1] = c;
339         }
340         else if (prevIndex > 0)
341         {
342             // if our boundary turned out to be rubbish, the captured
343             // lookbehind belongs to partData
344 
345             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
346             partDataMark = i;
347 
348             // reconsider the current character even so it interrupted
349             // the sequence it could be the beginning of a new sequence
350             i--;
351         }
352         return ParserError::PARSER_SUCCESS;
353     }
354 
355     std::string currentHeaderName;
356     std::string currentHeaderValue;
357 
358     static constexpr char cr = '\r';
359     static constexpr char lf = '\n';
360     static constexpr char space = ' ';
361     static constexpr char hyphen = '-';
362     static constexpr char colon = ':';
363 
364     std::array<bool, 256> boundaryIndex{};
365     std::string lookbehind;
366     State state{State::START};
367     Boundary flags{Boundary::NON_BOUNDARY};
368     size_t index = 0;
369     size_t partDataMark = 0;
370     size_t headerFieldMark = 0;
371     size_t headerValueMark = 0;
372 };
373