xref: /openbmc/bmcweb/include/multipart_parser.hpp (revision d78572018fc2022091ff8b8eb5a7fef2172ba3d6)
140e9b92eSEd Tanous // SPDX-License-Identifier: Apache-2.0
240e9b92eSEd Tanous // SPDX-FileCopyrightText: Copyright OpenBMC Authors
3af4edf68SEd Tanous #pragma once
4af4edf68SEd Tanous 
53ccb3adbSEd Tanous #include "http_request.hpp"
63ccb3adbSEd Tanous 
7af4edf68SEd Tanous #include <boost/beast/http/fields.hpp>
8af4edf68SEd Tanous 
9*d7857201SEd Tanous #include <algorithm>
10*d7857201SEd Tanous #include <array>
11*d7857201SEd Tanous #include <cstddef>
123544d2a7SEd Tanous #include <ranges>
13af4edf68SEd Tanous #include <string>
14af4edf68SEd Tanous #include <string_view>
15*d7857201SEd Tanous #include <vector>
16af4edf68SEd Tanous 
/**
 * @brief Result codes returned by MultipartParser::parse().
 */
enum class ParserError
{
    /// The closing boundary was reached without any error.
    PARSER_SUCCESS,
    /// The content-type header does not start with
    /// "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with the boundary announced in content-type.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER,
    /// A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section is not followed by a
    /// line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended before its ':' separator was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary was seen.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal guard: the boundary look-behind buffer would be overrun.
    ERROR_OUT_OF_RANGE,
};
32af4edf68SEd Tanous 
/**
 * @brief States of the byte-by-byte state machine run by
 *        MultipartParser::parse().
 */
enum class State
{
    /// Initial state; resets the match index and falls into START_BOUNDARY.
    START,
    /// Matching the opening boundary line and its trailing CRLF.
    START_BOUNDARY,
    /// First byte of a header line (or of the blank line ending the headers).
    HEADER_FIELD_START,
    /// Inside a header name, up to the ':' separator.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Inside a header value, up to the terminating carriage return.
    HEADER_VALUE,
    /// Carriage return of a header line seen; a line feed must follow.
    HEADER_VALUE_ALMOST_DONE,
    /// Carriage return of the blank line seen; a line feed must follow.
    HEADERS_ALMOST_DONE,
    /// First byte of a part's content.
    PART_DATA_START,
    /// Inside a part's content, watching for the next boundary.
    PART_DATA,
    /// Closing boundary seen; remaining bytes are ignored.
    END,
};
47af4edf68SEd Tanous 
/**
 * @brief Kind of delimiter tentatively recognised while scanning part data,
 *        decided by the byte that follows a full boundary match.
 */
enum class Boundary
{
    /// No delimiter is currently being completed.
    NON_BOUNDARY,
    /// Boundary followed by a carriage return: another part follows.
    PART_BOUNDARY,
    /// Boundary followed by '-': candidate for the closing delimiter.
    END_BOUNDARY,
};
54af4edf68SEd Tanous 
55af4edf68SEd Tanous struct FormPart
56af4edf68SEd Tanous {
57af4edf68SEd Tanous     boost::beast::http::fields fields;
58af4edf68SEd Tanous     std::string content;
59af4edf68SEd Tanous };
60af4edf68SEd Tanous 
61af4edf68SEd Tanous class MultipartParser
62af4edf68SEd Tanous {
63af4edf68SEd Tanous   public:
64af4edf68SEd Tanous     MultipartParser() = default;
65af4edf68SEd Tanous 
parse(const crow::Request & req)66af4edf68SEd Tanous     [[nodiscard]] ParserError parse(const crow::Request& req)
67af4edf68SEd Tanous     {
68af4edf68SEd Tanous         std::string_view contentType = req.getHeaderValue("content-type");
69af4edf68SEd Tanous 
70af4edf68SEd Tanous         const std::string boundaryFormat = "multipart/form-data; boundary=";
7111ba3979SEd Tanous         if (!contentType.starts_with(boundaryFormat))
72af4edf68SEd Tanous         {
73af4edf68SEd Tanous             return ParserError::ERROR_BOUNDARY_FORMAT;
74af4edf68SEd Tanous         }
75af4edf68SEd Tanous 
76af4edf68SEd Tanous         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
77af4edf68SEd Tanous 
78af4edf68SEd Tanous         boundary = "\r\n--";
79af4edf68SEd Tanous         boundary += ctBoundary;
80af4edf68SEd Tanous         indexBoundary();
81af4edf68SEd Tanous         lookbehind.resize(boundary.size() + 8);
82af4edf68SEd Tanous         state = State::START;
83af4edf68SEd Tanous 
840e31e952SPatrick Williams         const std::string& buffer = req.body();
850e31e952SPatrick Williams         size_t len = buffer.size();
86af4edf68SEd Tanous         char cl = 0;
87af4edf68SEd Tanous 
88af4edf68SEd Tanous         for (size_t i = 0; i < len; i++)
89af4edf68SEd Tanous         {
90af4edf68SEd Tanous             char c = buffer[i];
91af4edf68SEd Tanous             switch (state)
92af4edf68SEd Tanous             {
93af4edf68SEd Tanous                 case State::START:
94af4edf68SEd Tanous                     index = 0;
95af4edf68SEd Tanous                     state = State::START_BOUNDARY;
96af4edf68SEd Tanous                     [[fallthrough]];
97af4edf68SEd Tanous                 case State::START_BOUNDARY:
98af4edf68SEd Tanous                     if (index == boundary.size() - 2)
99af4edf68SEd Tanous                     {
100af4edf68SEd Tanous                         if (c != cr)
101af4edf68SEd Tanous                         {
102af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_CR;
103af4edf68SEd Tanous                         }
104af4edf68SEd Tanous                         index++;
105af4edf68SEd Tanous                         break;
106af4edf68SEd Tanous                     }
107af4edf68SEd Tanous                     else if (index - 1 == boundary.size() - 2)
108af4edf68SEd Tanous                     {
109af4edf68SEd Tanous                         if (c != lf)
110af4edf68SEd Tanous                         {
111af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_LF;
112af4edf68SEd Tanous                         }
113af4edf68SEd Tanous                         index = 0;
11426eee3a1SPatrick Williams                         mime_fields.emplace_back();
115af4edf68SEd Tanous                         state = State::HEADER_FIELD_START;
116af4edf68SEd Tanous                         break;
117af4edf68SEd Tanous                     }
118af4edf68SEd Tanous                     if (c != boundary[index + 2])
119af4edf68SEd Tanous                     {
120af4edf68SEd Tanous                         return ParserError::ERROR_BOUNDARY_DATA;
121af4edf68SEd Tanous                     }
122af4edf68SEd Tanous                     index++;
123af4edf68SEd Tanous                     break;
124af4edf68SEd Tanous                 case State::HEADER_FIELD_START:
125af4edf68SEd Tanous                     currentHeaderName.resize(0);
126af4edf68SEd Tanous                     state = State::HEADER_FIELD;
127af4edf68SEd Tanous                     headerFieldMark = i;
128af4edf68SEd Tanous                     index = 0;
129af4edf68SEd Tanous                     [[fallthrough]];
130af4edf68SEd Tanous                 case State::HEADER_FIELD:
131af4edf68SEd Tanous                     if (c == cr)
132af4edf68SEd Tanous                     {
133af4edf68SEd Tanous                         headerFieldMark = 0;
134af4edf68SEd Tanous                         state = State::HEADERS_ALMOST_DONE;
135af4edf68SEd Tanous                         break;
136af4edf68SEd Tanous                     }
137af4edf68SEd Tanous 
138af4edf68SEd Tanous                     index++;
139af4edf68SEd Tanous                     if (c == hyphen)
140af4edf68SEd Tanous                     {
141af4edf68SEd Tanous                         break;
142af4edf68SEd Tanous                     }
143af4edf68SEd Tanous 
144af4edf68SEd Tanous                     if (c == colon)
145af4edf68SEd Tanous                     {
146af4edf68SEd Tanous                         if (index == 1)
147af4edf68SEd Tanous                         {
148af4edf68SEd Tanous                             return ParserError::ERROR_EMPTY_HEADER;
149af4edf68SEd Tanous                         }
150ca45aa3cSEd Tanous 
1510e31e952SPatrick Williams                         currentHeaderName.append(&buffer[headerFieldMark],
152af4edf68SEd Tanous                                                  i - headerFieldMark);
153af4edf68SEd Tanous                         state = State::HEADER_VALUE_START;
154af4edf68SEd Tanous                         break;
155af4edf68SEd Tanous                     }
156af4edf68SEd Tanous                     cl = lower(c);
157af4edf68SEd Tanous                     if (cl < 'a' || cl > 'z')
158af4edf68SEd Tanous                     {
159af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_NAME;
160af4edf68SEd Tanous                     }
161af4edf68SEd Tanous                     break;
162af4edf68SEd Tanous                 case State::HEADER_VALUE_START:
163af4edf68SEd Tanous                     if (c == space)
164af4edf68SEd Tanous                     {
165af4edf68SEd Tanous                         break;
166af4edf68SEd Tanous                     }
167af4edf68SEd Tanous                     headerValueMark = i;
168af4edf68SEd Tanous                     state = State::HEADER_VALUE;
169af4edf68SEd Tanous                     [[fallthrough]];
170af4edf68SEd Tanous                 case State::HEADER_VALUE:
171af4edf68SEd Tanous                     if (c == cr)
172af4edf68SEd Tanous                     {
1730e31e952SPatrick Williams                         std::string_view value(&buffer[headerValueMark],
174af4edf68SEd Tanous                                                i - headerValueMark);
175af4edf68SEd Tanous                         mime_fields.rbegin()->fields.set(currentHeaderName,
176af4edf68SEd Tanous                                                          value);
177af4edf68SEd Tanous                         state = State::HEADER_VALUE_ALMOST_DONE;
178af4edf68SEd Tanous                     }
179af4edf68SEd Tanous                     break;
180af4edf68SEd Tanous                 case State::HEADER_VALUE_ALMOST_DONE:
181af4edf68SEd Tanous                     if (c != lf)
182af4edf68SEd Tanous                     {
183af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_VALUE;
184af4edf68SEd Tanous                     }
185af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
186af4edf68SEd Tanous                     break;
187af4edf68SEd Tanous                 case State::HEADERS_ALMOST_DONE:
188af4edf68SEd Tanous                     if (c != lf)
189af4edf68SEd Tanous                     {
190af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_ENDING;
191af4edf68SEd Tanous                     }
19218e3f7fbSKrzysztof Grobelny                     if (index > 0)
19318e3f7fbSKrzysztof Grobelny                     {
19418e3f7fbSKrzysztof Grobelny                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
19518e3f7fbSKrzysztof Grobelny                     }
196af4edf68SEd Tanous                     state = State::PART_DATA_START;
197af4edf68SEd Tanous                     break;
198af4edf68SEd Tanous                 case State::PART_DATA_START:
199af4edf68SEd Tanous                     state = State::PART_DATA;
200af4edf68SEd Tanous                     partDataMark = i;
201af4edf68SEd Tanous                     [[fallthrough]];
202af4edf68SEd Tanous                 case State::PART_DATA:
20318e3f7fbSKrzysztof Grobelny                 {
204af4edf68SEd Tanous                     if (index == 0)
205af4edf68SEd Tanous                     {
2060e31e952SPatrick Williams                         skipNonBoundary(buffer, boundary.size() - 1, i);
207af4edf68SEd Tanous                         c = buffer[i];
208af4edf68SEd Tanous                     }
2090e31e952SPatrick Williams                     if (auto ec = processPartData(buffer, i, c);
2100e31e952SPatrick Williams                         ec != ParserError::PARSER_SUCCESS)
21118e3f7fbSKrzysztof Grobelny                     {
21218e3f7fbSKrzysztof Grobelny                         return ec;
21318e3f7fbSKrzysztof Grobelny                     }
214af4edf68SEd Tanous                     break;
21518e3f7fbSKrzysztof Grobelny                 }
216af4edf68SEd Tanous                 case State::END:
217af4edf68SEd Tanous                     break;
2184da0490bSEd Tanous                 default:
2194da0490bSEd Tanous                     return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
220af4edf68SEd Tanous             }
221af4edf68SEd Tanous         }
22218e3f7fbSKrzysztof Grobelny 
22318e3f7fbSKrzysztof Grobelny         if (state != State::END)
22418e3f7fbSKrzysztof Grobelny         {
22518e3f7fbSKrzysztof Grobelny             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
22618e3f7fbSKrzysztof Grobelny         }
22718e3f7fbSKrzysztof Grobelny 
228af4edf68SEd Tanous         return ParserError::PARSER_SUCCESS;
229af4edf68SEd Tanous     }
230af4edf68SEd Tanous     std::vector<FormPart> mime_fields;
231af4edf68SEd Tanous     std::string boundary;
232af4edf68SEd Tanous 
233af4edf68SEd Tanous   private:
indexBoundary()234af4edf68SEd Tanous     void indexBoundary()
235af4edf68SEd Tanous     {
2363544d2a7SEd Tanous         std::ranges::fill(boundaryIndex, 0);
237af4edf68SEd Tanous         for (const char current : boundary)
238af4edf68SEd Tanous         {
239af4edf68SEd Tanous             boundaryIndex[static_cast<unsigned char>(current)] = true;
240af4edf68SEd Tanous         }
241af4edf68SEd Tanous     }
242af4edf68SEd Tanous 
lower(char c)24356d2396dSEd Tanous     static char lower(char c)
244af4edf68SEd Tanous     {
245af4edf68SEd Tanous         return static_cast<char>(c | 0x20);
246af4edf68SEd Tanous     }
247af4edf68SEd Tanous 
isBoundaryChar(char c) const2489de65b34SEd Tanous     bool isBoundaryChar(char c) const
249af4edf68SEd Tanous     {
250af4edf68SEd Tanous         return boundaryIndex[static_cast<unsigned char>(c)];
251af4edf68SEd Tanous     }
252af4edf68SEd Tanous 
skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)2530e31e952SPatrick Williams     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
254af4edf68SEd Tanous                          size_t& i)
255af4edf68SEd Tanous     {
256af4edf68SEd Tanous         // boyer-moore derived algorithm to safely skip non-boundary data
2570e31e952SPatrick Williams         while (i + boundary.size() <= buffer.length())
258af4edf68SEd Tanous         {
259af4edf68SEd Tanous             if (isBoundaryChar(buffer[i + boundaryEnd]))
260af4edf68SEd Tanous             {
261af4edf68SEd Tanous                 break;
262af4edf68SEd Tanous             }
263af4edf68SEd Tanous             i += boundary.size();
264af4edf68SEd Tanous         }
265af4edf68SEd Tanous     }
266af4edf68SEd Tanous 
processPartData(const std::string & buffer,size_t & i,char c)2670e31e952SPatrick Williams     ParserError processPartData(const std::string& buffer, size_t& i, char c)
268af4edf68SEd Tanous     {
26918e3f7fbSKrzysztof Grobelny         size_t prevIndex = index;
270af4edf68SEd Tanous 
271af4edf68SEd Tanous         if (index < boundary.size())
272af4edf68SEd Tanous         {
273af4edf68SEd Tanous             if (boundary[index] == c)
274af4edf68SEd Tanous             {
275af4edf68SEd Tanous                 if (index == 0)
276af4edf68SEd Tanous                 {
2770e31e952SPatrick Williams                     const char* start = &buffer[partDataMark];
278ca45aa3cSEd Tanous                     size_t size = i - partDataMark;
279bd79bce8SPatrick Williams                     mime_fields.rbegin()->content +=
280bd79bce8SPatrick Williams                         std::string_view(start, size);
281af4edf68SEd Tanous                 }
282af4edf68SEd Tanous                 index++;
283af4edf68SEd Tanous             }
284af4edf68SEd Tanous             else
285af4edf68SEd Tanous             {
286af4edf68SEd Tanous                 index = 0;
287af4edf68SEd Tanous             }
288af4edf68SEd Tanous         }
289af4edf68SEd Tanous         else if (index == boundary.size())
290af4edf68SEd Tanous         {
291af4edf68SEd Tanous             index++;
292af4edf68SEd Tanous             if (c == cr)
293af4edf68SEd Tanous             {
294af4edf68SEd Tanous                 // cr = part boundary
295af4edf68SEd Tanous                 flags = Boundary::PART_BOUNDARY;
296af4edf68SEd Tanous             }
297af4edf68SEd Tanous             else if (c == hyphen)
298af4edf68SEd Tanous             {
299af4edf68SEd Tanous                 // hyphen = end boundary
300af4edf68SEd Tanous                 flags = Boundary::END_BOUNDARY;
301af4edf68SEd Tanous             }
302af4edf68SEd Tanous             else
303af4edf68SEd Tanous             {
304af4edf68SEd Tanous                 index = 0;
305af4edf68SEd Tanous             }
306af4edf68SEd Tanous         }
307af4edf68SEd Tanous         else
308af4edf68SEd Tanous         {
309af4edf68SEd Tanous             if (flags == Boundary::PART_BOUNDARY)
310af4edf68SEd Tanous             {
311af4edf68SEd Tanous                 index = 0;
312af4edf68SEd Tanous                 if (c == lf)
313af4edf68SEd Tanous                 {
314af4edf68SEd Tanous                     // unset the PART_BOUNDARY flag
315af4edf68SEd Tanous                     flags = Boundary::NON_BOUNDARY;
31626eee3a1SPatrick Williams                     mime_fields.emplace_back();
317af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
31818e3f7fbSKrzysztof Grobelny                     return ParserError::PARSER_SUCCESS;
319af4edf68SEd Tanous                 }
320af4edf68SEd Tanous             }
321af4edf68SEd Tanous             if (flags == Boundary::END_BOUNDARY)
322af4edf68SEd Tanous             {
323af4edf68SEd Tanous                 if (c == hyphen)
324af4edf68SEd Tanous                 {
325af4edf68SEd Tanous                     state = State::END;
326af4edf68SEd Tanous                 }
32718e3f7fbSKrzysztof Grobelny                 else
32818e3f7fbSKrzysztof Grobelny                 {
32918e3f7fbSKrzysztof Grobelny                     flags = Boundary::NON_BOUNDARY;
33018e3f7fbSKrzysztof Grobelny                     index = 0;
33118e3f7fbSKrzysztof Grobelny                 }
332af4edf68SEd Tanous             }
333af4edf68SEd Tanous         }
334af4edf68SEd Tanous 
335af4edf68SEd Tanous         if (index > 0)
336af4edf68SEd Tanous         {
33718e3f7fbSKrzysztof Grobelny             if ((index - 1) >= lookbehind.size())
33818e3f7fbSKrzysztof Grobelny             {
33918e3f7fbSKrzysztof Grobelny                 // Should never happen, but when it does it won't cause crash
34018e3f7fbSKrzysztof Grobelny                 return ParserError::ERROR_OUT_OF_RANGE;
34118e3f7fbSKrzysztof Grobelny             }
342af4edf68SEd Tanous             lookbehind[index - 1] = c;
343af4edf68SEd Tanous         }
344af4edf68SEd Tanous         else if (prevIndex > 0)
345af4edf68SEd Tanous         {
346af4edf68SEd Tanous             // if our boundary turned out to be rubbish, the captured
347af4edf68SEd Tanous             // lookbehind belongs to partData
348af4edf68SEd Tanous 
349af4edf68SEd Tanous             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
350af4edf68SEd Tanous             partDataMark = i;
351af4edf68SEd Tanous 
352af4edf68SEd Tanous             // reconsider the current character even so it interrupted
353af4edf68SEd Tanous             // the sequence it could be the beginning of a new sequence
354af4edf68SEd Tanous             i--;
355af4edf68SEd Tanous         }
35618e3f7fbSKrzysztof Grobelny         return ParserError::PARSER_SUCCESS;
357af4edf68SEd Tanous     }
358af4edf68SEd Tanous 
359af4edf68SEd Tanous     std::string currentHeaderName;
360af4edf68SEd Tanous     std::string currentHeaderValue;
361af4edf68SEd Tanous 
362af4edf68SEd Tanous     static constexpr char cr = '\r';
363af4edf68SEd Tanous     static constexpr char lf = '\n';
364af4edf68SEd Tanous     static constexpr char space = ' ';
365af4edf68SEd Tanous     static constexpr char hyphen = '-';
366af4edf68SEd Tanous     static constexpr char colon = ':';
367af4edf68SEd Tanous 
368d3a9e084SEd Tanous     std::array<bool, 256> boundaryIndex{};
369af4edf68SEd Tanous     std::string lookbehind;
370d3a9e084SEd Tanous     State state{State::START};
371d3a9e084SEd Tanous     Boundary flags{Boundary::NON_BOUNDARY};
372af4edf68SEd Tanous     size_t index = 0;
373af4edf68SEd Tanous     size_t partDataMark = 0;
374af4edf68SEd Tanous     size_t headerFieldMark = 0;
375af4edf68SEd Tanous     size_t headerValueMark = 0;
376af4edf68SEd Tanous };
377