1 #pragma once
2 
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <array>
#include <cstddef>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>
10 
// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    // The whole body was consumed and the closing delimiter was reached.
    PARSER_SUCCESS,
    // Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    // The opening delimiter is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    // The carriage return after the opening delimiter is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF,
    // The body does not begin with the expected opening delimiter.
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER,
    // A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE,
    // The carriage return seen while reading a header name is not followed
    // by a line feed.
    ERROR_HEADER_ENDING,
    // A header line ended (CR LF) in the middle of a header name.
    ERROR_UNEXPECTED_END_OF_HEADER,
    // The body ended before the closing delimiter was seen.
    ERROR_UNEXPECTED_END_OF_INPUT,
    // The internal lookbehind buffer would have been overrun.
    ERROR_OUT_OF_RANGE,
};
26 
// States of the MultipartParser state machine, in the order a well-formed
// part is walked through.
enum class State
{
    // Nothing consumed yet; resets the match index and moves on to
    // START_BOUNDARY without consuming a character.
    START,
    // Matching the opening delimiter line at the very start of the body.
    START_BOUNDARY,
    // At the first character of a header line (or of the blank line that
    // ends the header block).
    HEADER_FIELD_START,
    // Reading a header name up to ':'.
    HEADER_FIELD,
    // Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    // Reading a header value up to the carriage return.
    HEADER_VALUE,
    // Expecting the line feed that ends a header line.
    HEADER_VALUE_ALMOST_DONE,
    // Expecting the line feed of the blank line that ends the header block.
    HEADERS_ALMOST_DONE,
    // At the first byte of the part content.
    PART_DATA_START,
    // Reading part content while looking for the next delimiter.
    PART_DATA,
    // The closing delimiter was seen; remaining input is ignored.
    END,
};
41 
// What followed a fully matched delimiter while reading part content.
enum class Boundary
{
    // No delimiter is currently being completed.
    NON_BOUNDARY,
    // The delimiter was followed by a carriage return: another part follows
    // once the line feed arrives.
    PART_BOUNDARY,
    // The delimiter was followed by '-': this is the closing delimiter once
    // the second '-' arrives.
    END_BOUNDARY,
};
48 
49 struct FormPart
50 {
51     boost::beast::http::fields fields;
52     std::string content;
53 };
54 
55 class MultipartParser
56 {
57   public:
58     MultipartParser() = default;
59 
60     [[nodiscard]] ParserError parse(const crow::Request& req)
61     {
62         std::string_view contentType = req.getHeaderValue("content-type");
63 
64         const std::string boundaryFormat = "multipart/form-data; boundary=";
65         if (!contentType.starts_with(boundaryFormat))
66         {
67             return ParserError::ERROR_BOUNDARY_FORMAT;
68         }
69 
70         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
71 
72         boundary = "\r\n--";
73         boundary += ctBoundary;
74         indexBoundary();
75         lookbehind.resize(boundary.size() + 8);
76         state = State::START;
77 
78         const std::string& buffer = req.body();
79         size_t len = buffer.size();
80         char cl = 0;
81 
82         for (size_t i = 0; i < len; i++)
83         {
84             char c = buffer[i];
85             switch (state)
86             {
87                 case State::START:
88                     index = 0;
89                     state = State::START_BOUNDARY;
90                     [[fallthrough]];
91                 case State::START_BOUNDARY:
92                     if (index == boundary.size() - 2)
93                     {
94                         if (c != cr)
95                         {
96                             return ParserError::ERROR_BOUNDARY_CR;
97                         }
98                         index++;
99                         break;
100                     }
101                     else if (index - 1 == boundary.size() - 2)
102                     {
103                         if (c != lf)
104                         {
105                             return ParserError::ERROR_BOUNDARY_LF;
106                         }
107                         index = 0;
108                         mime_fields.emplace_back();
109                         state = State::HEADER_FIELD_START;
110                         break;
111                     }
112                     if (c != boundary[index + 2])
113                     {
114                         return ParserError::ERROR_BOUNDARY_DATA;
115                     }
116                     index++;
117                     break;
118                 case State::HEADER_FIELD_START:
119                     currentHeaderName.resize(0);
120                     state = State::HEADER_FIELD;
121                     headerFieldMark = i;
122                     index = 0;
123                     [[fallthrough]];
124                 case State::HEADER_FIELD:
125                     if (c == cr)
126                     {
127                         headerFieldMark = 0;
128                         state = State::HEADERS_ALMOST_DONE;
129                         break;
130                     }
131 
132                     index++;
133                     if (c == hyphen)
134                     {
135                         break;
136                     }
137 
138                     if (c == colon)
139                     {
140                         if (index == 1)
141                         {
142                             return ParserError::ERROR_EMPTY_HEADER;
143                         }
144 
145                         currentHeaderName.append(&buffer[headerFieldMark],
146                                                  i - headerFieldMark);
147                         state = State::HEADER_VALUE_START;
148                         break;
149                     }
150                     cl = lower(c);
151                     if (cl < 'a' || cl > 'z')
152                     {
153                         return ParserError::ERROR_HEADER_NAME;
154                     }
155                     break;
156                 case State::HEADER_VALUE_START:
157                     if (c == space)
158                     {
159                         break;
160                     }
161                     headerValueMark = i;
162                     state = State::HEADER_VALUE;
163                     [[fallthrough]];
164                 case State::HEADER_VALUE:
165                     if (c == cr)
166                     {
167                         std::string_view value(&buffer[headerValueMark],
168                                                i - headerValueMark);
169                         mime_fields.rbegin()->fields.set(currentHeaderName,
170                                                          value);
171                         state = State::HEADER_VALUE_ALMOST_DONE;
172                     }
173                     break;
174                 case State::HEADER_VALUE_ALMOST_DONE:
175                     if (c != lf)
176                     {
177                         return ParserError::ERROR_HEADER_VALUE;
178                     }
179                     state = State::HEADER_FIELD_START;
180                     break;
181                 case State::HEADERS_ALMOST_DONE:
182                     if (c != lf)
183                     {
184                         return ParserError::ERROR_HEADER_ENDING;
185                     }
186                     if (index > 0)
187                     {
188                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
189                     }
190                     state = State::PART_DATA_START;
191                     break;
192                 case State::PART_DATA_START:
193                     state = State::PART_DATA;
194                     partDataMark = i;
195                     [[fallthrough]];
196                 case State::PART_DATA:
197                 {
198                     if (index == 0)
199                     {
200                         skipNonBoundary(buffer, boundary.size() - 1, i);
201                         c = buffer[i];
202                     }
203                     if (auto ec = processPartData(buffer, i, c);
204                         ec != ParserError::PARSER_SUCCESS)
205                     {
206                         return ec;
207                     }
208                     break;
209                 }
210                 case State::END:
211                     break;
212                 default:
213                     return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
214             }
215         }
216 
217         if (state != State::END)
218         {
219             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
220         }
221 
222         return ParserError::PARSER_SUCCESS;
223     }
224     std::vector<FormPart> mime_fields;
225     std::string boundary;
226 
227   private:
228     void indexBoundary()
229     {
230         std::ranges::fill(boundaryIndex, 0);
231         for (const char current : boundary)
232         {
233             boundaryIndex[static_cast<unsigned char>(current)] = true;
234         }
235     }
236 
237     static char lower(char c)
238     {
239         return static_cast<char>(c | 0x20);
240     }
241 
242     bool isBoundaryChar(char c) const
243     {
244         return boundaryIndex[static_cast<unsigned char>(c)];
245     }
246 
247     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
248                          size_t& i)
249     {
250         // boyer-moore derived algorithm to safely skip non-boundary data
251         while (i + boundary.size() <= buffer.length())
252         {
253             if (isBoundaryChar(buffer[i + boundaryEnd]))
254             {
255                 break;
256             }
257             i += boundary.size();
258         }
259     }
260 
261     ParserError processPartData(const std::string& buffer, size_t& i, char c)
262     {
263         size_t prevIndex = index;
264 
265         if (index < boundary.size())
266         {
267             if (boundary[index] == c)
268             {
269                 if (index == 0)
270                 {
271                     const char* start = &buffer[partDataMark];
272                     size_t size = i - partDataMark;
273                     mime_fields.rbegin()->content +=
274                         std::string_view(start, size);
275                 }
276                 index++;
277             }
278             else
279             {
280                 index = 0;
281             }
282         }
283         else if (index == boundary.size())
284         {
285             index++;
286             if (c == cr)
287             {
288                 // cr = part boundary
289                 flags = Boundary::PART_BOUNDARY;
290             }
291             else if (c == hyphen)
292             {
293                 // hyphen = end boundary
294                 flags = Boundary::END_BOUNDARY;
295             }
296             else
297             {
298                 index = 0;
299             }
300         }
301         else
302         {
303             if (flags == Boundary::PART_BOUNDARY)
304             {
305                 index = 0;
306                 if (c == lf)
307                 {
308                     // unset the PART_BOUNDARY flag
309                     flags = Boundary::NON_BOUNDARY;
310                     mime_fields.emplace_back();
311                     state = State::HEADER_FIELD_START;
312                     return ParserError::PARSER_SUCCESS;
313                 }
314             }
315             if (flags == Boundary::END_BOUNDARY)
316             {
317                 if (c == hyphen)
318                 {
319                     state = State::END;
320                 }
321                 else
322                 {
323                     flags = Boundary::NON_BOUNDARY;
324                     index = 0;
325                 }
326             }
327         }
328 
329         if (index > 0)
330         {
331             if ((index - 1) >= lookbehind.size())
332             {
333                 // Should never happen, but when it does it won't cause crash
334                 return ParserError::ERROR_OUT_OF_RANGE;
335             }
336             lookbehind[index - 1] = c;
337         }
338         else if (prevIndex > 0)
339         {
340             // if our boundary turned out to be rubbish, the captured
341             // lookbehind belongs to partData
342 
343             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
344             partDataMark = i;
345 
346             // reconsider the current character even so it interrupted
347             // the sequence it could be the beginning of a new sequence
348             i--;
349         }
350         return ParserError::PARSER_SUCCESS;
351     }
352 
353     std::string currentHeaderName;
354     std::string currentHeaderValue;
355 
356     static constexpr char cr = '\r';
357     static constexpr char lf = '\n';
358     static constexpr char space = ' ';
359     static constexpr char hyphen = '-';
360     static constexpr char colon = ':';
361 
362     std::array<bool, 256> boundaryIndex{};
363     std::string lookbehind;
364     State state{State::START};
365     Boundary flags{Boundary::NON_BOUNDARY};
366     size_t index = 0;
367     size_t partDataMark = 0;
368     size_t headerFieldMark = 0;
369     size_t headerValueMark = 0;
370 };
371