1 #pragma once
2 
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>
10 
// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    // The whole body was consumed and the closing delimiter was seen
    PARSER_SUCCESS,
    // Content-Type does not start with "multipart/form-data; boundary="
    ERROR_BOUNDARY_FORMAT,
    // The first delimiter in the body is not followed by CR
    ERROR_BOUNDARY_CR,
    // The CR after the first delimiter is not followed by LF
    ERROR_BOUNDARY_LF,
    // The body does not begin with the expected delimiter
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (no header name)
    ERROR_EMPTY_HEADER,
    // A header name contains a character other than a letter or '-'
    ERROR_HEADER_NAME,
    // The CR terminating a header value is not followed by LF
    ERROR_HEADER_VALUE,
    // A CR met while reading a header name is not followed by LF
    ERROR_HEADER_ENDING,
    // A header line ended (CR LF) before its ':' separator was seen
    ERROR_UNEXPECTED_END_OF_HEADER,
    // The body ended before the closing delimiter was seen
    ERROR_UNEXPECTED_END_OF_INPUT,
    // Internal guard: write position outside the lookbehind buffer
    ERROR_OUT_OF_RANGE
};
26 
// States of the byte-by-byte state machine run by MultipartParser::parse().
enum class State
{
    // Initial state; resets the match index and falls into START_BOUNDARY
    START,
    // Matching the first delimiter of the body and the CR LF after it
    START_BOUNDARY,
    // First character of a header line; records where the name begins
    HEADER_FIELD_START,
    // Reading a header name up to ':' (or CR for the empty line)
    HEADER_FIELD,
    // Skipping spaces between ':' and the header value
    HEADER_VALUE_START,
    // Reading a header value up to CR
    HEADER_VALUE,
    // CR seen after a header value; LF expected
    HEADER_VALUE_ALMOST_DONE,
    // CR seen while reading a header name; LF expected
    HEADERS_ALMOST_DONE,
    // First byte of a part's payload; records where the payload begins
    PART_DATA_START,
    // Reading payload bytes while looking for the next delimiter
    PART_DATA,
    // Closing delimiter seen; remaining input is ignored
    END
};
41 
// Kind of delimiter suggested by the character that follows a fully matched
// boundary string inside part data.
enum class Boundary
{
    // No delimiter is pending
    NON_BOUNDARY,
    // Boundary followed by CR: another part is expected after the LF
    PART_BOUNDARY,
    // Boundary followed by '-': a second '-' ends the multipart body
    END_BOUNDARY,
};
48 
// One part of a multipart body as collected by MultipartParser.
struct FormPart
{
    // Headers of the part, stored by header name
    boost::beast::http::fields fields;
    // Payload bytes found between the part's headers and the next delimiter
    std::string content;
};
54 
55 class MultipartParser
56 {
57   public:
58     MultipartParser() = default;
59 
60     [[nodiscard]] ParserError parse(const crow::Request& req)
61     {
62         std::string_view contentType = req.getHeaderValue("content-type");
63 
64         const std::string boundaryFormat = "multipart/form-data; boundary=";
65         if (!contentType.starts_with(boundaryFormat))
66         {
67             return ParserError::ERROR_BOUNDARY_FORMAT;
68         }
69 
70         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
71 
72         boundary = "\r\n--";
73         boundary += ctBoundary;
74         indexBoundary();
75         lookbehind.resize(boundary.size() + 8);
76         state = State::START;
77 
78         const std::string& buffer = req.body();
79         size_t len = buffer.size();
80         char cl = 0;
81 
82         for (size_t i = 0; i < len; i++)
83         {
84             char c = buffer[i];
85             switch (state)
86             {
87                 case State::START:
88                     index = 0;
89                     state = State::START_BOUNDARY;
90                     [[fallthrough]];
91                 case State::START_BOUNDARY:
92                     if (index == boundary.size() - 2)
93                     {
94                         if (c != cr)
95                         {
96                             return ParserError::ERROR_BOUNDARY_CR;
97                         }
98                         index++;
99                         break;
100                     }
101                     else if (index - 1 == boundary.size() - 2)
102                     {
103                         if (c != lf)
104                         {
105                             return ParserError::ERROR_BOUNDARY_LF;
106                         }
107                         index = 0;
108                         mime_fields.push_back({});
109                         state = State::HEADER_FIELD_START;
110                         break;
111                     }
112                     if (c != boundary[index + 2])
113                     {
114                         return ParserError::ERROR_BOUNDARY_DATA;
115                     }
116                     index++;
117                     break;
118                 case State::HEADER_FIELD_START:
119                     currentHeaderName.resize(0);
120                     state = State::HEADER_FIELD;
121                     headerFieldMark = i;
122                     index = 0;
123                     [[fallthrough]];
124                 case State::HEADER_FIELD:
125                     if (c == cr)
126                     {
127                         headerFieldMark = 0;
128                         state = State::HEADERS_ALMOST_DONE;
129                         break;
130                     }
131 
132                     index++;
133                     if (c == hyphen)
134                     {
135                         break;
136                     }
137 
138                     if (c == colon)
139                     {
140                         if (index == 1)
141                         {
142                             return ParserError::ERROR_EMPTY_HEADER;
143                         }
144 
145                         currentHeaderName.append(&buffer[headerFieldMark],
146                                                  i - headerFieldMark);
147                         state = State::HEADER_VALUE_START;
148                         break;
149                     }
150                     cl = lower(c);
151                     if (cl < 'a' || cl > 'z')
152                     {
153                         return ParserError::ERROR_HEADER_NAME;
154                     }
155                     break;
156                 case State::HEADER_VALUE_START:
157                     if (c == space)
158                     {
159                         break;
160                     }
161                     headerValueMark = i;
162                     state = State::HEADER_VALUE;
163                     [[fallthrough]];
164                 case State::HEADER_VALUE:
165                     if (c == cr)
166                     {
167                         std::string_view value(&buffer[headerValueMark],
168                                                i - headerValueMark);
169                         mime_fields.rbegin()->fields.set(currentHeaderName,
170                                                          value);
171                         state = State::HEADER_VALUE_ALMOST_DONE;
172                     }
173                     break;
174                 case State::HEADER_VALUE_ALMOST_DONE:
175                     if (c != lf)
176                     {
177                         return ParserError::ERROR_HEADER_VALUE;
178                     }
179                     state = State::HEADER_FIELD_START;
180                     break;
181                 case State::HEADERS_ALMOST_DONE:
182                     if (c != lf)
183                     {
184                         return ParserError::ERROR_HEADER_ENDING;
185                     }
186                     if (index > 0)
187                     {
188                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
189                     }
190                     state = State::PART_DATA_START;
191                     break;
192                 case State::PART_DATA_START:
193                     state = State::PART_DATA;
194                     partDataMark = i;
195                     [[fallthrough]];
196                 case State::PART_DATA:
197                 {
198                     if (index == 0)
199                     {
200                         skipNonBoundary(buffer, boundary.size() - 1, i);
201                         c = buffer[i];
202                     }
203                     if (auto ec = processPartData(buffer, i, c);
204                         ec != ParserError::PARSER_SUCCESS)
205                     {
206                         return ec;
207                     }
208                     break;
209                 }
210                 case State::END:
211                     break;
212             }
213         }
214 
215         if (state != State::END)
216         {
217             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
218         }
219 
220         return ParserError::PARSER_SUCCESS;
221     }
222     std::vector<FormPart> mime_fields;
223     std::string boundary;
224 
225   private:
226     void indexBoundary()
227     {
228         std::ranges::fill(boundaryIndex, 0);
229         for (const char current : boundary)
230         {
231             boundaryIndex[static_cast<unsigned char>(current)] = true;
232         }
233     }
234 
235     static char lower(char c)
236     {
237         return static_cast<char>(c | 0x20);
238     }
239 
240     inline bool isBoundaryChar(char c) const
241     {
242         return boundaryIndex[static_cast<unsigned char>(c)];
243     }
244 
245     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
246                          size_t& i)
247     {
248         // boyer-moore derived algorithm to safely skip non-boundary data
249         while (i + boundary.size() <= buffer.length())
250         {
251             if (isBoundaryChar(buffer[i + boundaryEnd]))
252             {
253                 break;
254             }
255             i += boundary.size();
256         }
257     }
258 
259     ParserError processPartData(const std::string& buffer, size_t& i, char c)
260     {
261         size_t prevIndex = index;
262 
263         if (index < boundary.size())
264         {
265             if (boundary[index] == c)
266             {
267                 if (index == 0)
268                 {
269                     const char* start = &buffer[partDataMark];
270                     size_t size = i - partDataMark;
271                     mime_fields.rbegin()->content += std::string_view(start,
272                                                                       size);
273                 }
274                 index++;
275             }
276             else
277             {
278                 index = 0;
279             }
280         }
281         else if (index == boundary.size())
282         {
283             index++;
284             if (c == cr)
285             {
286                 // cr = part boundary
287                 flags = Boundary::PART_BOUNDARY;
288             }
289             else if (c == hyphen)
290             {
291                 // hyphen = end boundary
292                 flags = Boundary::END_BOUNDARY;
293             }
294             else
295             {
296                 index = 0;
297             }
298         }
299         else
300         {
301             if (flags == Boundary::PART_BOUNDARY)
302             {
303                 index = 0;
304                 if (c == lf)
305                 {
306                     // unset the PART_BOUNDARY flag
307                     flags = Boundary::NON_BOUNDARY;
308                     mime_fields.push_back({});
309                     state = State::HEADER_FIELD_START;
310                     return ParserError::PARSER_SUCCESS;
311                 }
312             }
313             if (flags == Boundary::END_BOUNDARY)
314             {
315                 if (c == hyphen)
316                 {
317                     state = State::END;
318                 }
319                 else
320                 {
321                     flags = Boundary::NON_BOUNDARY;
322                     index = 0;
323                 }
324             }
325         }
326 
327         if (index > 0)
328         {
329             if ((index - 1) >= lookbehind.size())
330             {
331                 // Should never happen, but when it does it won't cause crash
332                 return ParserError::ERROR_OUT_OF_RANGE;
333             }
334             lookbehind[index - 1] = c;
335         }
336         else if (prevIndex > 0)
337         {
338             // if our boundary turned out to be rubbish, the captured
339             // lookbehind belongs to partData
340 
341             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
342             partDataMark = i;
343 
344             // reconsider the current character even so it interrupted
345             // the sequence it could be the beginning of a new sequence
346             i--;
347         }
348         return ParserError::PARSER_SUCCESS;
349     }
350 
351     std::string currentHeaderName;
352     std::string currentHeaderValue;
353 
354     static constexpr char cr = '\r';
355     static constexpr char lf = '\n';
356     static constexpr char space = ' ';
357     static constexpr char hyphen = '-';
358     static constexpr char colon = ':';
359 
360     std::array<bool, 256> boundaryIndex{};
361     std::string lookbehind;
362     State state{State::START};
363     Boundary flags{Boundary::NON_BOUNDARY};
364     size_t index = 0;
365     size_t partDataMark = 0;
366     size_t headerFieldMark = 0;
367     size_t headerValueMark = 0;
368 };
369