1 #pragma once
2 
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
9 
/**
 * @brief Result codes reported by MultipartParser::parse().
 */
enum class ParserError
{
    /// The body was consumed and the closing delimiter was reached.
    PARSER_SUCCESS,
    /// The Content-Type header does not supply a usable
    /// multipart/form-data boundary parameter.
    ERROR_BOUNDARY_FORMAT,
    /// The opening delimiter was not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening delimiter was not followed by
    /// a line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with the expected opening delimiter bytes.
    ERROR_BOUNDARY_DATA,
    /// A header line started with ':' (no header name).
    ERROR_EMPTY_HEADER,
    /// A header name contained a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value was not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section was not followed by a
    /// line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended before its ':' separator was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing delimiter was seen.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Internal guard: a delimiter candidate outgrew the lookbehind buffer.
    ERROR_OUT_OF_RANGE
};
25 
/**
 * @brief States of the byte-wise state machine driven by
 *        MultipartParser::parse().
 */
enum class State
{
    /// Initial state; resets the match index and moves to START_BOUNDARY.
    START,
    /// Matching the opening delimiter and the CRLF that follows it.
    START_BOUNDARY,
    /// First byte of a header line; resets the header-name bookkeeping.
    HEADER_FIELD_START,
    /// Inside a header name, up to the ':' separator.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Inside a header value, up to the terminating carriage return.
    HEADER_VALUE,
    /// Carriage return after a header value seen; a line feed must follow.
    HEADER_VALUE_ALMOST_DONE,
    /// Carriage return at the start of a header line seen; a line feed must
    /// follow to end the header section.
    HEADERS_ALMOST_DONE,
    /// First byte of a part's content; records where the content begins.
    PART_DATA_START,
    /// Inside a part's content, scanning for the next delimiter.
    PART_DATA,
    /// The closing delimiter was seen; remaining bytes are ignored.
    END
};
40 
/**
 * @brief What the byte following a fully matched delimiter suggested about
 *        that delimiter.
 */
enum class Boundary
{
    /// No delimiter classification is pending.
    NON_BOUNDARY,
    /// The delimiter was followed by a carriage return: another part
    /// follows if a line feed comes next.
    PART_BOUNDARY,
    /// The delimiter was followed by '-': this is the closing delimiter if
    /// a second '-' comes next.
    END_BOUNDARY
};
47 
48 struct FormPart
49 {
50     boost::beast::http::fields fields;
51     std::string content;
52 };
53 
54 class MultipartParser
55 {
56   public:
57     MultipartParser() = default;
58 
59     [[nodiscard]] ParserError parse(const crow::Request& req)
60     {
61         std::string_view contentType = req.getHeaderValue("content-type");
62 
63         const std::string boundaryFormat = "multipart/form-data; boundary=";
64         if (!contentType.starts_with(boundaryFormat))
65         {
66             return ParserError::ERROR_BOUNDARY_FORMAT;
67         }
68 
69         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
70 
71         boundary = "\r\n--";
72         boundary += ctBoundary;
73         indexBoundary();
74         lookbehind.resize(boundary.size() + 8);
75         state = State::START;
76 
77         const std::string& buffer = req.body();
78         size_t len = buffer.size();
79         char cl = 0;
80 
81         for (size_t i = 0; i < len; i++)
82         {
83             char c = buffer[i];
84             switch (state)
85             {
86                 case State::START:
87                     index = 0;
88                     state = State::START_BOUNDARY;
89                     [[fallthrough]];
90                 case State::START_BOUNDARY:
91                     if (index == boundary.size() - 2)
92                     {
93                         if (c != cr)
94                         {
95                             return ParserError::ERROR_BOUNDARY_CR;
96                         }
97                         index++;
98                         break;
99                     }
100                     else if (index - 1 == boundary.size() - 2)
101                     {
102                         if (c != lf)
103                         {
104                             return ParserError::ERROR_BOUNDARY_LF;
105                         }
106                         index = 0;
107                         mime_fields.push_back({});
108                         state = State::HEADER_FIELD_START;
109                         break;
110                     }
111                     if (c != boundary[index + 2])
112                     {
113                         return ParserError::ERROR_BOUNDARY_DATA;
114                     }
115                     index++;
116                     break;
117                 case State::HEADER_FIELD_START:
118                     currentHeaderName.resize(0);
119                     state = State::HEADER_FIELD;
120                     headerFieldMark = i;
121                     index = 0;
122                     [[fallthrough]];
123                 case State::HEADER_FIELD:
124                     if (c == cr)
125                     {
126                         headerFieldMark = 0;
127                         state = State::HEADERS_ALMOST_DONE;
128                         break;
129                     }
130 
131                     index++;
132                     if (c == hyphen)
133                     {
134                         break;
135                     }
136 
137                     if (c == colon)
138                     {
139                         if (index == 1)
140                         {
141                             return ParserError::ERROR_EMPTY_HEADER;
142                         }
143 
144                         currentHeaderName.append(&buffer[headerFieldMark],
145                                                  i - headerFieldMark);
146                         state = State::HEADER_VALUE_START;
147                         break;
148                     }
149                     cl = lower(c);
150                     if (cl < 'a' || cl > 'z')
151                     {
152                         return ParserError::ERROR_HEADER_NAME;
153                     }
154                     break;
155                 case State::HEADER_VALUE_START:
156                     if (c == space)
157                     {
158                         break;
159                     }
160                     headerValueMark = i;
161                     state = State::HEADER_VALUE;
162                     [[fallthrough]];
163                 case State::HEADER_VALUE:
164                     if (c == cr)
165                     {
166                         std::string_view value(&buffer[headerValueMark],
167                                                i - headerValueMark);
168                         mime_fields.rbegin()->fields.set(currentHeaderName,
169                                                          value);
170                         state = State::HEADER_VALUE_ALMOST_DONE;
171                     }
172                     break;
173                 case State::HEADER_VALUE_ALMOST_DONE:
174                     if (c != lf)
175                     {
176                         return ParserError::ERROR_HEADER_VALUE;
177                     }
178                     state = State::HEADER_FIELD_START;
179                     break;
180                 case State::HEADERS_ALMOST_DONE:
181                     if (c != lf)
182                     {
183                         return ParserError::ERROR_HEADER_ENDING;
184                     }
185                     if (index > 0)
186                     {
187                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
188                     }
189                     state = State::PART_DATA_START;
190                     break;
191                 case State::PART_DATA_START:
192                     state = State::PART_DATA;
193                     partDataMark = i;
194                     [[fallthrough]];
195                 case State::PART_DATA:
196                 {
197                     if (index == 0)
198                     {
199                         skipNonBoundary(buffer, boundary.size() - 1, i);
200                         c = buffer[i];
201                     }
202                     if (auto ec = processPartData(buffer, i, c);
203                         ec != ParserError::PARSER_SUCCESS)
204                     {
205                         return ec;
206                     }
207                     break;
208                 }
209                 case State::END:
210                     break;
211             }
212         }
213 
214         if (state != State::END)
215         {
216             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
217         }
218 
219         return ParserError::PARSER_SUCCESS;
220     }
221     std::vector<FormPart> mime_fields;
222     std::string boundary;
223 
224   private:
225     void indexBoundary()
226     {
227         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
228         for (const char current : boundary)
229         {
230             boundaryIndex[static_cast<unsigned char>(current)] = true;
231         }
232     }
233 
234     static char lower(char c)
235     {
236         return static_cast<char>(c | 0x20);
237     }
238 
239     inline bool isBoundaryChar(char c) const
240     {
241         return boundaryIndex[static_cast<unsigned char>(c)];
242     }
243 
244     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
245                          size_t& i)
246     {
247         // boyer-moore derived algorithm to safely skip non-boundary data
248         while (i + boundary.size() <= buffer.length())
249         {
250             if (isBoundaryChar(buffer[i + boundaryEnd]))
251             {
252                 break;
253             }
254             i += boundary.size();
255         }
256     }
257 
258     ParserError processPartData(const std::string& buffer, size_t& i, char c)
259     {
260         size_t prevIndex = index;
261 
262         if (index < boundary.size())
263         {
264             if (boundary[index] == c)
265             {
266                 if (index == 0)
267                 {
268                     const char* start = &buffer[partDataMark];
269                     size_t size = i - partDataMark;
270                     mime_fields.rbegin()->content += std::string_view(start,
271                                                                       size);
272                 }
273                 index++;
274             }
275             else
276             {
277                 index = 0;
278             }
279         }
280         else if (index == boundary.size())
281         {
282             index++;
283             if (c == cr)
284             {
285                 // cr = part boundary
286                 flags = Boundary::PART_BOUNDARY;
287             }
288             else if (c == hyphen)
289             {
290                 // hyphen = end boundary
291                 flags = Boundary::END_BOUNDARY;
292             }
293             else
294             {
295                 index = 0;
296             }
297         }
298         else
299         {
300             if (flags == Boundary::PART_BOUNDARY)
301             {
302                 index = 0;
303                 if (c == lf)
304                 {
305                     // unset the PART_BOUNDARY flag
306                     flags = Boundary::NON_BOUNDARY;
307                     mime_fields.push_back({});
308                     state = State::HEADER_FIELD_START;
309                     return ParserError::PARSER_SUCCESS;
310                 }
311             }
312             if (flags == Boundary::END_BOUNDARY)
313             {
314                 if (c == hyphen)
315                 {
316                     state = State::END;
317                 }
318                 else
319                 {
320                     flags = Boundary::NON_BOUNDARY;
321                     index = 0;
322                 }
323             }
324         }
325 
326         if (index > 0)
327         {
328             if ((index - 1) >= lookbehind.size())
329             {
330                 // Should never happen, but when it does it won't cause crash
331                 return ParserError::ERROR_OUT_OF_RANGE;
332             }
333             lookbehind[index - 1] = c;
334         }
335         else if (prevIndex > 0)
336         {
337             // if our boundary turned out to be rubbish, the captured
338             // lookbehind belongs to partData
339 
340             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
341             partDataMark = i;
342 
343             // reconsider the current character even so it interrupted
344             // the sequence it could be the beginning of a new sequence
345             i--;
346         }
347         return ParserError::PARSER_SUCCESS;
348     }
349 
350     std::string currentHeaderName;
351     std::string currentHeaderValue;
352 
353     static constexpr char cr = '\r';
354     static constexpr char lf = '\n';
355     static constexpr char space = ' ';
356     static constexpr char hyphen = '-';
357     static constexpr char colon = ':';
358 
359     std::array<bool, 256> boundaryIndex{};
360     std::string lookbehind;
361     State state{State::START};
362     Boundary flags{Boundary::NON_BOUNDARY};
363     size_t index = 0;
364     size_t partDataMark = 0;
365     size_t headerFieldMark = 0;
366     size_t headerValueMark = 0;
367 };
368