1 #pragma once
2 
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
8 
// Result codes reported by MultipartParser::parse().
enum class ParserError
{
    // The body was consumed and the closing boundary was reached.
    PARSER_SUCCESS = 0,
    // Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT = 1,
    // The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR = 2,
    // The carriage return after the opening boundary is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF = 3,
    // The body does not begin with "--" followed by the boundary string.
    ERROR_BOUNDARY_DATA = 4,
    // A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER = 5,
    // A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME = 6,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE = 7,
    // A carriage return seen while reading a header name is not followed by
    // a line feed.
    ERROR_HEADER_ENDING = 8,
    // A line break occurred in the middle of a header name, before any ':'.
    ERROR_UNEXPECTED_END_OF_HEADER = 9,
    // The body ended before the closing boundary was seen.
    ERROR_UNEXPECTED_END_OF_INPUT = 10,
    // Internal safety net: the boundary match index outgrew the lookbehind
    // buffer.
    ERROR_OUT_OF_RANGE = 11,
};
24 
// Positions of the MultipartParser state machine while it walks the body.
enum class State
{
    // Nothing consumed yet; resets the match index and moves on to
    // START_BOUNDARY.
    START = 0,
    // Matching the opening "--<boundary>" line and its trailing CR LF.
    START_BOUNDARY = 1,
    // At the first byte of a header line (or of the blank line that ends the
    // headers).
    HEADER_FIELD_START = 2,
    // Reading a header name up to the ':' separator.
    HEADER_FIELD = 3,
    // Skipping spaces between ':' and the header value.
    HEADER_VALUE_START = 4,
    // Reading a header value up to the terminating carriage return.
    HEADER_VALUE = 5,
    // Carriage return after a header value seen; a line feed must follow.
    HEADER_VALUE_ALMOST_DONE = 6,
    // Carriage return seen in header-name position; a line feed must follow.
    HEADERS_ALMOST_DONE = 7,
    // At the first byte of a part's content; the content start is recorded.
    PART_DATA_START = 8,
    // Reading part content while watching for the next boundary.
    PART_DATA = 9,
    // Closing boundary seen; any remaining bytes are ignored.
    END = 10,
};
39 
// What kind of delimiter a fully matched boundary string appears to be,
// decided by the byte that follows it.
enum class Boundary
{
    // No boundary terminator is currently being confirmed.
    NON_BOUNDARY = 0,
    // Boundary followed by a carriage return: another part follows once the
    // line feed is seen.
    PART_BOUNDARY = 1,
    // Boundary followed by '-': the closing delimiter once a second '-' is
    // seen.
    END_BOUNDARY = 2,
};
46 
47 struct FormPart
48 {
49     boost::beast::http::fields fields;
50     std::string content;
51 };
52 
53 class MultipartParser
54 {
55   public:
56     MultipartParser() = default;
57 
58     [[nodiscard]] ParserError parse(const crow::Request& req)
59     {
60         std::string_view contentType = req.getHeaderValue("content-type");
61 
62         const std::string boundaryFormat = "multipart/form-data; boundary=";
63         if (!contentType.starts_with(boundaryFormat))
64         {
65             return ParserError::ERROR_BOUNDARY_FORMAT;
66         }
67 
68         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
69 
70         boundary = "\r\n--";
71         boundary += ctBoundary;
72         indexBoundary();
73         lookbehind.resize(boundary.size() + 8);
74         state = State::START;
75 
76         const char* buffer = req.body.data();
77         size_t len = req.body.size();
78         char cl = 0;
79 
80         for (size_t i = 0; i < len; i++)
81         {
82             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
83             char c = buffer[i];
84             switch (state)
85             {
86                 case State::START:
87                     index = 0;
88                     state = State::START_BOUNDARY;
89                     [[fallthrough]];
90                 case State::START_BOUNDARY:
91                     if (index == boundary.size() - 2)
92                     {
93                         if (c != cr)
94                         {
95                             return ParserError::ERROR_BOUNDARY_CR;
96                         }
97                         index++;
98                         break;
99                     }
100                     else if (index - 1 == boundary.size() - 2)
101                     {
102                         if (c != lf)
103                         {
104                             return ParserError::ERROR_BOUNDARY_LF;
105                         }
106                         index = 0;
107                         mime_fields.push_back({});
108                         state = State::HEADER_FIELD_START;
109                         break;
110                     }
111                     if (c != boundary[index + 2])
112                     {
113                         return ParserError::ERROR_BOUNDARY_DATA;
114                     }
115                     index++;
116                     break;
117                 case State::HEADER_FIELD_START:
118                     currentHeaderName.resize(0);
119                     state = State::HEADER_FIELD;
120                     headerFieldMark = i;
121                     index = 0;
122                     [[fallthrough]];
123                 case State::HEADER_FIELD:
124                     if (c == cr)
125                     {
126                         headerFieldMark = 0;
127                         state = State::HEADERS_ALMOST_DONE;
128                         break;
129                     }
130 
131                     index++;
132                     if (c == hyphen)
133                     {
134                         break;
135                     }
136 
137                     if (c == colon)
138                     {
139                         if (index == 1)
140                         {
141                             return ParserError::ERROR_EMPTY_HEADER;
142                         }
143 
144                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
145                         currentHeaderName.append(buffer + headerFieldMark,
146                                                  i - headerFieldMark);
147                         state = State::HEADER_VALUE_START;
148                         break;
149                     }
150                     cl = lower(c);
151                     if (cl < 'a' || cl > 'z')
152                     {
153                         return ParserError::ERROR_HEADER_NAME;
154                     }
155                     break;
156                 case State::HEADER_VALUE_START:
157                     if (c == space)
158                     {
159                         break;
160                     }
161                     headerValueMark = i;
162                     state = State::HEADER_VALUE;
163                     [[fallthrough]];
164                 case State::HEADER_VALUE:
165                     if (c == cr)
166                     {
167                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
168                         std::string_view value(buffer + headerValueMark,
169                                                i - headerValueMark);
170                         mime_fields.rbegin()->fields.set(currentHeaderName,
171                                                          value);
172                         state = State::HEADER_VALUE_ALMOST_DONE;
173                     }
174                     break;
175                 case State::HEADER_VALUE_ALMOST_DONE:
176                     if (c != lf)
177                     {
178                         return ParserError::ERROR_HEADER_VALUE;
179                     }
180                     state = State::HEADER_FIELD_START;
181                     break;
182                 case State::HEADERS_ALMOST_DONE:
183                     if (c != lf)
184                     {
185                         return ParserError::ERROR_HEADER_ENDING;
186                     }
187                     if (index > 0)
188                     {
189                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
190                     }
191                     state = State::PART_DATA_START;
192                     break;
193                 case State::PART_DATA_START:
194                     state = State::PART_DATA;
195                     partDataMark = i;
196                     [[fallthrough]];
197                 case State::PART_DATA:
198                 {
199                     if (index == 0)
200                     {
201                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
202 
203                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
204                         c = buffer[i];
205                     }
206                     const ParserError ec = processPartData(buffer, i, c);
207                     if (ec != ParserError::PARSER_SUCCESS)
208                     {
209                         return ec;
210                     }
211                     break;
212                 }
213                 case State::END:
214                     break;
215             }
216         }
217 
218         if (state != State::END)
219         {
220             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
221         }
222 
223         return ParserError::PARSER_SUCCESS;
224     }
225     std::vector<FormPart> mime_fields;
226     std::string boundary;
227 
228   private:
229     void indexBoundary()
230     {
231         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
232         for (const char current : boundary)
233         {
234             boundaryIndex[static_cast<unsigned char>(current)] = true;
235         }
236     }
237 
238     static char lower(char c)
239     {
240         return static_cast<char>(c | 0x20);
241     }
242 
243     inline bool isBoundaryChar(char c) const
244     {
245         return boundaryIndex[static_cast<unsigned char>(c)];
246     }
247 
248     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
249                          size_t& i)
250     {
251         // boyer-moore derived algorithm to safely skip non-boundary data
252         while (i + boundary.size() <= len)
253         {
254             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
255             if (isBoundaryChar(buffer[i + boundaryEnd]))
256             {
257                 break;
258             }
259             i += boundary.size();
260         }
261     }
262 
263     ParserError processPartData(const char* buffer, size_t& i, char c)
264     {
265         size_t prevIndex = index;
266 
267         if (index < boundary.size())
268         {
269             if (boundary[index] == c)
270             {
271                 if (index == 0)
272                 {
273                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
274                     const char* start = buffer + partDataMark;
275                     size_t size = i - partDataMark;
276                     mime_fields.rbegin()->content +=
277                         std::string_view(start, size);
278                 }
279                 index++;
280             }
281             else
282             {
283                 index = 0;
284             }
285         }
286         else if (index == boundary.size())
287         {
288             index++;
289             if (c == cr)
290             {
291                 // cr = part boundary
292                 flags = Boundary::PART_BOUNDARY;
293             }
294             else if (c == hyphen)
295             {
296                 // hyphen = end boundary
297                 flags = Boundary::END_BOUNDARY;
298             }
299             else
300             {
301                 index = 0;
302             }
303         }
304         else
305         {
306             if (flags == Boundary::PART_BOUNDARY)
307             {
308                 index = 0;
309                 if (c == lf)
310                 {
311                     // unset the PART_BOUNDARY flag
312                     flags = Boundary::NON_BOUNDARY;
313                     mime_fields.push_back({});
314                     state = State::HEADER_FIELD_START;
315                     return ParserError::PARSER_SUCCESS;
316                 }
317             }
318             if (flags == Boundary::END_BOUNDARY)
319             {
320                 if (c == hyphen)
321                 {
322                     state = State::END;
323                 }
324                 else
325                 {
326                     flags = Boundary::NON_BOUNDARY;
327                     index = 0;
328                 }
329             }
330         }
331 
332         if (index > 0)
333         {
334             if ((index - 1) >= lookbehind.size())
335             {
336                 // Should never happen, but when it does it won't cause crash
337                 return ParserError::ERROR_OUT_OF_RANGE;
338             }
339             lookbehind[index - 1] = c;
340         }
341         else if (prevIndex > 0)
342         {
343             // if our boundary turned out to be rubbish, the captured
344             // lookbehind belongs to partData
345 
346             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
347             partDataMark = i;
348 
349             // reconsider the current character even so it interrupted
350             // the sequence it could be the beginning of a new sequence
351             i--;
352         }
353         return ParserError::PARSER_SUCCESS;
354     }
355 
356     std::string currentHeaderName;
357     std::string currentHeaderValue;
358 
359     static constexpr char cr = '\r';
360     static constexpr char lf = '\n';
361     static constexpr char space = ' ';
362     static constexpr char hyphen = '-';
363     static constexpr char colon = ':';
364 
365     std::array<bool, 256> boundaryIndex{};
366     std::string lookbehind;
367     State state{State::START};
368     Boundary flags{Boundary::NON_BOUNDARY};
369     size_t index = 0;
370     size_t partDataMark = 0;
371     size_t headerFieldMark = 0;
372     size_t headerValueMark = 0;
373 };
374