1 #pragma once
2 
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
9 
/// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    /// The whole body was consumed and the closing boundary was seen.
    PARSER_SUCCESS,

    /// The content-type header does not start with
    /// "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,

    /// The opening boundary line is not followed by CR.
    ERROR_BOUNDARY_CR,

    /// The CR after the opening boundary line is not followed by LF.
    ERROR_BOUNDARY_LF,

    /// The body does not begin with "--" followed by the boundary string.
    ERROR_BOUNDARY_DATA,

    /// A ':' was found as the very first character of a header line.
    ERROR_EMPTY_HEADER,

    /// A header name contains a character other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,

    /// The CR that ends a header value is not followed by LF.
    ERROR_HEADER_VALUE,

    /// The CR found while reading a header name is not followed by LF.
    ERROR_HEADER_ENDING,

    /// A header line ended (CR LF) after some name characters were read but
    /// before any ':' was seen.
    ERROR_UNEXPECTED_END_OF_HEADER,

    /// The body ran out before the closing boundary was reached.
    ERROR_UNEXPECTED_END_OF_INPUT,

    /// The boundary match index went past the lookbehind buffer.
    ERROR_OUT_OF_RANGE
};
25 
/// States of the MultipartParser byte-by-byte state machine.
enum class State
{
    /// Nothing consumed yet; resets the match index and falls into
    /// START_BOUNDARY.
    START,

    /// Matching the opening "--<boundary>" line and its trailing CR LF.
    START_BOUNDARY,

    /// First byte of a header line; falls into HEADER_FIELD.
    HEADER_FIELD_START,

    /// Reading a header name up to the ':' (or a CR ending the headers).
    HEADER_FIELD,

    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,

    /// Reading a header value up to the terminating CR.
    HEADER_VALUE,

    /// CR seen after a header value; LF expected next.
    HEADER_VALUE_ALMOST_DONE,

    /// CR seen in header-name position; LF expected next.
    HEADERS_ALMOST_DONE,

    /// First byte of a part's payload; falls into PART_DATA.
    PART_DATA_START,

    /// Reading payload while looking for the next boundary.
    PART_DATA,

    /// Closing boundary seen; remaining bytes are ignored.
    END
};
40 
/// Kind of delimiter recognised after a full boundary string has matched
/// inside part data.
enum class Boundary
{
    /// No boundary is currently pending.
    NON_BOUNDARY,

    /// Boundary string followed by CR: another part is expected.
    PART_BOUNDARY,

    /// Boundary string followed by '-': the closing delimiter is expected.
    END_BOUNDARY,
};
47 
48 struct FormPart
49 {
50     boost::beast::http::fields fields;
51     std::string content;
52 };
53 
54 class MultipartParser
55 {
56   public:
57     MultipartParser() = default;
58 
59     [[nodiscard]] ParserError parse(const crow::Request& req)
60     {
61         std::string_view contentType = req.getHeaderValue("content-type");
62 
63         const std::string boundaryFormat = "multipart/form-data; boundary=";
64         if (!contentType.starts_with(boundaryFormat))
65         {
66             return ParserError::ERROR_BOUNDARY_FORMAT;
67         }
68 
69         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
70 
71         boundary = "\r\n--";
72         boundary += ctBoundary;
73         indexBoundary();
74         lookbehind.resize(boundary.size() + 8);
75         state = State::START;
76 
77         const char* buffer = req.body().data();
78         size_t len = req.body().size();
79         char cl = 0;
80 
81         for (size_t i = 0; i < len; i++)
82         {
83             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
84             char c = buffer[i];
85             switch (state)
86             {
87                 case State::START:
88                     index = 0;
89                     state = State::START_BOUNDARY;
90                     [[fallthrough]];
91                 case State::START_BOUNDARY:
92                     if (index == boundary.size() - 2)
93                     {
94                         if (c != cr)
95                         {
96                             return ParserError::ERROR_BOUNDARY_CR;
97                         }
98                         index++;
99                         break;
100                     }
101                     else if (index - 1 == boundary.size() - 2)
102                     {
103                         if (c != lf)
104                         {
105                             return ParserError::ERROR_BOUNDARY_LF;
106                         }
107                         index = 0;
108                         mime_fields.push_back({});
109                         state = State::HEADER_FIELD_START;
110                         break;
111                     }
112                     if (c != boundary[index + 2])
113                     {
114                         return ParserError::ERROR_BOUNDARY_DATA;
115                     }
116                     index++;
117                     break;
118                 case State::HEADER_FIELD_START:
119                     currentHeaderName.resize(0);
120                     state = State::HEADER_FIELD;
121                     headerFieldMark = i;
122                     index = 0;
123                     [[fallthrough]];
124                 case State::HEADER_FIELD:
125                     if (c == cr)
126                     {
127                         headerFieldMark = 0;
128                         state = State::HEADERS_ALMOST_DONE;
129                         break;
130                     }
131 
132                     index++;
133                     if (c == hyphen)
134                     {
135                         break;
136                     }
137 
138                     if (c == colon)
139                     {
140                         if (index == 1)
141                         {
142                             return ParserError::ERROR_EMPTY_HEADER;
143                         }
144 
145                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
146                         currentHeaderName.append(buffer + headerFieldMark,
147                                                  i - headerFieldMark);
148                         state = State::HEADER_VALUE_START;
149                         break;
150                     }
151                     cl = lower(c);
152                     if (cl < 'a' || cl > 'z')
153                     {
154                         return ParserError::ERROR_HEADER_NAME;
155                     }
156                     break;
157                 case State::HEADER_VALUE_START:
158                     if (c == space)
159                     {
160                         break;
161                     }
162                     headerValueMark = i;
163                     state = State::HEADER_VALUE;
164                     [[fallthrough]];
165                 case State::HEADER_VALUE:
166                     if (c == cr)
167                     {
168                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
169                         std::string_view value(buffer + headerValueMark,
170                                                i - headerValueMark);
171                         mime_fields.rbegin()->fields.set(currentHeaderName,
172                                                          value);
173                         state = State::HEADER_VALUE_ALMOST_DONE;
174                     }
175                     break;
176                 case State::HEADER_VALUE_ALMOST_DONE:
177                     if (c != lf)
178                     {
179                         return ParserError::ERROR_HEADER_VALUE;
180                     }
181                     state = State::HEADER_FIELD_START;
182                     break;
183                 case State::HEADERS_ALMOST_DONE:
184                     if (c != lf)
185                     {
186                         return ParserError::ERROR_HEADER_ENDING;
187                     }
188                     if (index > 0)
189                     {
190                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
191                     }
192                     state = State::PART_DATA_START;
193                     break;
194                 case State::PART_DATA_START:
195                     state = State::PART_DATA;
196                     partDataMark = i;
197                     [[fallthrough]];
198                 case State::PART_DATA:
199                 {
200                     if (index == 0)
201                     {
202                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
203 
204                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
205                         c = buffer[i];
206                     }
207                     const ParserError ec = processPartData(buffer, i, c);
208                     if (ec != ParserError::PARSER_SUCCESS)
209                     {
210                         return ec;
211                     }
212                     break;
213                 }
214                 case State::END:
215                     break;
216             }
217         }
218 
219         if (state != State::END)
220         {
221             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
222         }
223 
224         return ParserError::PARSER_SUCCESS;
225     }
226     std::vector<FormPart> mime_fields;
227     std::string boundary;
228 
229   private:
230     void indexBoundary()
231     {
232         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
233         for (const char current : boundary)
234         {
235             boundaryIndex[static_cast<unsigned char>(current)] = true;
236         }
237     }
238 
239     static char lower(char c)
240     {
241         return static_cast<char>(c | 0x20);
242     }
243 
244     inline bool isBoundaryChar(char c) const
245     {
246         return boundaryIndex[static_cast<unsigned char>(c)];
247     }
248 
249     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
250                          size_t& i)
251     {
252         // boyer-moore derived algorithm to safely skip non-boundary data
253         while (i + boundary.size() <= len)
254         {
255             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
256             if (isBoundaryChar(buffer[i + boundaryEnd]))
257             {
258                 break;
259             }
260             i += boundary.size();
261         }
262     }
263 
264     ParserError processPartData(const char* buffer, size_t& i, char c)
265     {
266         size_t prevIndex = index;
267 
268         if (index < boundary.size())
269         {
270             if (boundary[index] == c)
271             {
272                 if (index == 0)
273                 {
274                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
275                     const char* start = buffer + partDataMark;
276                     size_t size = i - partDataMark;
277                     mime_fields.rbegin()->content += std::string_view(start,
278                                                                       size);
279                 }
280                 index++;
281             }
282             else
283             {
284                 index = 0;
285             }
286         }
287         else if (index == boundary.size())
288         {
289             index++;
290             if (c == cr)
291             {
292                 // cr = part boundary
293                 flags = Boundary::PART_BOUNDARY;
294             }
295             else if (c == hyphen)
296             {
297                 // hyphen = end boundary
298                 flags = Boundary::END_BOUNDARY;
299             }
300             else
301             {
302                 index = 0;
303             }
304         }
305         else
306         {
307             if (flags == Boundary::PART_BOUNDARY)
308             {
309                 index = 0;
310                 if (c == lf)
311                 {
312                     // unset the PART_BOUNDARY flag
313                     flags = Boundary::NON_BOUNDARY;
314                     mime_fields.push_back({});
315                     state = State::HEADER_FIELD_START;
316                     return ParserError::PARSER_SUCCESS;
317                 }
318             }
319             if (flags == Boundary::END_BOUNDARY)
320             {
321                 if (c == hyphen)
322                 {
323                     state = State::END;
324                 }
325                 else
326                 {
327                     flags = Boundary::NON_BOUNDARY;
328                     index = 0;
329                 }
330             }
331         }
332 
333         if (index > 0)
334         {
335             if ((index - 1) >= lookbehind.size())
336             {
337                 // Should never happen, but when it does it won't cause crash
338                 return ParserError::ERROR_OUT_OF_RANGE;
339             }
340             lookbehind[index - 1] = c;
341         }
342         else if (prevIndex > 0)
343         {
344             // if our boundary turned out to be rubbish, the captured
345             // lookbehind belongs to partData
346 
347             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
348             partDataMark = i;
349 
350             // reconsider the current character even so it interrupted
351             // the sequence it could be the beginning of a new sequence
352             i--;
353         }
354         return ParserError::PARSER_SUCCESS;
355     }
356 
357     std::string currentHeaderName;
358     std::string currentHeaderValue;
359 
360     static constexpr char cr = '\r';
361     static constexpr char lf = '\n';
362     static constexpr char space = ' ';
363     static constexpr char hyphen = '-';
364     static constexpr char colon = ':';
365 
366     std::array<bool, 256> boundaryIndex{};
367     std::string lookbehind;
368     State state{State::START};
369     Boundary flags{Boundary::NON_BOUNDARY};
370     size_t index = 0;
371     size_t partDataMark = 0;
372     size_t headerFieldMark = 0;
373     size_t headerValueMark = 0;
374 };
375