1 #pragma once
2 
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
8 
/// Result code returned by MultipartParser::parse().
enum class ParserError
{
    /// The body was consumed without hitting a syntax error.
    PARSER_SUCCESS,
    /// Content-Type did not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening delimiter was not followed by CR.
    ERROR_BOUNDARY_CR,
    /// The CR after the opening delimiter was not followed by LF.
    ERROR_BOUNDARY_LF,
    /// The body did not begin with the expected "--<boundary>" delimiter.
    ERROR_BOUNDARY_DATA,
    /// A header line started with ':' (empty header name).
    ERROR_EMPTY_HEADER,
    /// A header name contained a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    /// The CR terminating a header value was not followed by LF.
    ERROR_HEADER_VALUE,
    /// The header section of a part was not terminated correctly.
    ERROR_HEADER_ENDING,
};
21 
/// Position of the MultipartParser state machine within the request body.
enum class State
{
    /// Nothing consumed yet; the match counter is reset on the first byte.
    START,
    /// Matching the opening "--<boundary>" delimiter and its CRLF.
    START_BOUNDARY,
    /// At the first byte of a header line (or of the blank line that ends
    /// the header section).
    HEADER_FIELD_START,
    /// Scanning a header name up to ':'.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Scanning a header value up to CR.
    HEADER_VALUE,
    /// CR after a header value seen; LF expected next.
    HEADER_VALUE_ALMOST_DONE,
    /// CR of the blank line after the headers seen; LF expected next.
    HEADERS_ALMOST_DONE,
    /// At the first byte of a part's payload.
    PART_DATA_START,
    /// Scanning the payload while looking for the next delimiter.
    PART_DATA,
    /// Closing delimiter seen; remaining input is ignored.
    END,
};
36 
/// Kind of delimiter the parser believes it is currently finishing, decided
/// by the byte that follows a fully matched boundary string.
enum class Boundary
{
    /// No delimiter is pending.
    NON_BOUNDARY,
    /// Boundary followed by CR: another part is expected after the LF.
    PART_BOUNDARY,
    /// Boundary followed by '-': a second '-' completes the closing delimiter.
    END_BOUNDARY,
};
43 
44 struct FormPart
45 {
46     boost::beast::http::fields fields;
47     std::string content;
48 };
49 
50 class MultipartParser
51 {
52   public:
53     MultipartParser() = default;
54 
55     [[nodiscard]] ParserError parse(const crow::Request& req)
56     {
57         std::string_view contentType = req.getHeaderValue("content-type");
58 
59         const std::string boundaryFormat = "multipart/form-data; boundary=";
60         if (!contentType.starts_with(boundaryFormat))
61         {
62             return ParserError::ERROR_BOUNDARY_FORMAT;
63         }
64 
65         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
66 
67         boundary = "\r\n--";
68         boundary += ctBoundary;
69         indexBoundary();
70         lookbehind.resize(boundary.size() + 8);
71         state = State::START;
72 
73         const char* buffer = req.body.data();
74         size_t len = req.body.size();
75         size_t prevIndex = index;
76         char cl = 0;
77 
78         for (size_t i = 0; i < len; i++)
79         {
80             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
81             char c = buffer[i];
82             switch (state)
83             {
84                 case State::START:
85                     index = 0;
86                     state = State::START_BOUNDARY;
87                     [[fallthrough]];
88                 case State::START_BOUNDARY:
89                     if (index == boundary.size() - 2)
90                     {
91                         if (c != cr)
92                         {
93                             return ParserError::ERROR_BOUNDARY_CR;
94                         }
95                         index++;
96                         break;
97                     }
98                     else if (index - 1 == boundary.size() - 2)
99                     {
100                         if (c != lf)
101                         {
102                             return ParserError::ERROR_BOUNDARY_LF;
103                         }
104                         index = 0;
105                         mime_fields.push_back({});
106                         state = State::HEADER_FIELD_START;
107                         break;
108                     }
109                     if (c != boundary[index + 2])
110                     {
111                         return ParserError::ERROR_BOUNDARY_DATA;
112                     }
113                     index++;
114                     break;
115                 case State::HEADER_FIELD_START:
116                     currentHeaderName.resize(0);
117                     state = State::HEADER_FIELD;
118                     headerFieldMark = i;
119                     index = 0;
120                     [[fallthrough]];
121                 case State::HEADER_FIELD:
122                     if (c == cr)
123                     {
124                         headerFieldMark = 0;
125                         state = State::HEADERS_ALMOST_DONE;
126                         break;
127                     }
128 
129                     index++;
130                     if (c == hyphen)
131                     {
132                         break;
133                     }
134 
135                     if (c == colon)
136                     {
137                         if (index == 1)
138                         {
139                             return ParserError::ERROR_EMPTY_HEADER;
140                         }
141 
142                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
143                         currentHeaderName.append(buffer + headerFieldMark,
144                                                  i - headerFieldMark);
145                         state = State::HEADER_VALUE_START;
146                         break;
147                     }
148                     cl = lower(c);
149                     if (cl < 'a' || cl > 'z')
150                     {
151                         return ParserError::ERROR_HEADER_NAME;
152                     }
153                     break;
154                 case State::HEADER_VALUE_START:
155                     if (c == space)
156                     {
157                         break;
158                     }
159                     headerValueMark = i;
160                     state = State::HEADER_VALUE;
161                     [[fallthrough]];
162                 case State::HEADER_VALUE:
163                     if (c == cr)
164                     {
165                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
166                         std::string_view value(buffer + headerValueMark,
167                                                i - headerValueMark);
168                         mime_fields.rbegin()->fields.set(currentHeaderName,
169                                                          value);
170                         state = State::HEADER_VALUE_ALMOST_DONE;
171                     }
172                     break;
173                 case State::HEADER_VALUE_ALMOST_DONE:
174                     if (c != lf)
175                     {
176                         return ParserError::ERROR_HEADER_VALUE;
177                     }
178                     state = State::HEADER_FIELD_START;
179                     break;
180                 case State::HEADERS_ALMOST_DONE:
181                     if (c != lf)
182                     {
183                         return ParserError::ERROR_HEADER_ENDING;
184                     }
185                     state = State::PART_DATA_START;
186                     break;
187                 case State::PART_DATA_START:
188                     state = State::PART_DATA;
189                     partDataMark = i;
190                     [[fallthrough]];
191                 case State::PART_DATA:
192                     if (index == 0)
193                     {
194                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
195 
196                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
197                         c = buffer[i];
198                     }
199                     processPartData(prevIndex, buffer, i, c);
200                     break;
201                 case State::END:
202                     break;
203             }
204         }
205         return ParserError::PARSER_SUCCESS;
206     }
207     std::vector<FormPart> mime_fields;
208     std::string boundary;
209 
210   private:
211     void indexBoundary()
212     {
213         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
214         for (const char current : boundary)
215         {
216             boundaryIndex[static_cast<unsigned char>(current)] = true;
217         }
218     }
219 
220     static char lower(char c)
221     {
222         return static_cast<char>(c | 0x20);
223     }
224 
225     inline bool isBoundaryChar(char c) const
226     {
227         return boundaryIndex[static_cast<unsigned char>(c)];
228     }
229 
230     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
231                          size_t& i)
232     {
233         // boyer-moore derived algorithm to safely skip non-boundary data
234         while (i + boundary.size() <= len)
235         {
236             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
237             if (isBoundaryChar(buffer[i + boundaryEnd]))
238             {
239                 break;
240             }
241             i += boundary.size();
242         }
243     }
244 
245     void processPartData(size_t& prevIndex, const char* buffer, size_t& i,
246                          char c)
247     {
248         prevIndex = index;
249 
250         if (index < boundary.size())
251         {
252             if (boundary[index] == c)
253             {
254                 if (index == 0)
255                 {
256                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
257                     const char* start = buffer + partDataMark;
258                     size_t size = i - partDataMark;
259                     mime_fields.rbegin()->content +=
260                         std::string_view(start, size);
261                 }
262                 index++;
263             }
264             else
265             {
266                 index = 0;
267             }
268         }
269         else if (index == boundary.size())
270         {
271             index++;
272             if (c == cr)
273             {
274                 // cr = part boundary
275                 flags = Boundary::PART_BOUNDARY;
276             }
277             else if (c == hyphen)
278             {
279                 // hyphen = end boundary
280                 flags = Boundary::END_BOUNDARY;
281             }
282             else
283             {
284                 index = 0;
285             }
286         }
287         else
288         {
289             if (flags == Boundary::PART_BOUNDARY)
290             {
291                 index = 0;
292                 if (c == lf)
293                 {
294                     // unset the PART_BOUNDARY flag
295                     flags = Boundary::NON_BOUNDARY;
296                     mime_fields.push_back({});
297                     state = State::HEADER_FIELD_START;
298                     return;
299                 }
300             }
301             if (flags == Boundary::END_BOUNDARY)
302             {
303                 if (c == hyphen)
304                 {
305                     state = State::END;
306                 }
307             }
308         }
309 
310         if (index > 0)
311         {
312             lookbehind[index - 1] = c;
313         }
314         else if (prevIndex > 0)
315         {
316             // if our boundary turned out to be rubbish, the captured
317             // lookbehind belongs to partData
318 
319             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
320             prevIndex = 0;
321             partDataMark = i;
322 
323             // reconsider the current character even so it interrupted
324             // the sequence it could be the beginning of a new sequence
325             i--;
326         }
327     }
328 
329     std::string currentHeaderName;
330     std::string currentHeaderValue;
331 
332     static constexpr char cr = '\r';
333     static constexpr char lf = '\n';
334     static constexpr char space = ' ';
335     static constexpr char hyphen = '-';
336     static constexpr char colon = ':';
337 
338     std::array<bool, 256> boundaryIndex{};
339     std::string lookbehind;
340     State state{State::START};
341     Boundary flags{Boundary::NON_BOUNDARY};
342     size_t index = 0;
343     size_t partDataMark = 0;
344     size_t headerFieldMark = 0;
345     size_t headerValueMark = 0;
346 };
347