1 #pragma once
2 
#include <boost/algorithm/string/predicate.hpp>
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
9 
/// Result codes reported by MultipartParser::parse().
enum class ParserError
{
    /// The body was consumed without hitting any of the errors below.
    PARSER_SUCCESS,

    /// The content-type header does not start with
    /// "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,

    /// The opening boundary line was not followed by a carriage return.
    ERROR_BOUNDARY_CR,

    /// The carriage return after the opening boundary was not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,

    /// The body does not begin with the expected "--<boundary>" sequence.
    ERROR_BOUNDARY_DATA,

    /// A header line starts with ':' (there is no header name).
    ERROR_EMPTY_HEADER,

    /// A header name contains a character other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,

    /// The carriage return ending a header value was not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,

    /// The carriage return ending the header section was not followed by a
    /// line feed.
    ERROR_HEADER_ENDING,
};
22 
/// Positions of the MultipartParser state machine while it walks the body.
enum class State
{
    START,                    ///< Nothing consumed yet.
    START_BOUNDARY,           ///< Matching the opening boundary line.
    HEADER_FIELD_START,       ///< About to read the first byte of a header name.
    HEADER_FIELD,             ///< Inside a header name.
    HEADER_VALUE_START,       ///< After ':'; skipping spaces before the value.
    HEADER_VALUE,             ///< Inside a header value.
    HEADER_VALUE_ALMOST_DONE, ///< Saw the CR ending a header value; expect LF.
    HEADERS_ALMOST_DONE,      ///< Saw the CR ending the headers; expect LF.
    PART_DATA_START,          ///< About to read the first byte of part content.
    PART_DATA,                ///< Inside part content, watching for a boundary.
    END,                      ///< Closing boundary seen; remaining bytes ignored.
};
37 
/// Kind of delimiter recognised after a full boundary match inside part data.
enum class Boundary
{
    NON_BOUNDARY,  ///< No delimiter is currently being completed.
    PART_BOUNDARY, ///< Boundary followed by CR: another part is expected.
    END_BOUNDARY,  ///< Boundary followed by '-': the closing delimiter.
};
44 
45 struct FormPart
46 {
47     boost::beast::http::fields fields;
48     std::string content;
49 };
50 
51 class MultipartParser
52 {
53   public:
54     MultipartParser() = default;
55 
56     [[nodiscard]] ParserError parse(const crow::Request& req)
57     {
58         std::string_view contentType = req.getHeaderValue("content-type");
59 
60         const std::string boundaryFormat = "multipart/form-data; boundary=";
61         if (!boost::starts_with(req.getHeaderValue("content-type"),
62                                 boundaryFormat))
63         {
64             return ParserError::ERROR_BOUNDARY_FORMAT;
65         }
66 
67         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
68 
69         boundary = "\r\n--";
70         boundary += ctBoundary;
71         indexBoundary();
72         lookbehind.resize(boundary.size() + 8);
73         state = State::START;
74 
75         const char* buffer = req.body.data();
76         size_t len = req.body.size();
77         size_t prevIndex = index;
78         char cl = 0;
79 
80         for (size_t i = 0; i < len; i++)
81         {
82             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
83             char c = buffer[i];
84             switch (state)
85             {
86                 case State::START:
87                     index = 0;
88                     state = State::START_BOUNDARY;
89                     [[fallthrough]];
90                 case State::START_BOUNDARY:
91                     if (index == boundary.size() - 2)
92                     {
93                         if (c != cr)
94                         {
95                             return ParserError::ERROR_BOUNDARY_CR;
96                         }
97                         index++;
98                         break;
99                     }
100                     else if (index - 1 == boundary.size() - 2)
101                     {
102                         if (c != lf)
103                         {
104                             return ParserError::ERROR_BOUNDARY_LF;
105                         }
106                         index = 0;
107                         mime_fields.push_back({});
108                         state = State::HEADER_FIELD_START;
109                         break;
110                     }
111                     if (c != boundary[index + 2])
112                     {
113                         return ParserError::ERROR_BOUNDARY_DATA;
114                     }
115                     index++;
116                     break;
117                 case State::HEADER_FIELD_START:
118                     currentHeaderName.resize(0);
119                     state = State::HEADER_FIELD;
120                     headerFieldMark = i;
121                     index = 0;
122                     [[fallthrough]];
123                 case State::HEADER_FIELD:
124                     if (c == cr)
125                     {
126                         headerFieldMark = 0;
127                         state = State::HEADERS_ALMOST_DONE;
128                         break;
129                     }
130 
131                     index++;
132                     if (c == hyphen)
133                     {
134                         break;
135                     }
136 
137                     if (c == colon)
138                     {
139                         if (index == 1)
140                         {
141                             return ParserError::ERROR_EMPTY_HEADER;
142                         }
143 
144                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
145                         currentHeaderName.append(buffer + headerFieldMark,
146                                                  i - headerFieldMark);
147                         state = State::HEADER_VALUE_START;
148                         break;
149                     }
150                     cl = lower(c);
151                     if (cl < 'a' || cl > 'z')
152                     {
153                         return ParserError::ERROR_HEADER_NAME;
154                     }
155                     break;
156                 case State::HEADER_VALUE_START:
157                     if (c == space)
158                     {
159                         break;
160                     }
161                     headerValueMark = i;
162                     state = State::HEADER_VALUE;
163                     [[fallthrough]];
164                 case State::HEADER_VALUE:
165                     if (c == cr)
166                     {
167                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
168                         std::string_view value(buffer + headerValueMark,
169                                                i - headerValueMark);
170                         mime_fields.rbegin()->fields.set(currentHeaderName,
171                                                          value);
172                         state = State::HEADER_VALUE_ALMOST_DONE;
173                     }
174                     break;
175                 case State::HEADER_VALUE_ALMOST_DONE:
176                     if (c != lf)
177                     {
178                         return ParserError::ERROR_HEADER_VALUE;
179                     }
180                     state = State::HEADER_FIELD_START;
181                     break;
182                 case State::HEADERS_ALMOST_DONE:
183                     if (c != lf)
184                     {
185                         return ParserError::ERROR_HEADER_ENDING;
186                     }
187                     state = State::PART_DATA_START;
188                     break;
189                 case State::PART_DATA_START:
190                     state = State::PART_DATA;
191                     partDataMark = i;
192                     [[fallthrough]];
193                 case State::PART_DATA:
194                     if (index == 0)
195                     {
196                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
197 
198                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
199                         c = buffer[i];
200                     }
201                     processPartData(prevIndex, index, buffer, i, c, state);
202                     break;
203                 case State::END:
204                     break;
205             }
206         }
207         return ParserError::PARSER_SUCCESS;
208     }
209     std::vector<FormPart> mime_fields;
210     std::string boundary;
211 
212   private:
213     void indexBoundary()
214     {
215         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
216         for (const char current : boundary)
217         {
218             boundaryIndex[static_cast<unsigned char>(current)] = true;
219         }
220     }
221 
222     static char lower(char c)
223     {
224         return static_cast<char>(c | 0x20);
225     }
226 
227     inline bool isBoundaryChar(char c) const
228     {
229         return boundaryIndex[static_cast<unsigned char>(c)];
230     }
231 
232     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
233                          size_t& i)
234     {
235         // boyer-moore derived algorithm to safely skip non-boundary data
236         while (i + boundary.size() <= len)
237         {
238             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
239             if (isBoundaryChar(buffer[i + boundaryEnd]))
240             {
241                 break;
242             }
243             i += boundary.size();
244         }
245     }
246 
247     void processPartData(size_t& prevIndex, size_t& index, const char* buffer,
248                          size_t& i, char c, State& state)
249     {
250         prevIndex = index;
251 
252         if (index < boundary.size())
253         {
254             if (boundary[index] == c)
255             {
256                 if (index == 0)
257                 {
258                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
259                     const char* start = buffer + partDataMark;
260                     size_t size = i - partDataMark;
261                     mime_fields.rbegin()->content +=
262                         std::string_view(start, size);
263                 }
264                 index++;
265             }
266             else
267             {
268                 index = 0;
269             }
270         }
271         else if (index == boundary.size())
272         {
273             index++;
274             if (c == cr)
275             {
276                 // cr = part boundary
277                 flags = Boundary::PART_BOUNDARY;
278             }
279             else if (c == hyphen)
280             {
281                 // hyphen = end boundary
282                 flags = Boundary::END_BOUNDARY;
283             }
284             else
285             {
286                 index = 0;
287             }
288         }
289         else
290         {
291             if (flags == Boundary::PART_BOUNDARY)
292             {
293                 index = 0;
294                 if (c == lf)
295                 {
296                     // unset the PART_BOUNDARY flag
297                     flags = Boundary::NON_BOUNDARY;
298                     mime_fields.push_back({});
299                     state = State::HEADER_FIELD_START;
300                     return;
301                 }
302             }
303             if (flags == Boundary::END_BOUNDARY)
304             {
305                 if (c == hyphen)
306                 {
307                     state = State::END;
308                 }
309             }
310         }
311 
312         if (index > 0)
313         {
314             lookbehind[index - 1] = c;
315         }
316         else if (prevIndex > 0)
317         {
318             // if our boundary turned out to be rubbish, the captured
319             // lookbehind belongs to partData
320 
321             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
322             prevIndex = 0;
323             partDataMark = i;
324 
325             // reconsider the current character even so it interrupted
326             // the sequence it could be the beginning of a new sequence
327             i--;
328         }
329     }
330 
331     std::string currentHeaderName;
332     std::string currentHeaderValue;
333 
334     static constexpr char cr = '\r';
335     static constexpr char lf = '\n';
336     static constexpr char space = ' ';
337     static constexpr char hyphen = '-';
338     static constexpr char colon = ':';
339 
340     std::array<bool, 256> boundaryIndex{};
341     std::string lookbehind;
342     State state{State::START};
343     Boundary flags{Boundary::NON_BOUNDARY};
344     size_t index = 0;
345     size_t partDataMark = 0;
346     size_t headerFieldMark = 0;
347     size_t headerValueMark = 0;
348 };
349