1 #pragma once
2 
#include <boost/algorithm/string/predicate.hpp>
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <algorithm>
#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
9 
/// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    /// The whole body was consumed without hitting any of the errors below.
    PARSER_SUCCESS = 0,
    /// The content-type header does not start with
    /// "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary is not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with "--" followed by the boundary token.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (no header name).
    ERROR_EMPTY_HEADER,
    /// A header name contains a byte other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value is not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section is not followed by a
    /// line feed.
    ERROR_HEADER_ENDING,
};
22 
/// States of the byte-wise state machine driven by MultipartParser::parse().
enum class State
{
    /// Entry state: resets the match index and continues as START_BOUNDARY.
    START = 0,
    /// Matching the opening "--<boundary>" line and its trailing CR LF.
    START_BOUNDARY,
    /// First byte of a header line: clears the header name and marks where
    /// the name begins.
    HEADER_FIELD_START,
    /// Scanning a header name up to ':'; a CR here ends the header section.
    HEADER_FIELD,
    /// Skipping spaces between ':' and the header value.
    HEADER_VALUE_START,
    /// Scanning a header value up to its terminating CR.
    HEADER_VALUE,
    /// Expecting the LF that follows a header value's CR.
    HEADER_VALUE_ALMOST_DONE,
    /// Expecting the LF that follows the CR ending the header section.
    HEADERS_ALMOST_DONE,
    /// First byte of a part body: marks where the part data begins.
    PART_DATA_START,
    /// Scanning part data while looking for the next boundary.
    PART_DATA,
    /// Closing boundary seen; any remaining input is ignored.
    END,
};
37 
/// Kind of delimiter the parser believes it is in the middle of, decided by
/// the byte that directly follows a fully matched boundary string.
enum class Boundary
{
    /// No delimiter is currently being confirmed.
    NON_BOUNDARY = 0,
    /// The boundary was followed by CR: another part follows once the LF is
    /// seen.
    PART_BOUNDARY,
    /// The boundary was followed by '-': this is the closing delimiter once a
    /// second '-' is seen.
    END_BOUNDARY,
};
44 
45 struct FormPart
46 {
47     boost::beast::http::fields fields;
48     std::string content;
49 };
50 
51 class MultipartParser
52 {
53   public:
54     MultipartParser() = default;
55 
56     [[nodiscard]] ParserError parse(const crow::Request& req)
57     {
58         std::string_view contentType = req.getHeaderValue("content-type");
59 
60         const std::string boundaryFormat = "multipart/form-data; boundary=";
61         if (!boost::starts_with(req.getHeaderValue("content-type"),
62                                 boundaryFormat))
63         {
64             return ParserError::ERROR_BOUNDARY_FORMAT;
65         }
66 
67         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
68 
69         boundary = "\r\n--";
70         boundary += ctBoundary;
71         indexBoundary();
72         lookbehind.resize(boundary.size() + 8);
73         state = State::START;
74 
75         const char* buffer = req.body.data();
76         size_t len = req.body.size();
77         size_t prevIndex = index;
78         char cl = 0;
79 
80         for (size_t i = 0; i < len; i++)
81         {
82             char c = buffer[i];
83             switch (state)
84             {
85                 case State::START:
86                     index = 0;
87                     state = State::START_BOUNDARY;
88                     [[fallthrough]];
89                 case State::START_BOUNDARY:
90                     if (index == boundary.size() - 2)
91                     {
92                         if (c != cr)
93                         {
94                             return ParserError::ERROR_BOUNDARY_CR;
95                         }
96                         index++;
97                         break;
98                     }
99                     else if (index - 1 == boundary.size() - 2)
100                     {
101                         if (c != lf)
102                         {
103                             return ParserError::ERROR_BOUNDARY_LF;
104                         }
105                         index = 0;
106                         mime_fields.push_back({});
107                         state = State::HEADER_FIELD_START;
108                         break;
109                     }
110                     if (c != boundary[index + 2])
111                     {
112                         return ParserError::ERROR_BOUNDARY_DATA;
113                     }
114                     index++;
115                     break;
116                 case State::HEADER_FIELD_START:
117                     currentHeaderName.resize(0);
118                     state = State::HEADER_FIELD;
119                     headerFieldMark = i;
120                     index = 0;
121                     [[fallthrough]];
122                 case State::HEADER_FIELD:
123                     if (c == cr)
124                     {
125                         headerFieldMark = 0;
126                         state = State::HEADERS_ALMOST_DONE;
127                         break;
128                     }
129 
130                     index++;
131                     if (c == hyphen)
132                     {
133                         break;
134                     }
135 
136                     if (c == colon)
137                     {
138                         if (index == 1)
139                         {
140                             return ParserError::ERROR_EMPTY_HEADER;
141                         }
142                         currentHeaderName.append(buffer + headerFieldMark,
143                                                  i - headerFieldMark);
144                         state = State::HEADER_VALUE_START;
145                         break;
146                     }
147                     cl = lower(c);
148                     if (cl < 'a' || cl > 'z')
149                     {
150                         return ParserError::ERROR_HEADER_NAME;
151                     }
152                     break;
153                 case State::HEADER_VALUE_START:
154                     if (c == space)
155                     {
156                         break;
157                     }
158                     headerValueMark = i;
159                     state = State::HEADER_VALUE;
160                     [[fallthrough]];
161                 case State::HEADER_VALUE:
162                     if (c == cr)
163                     {
164                         std::string_view value(buffer + headerValueMark,
165                                                i - headerValueMark);
166                         mime_fields.rbegin()->fields.set(currentHeaderName,
167                                                          value);
168                         state = State::HEADER_VALUE_ALMOST_DONE;
169                     }
170                     break;
171                 case State::HEADER_VALUE_ALMOST_DONE:
172                     if (c != lf)
173                     {
174                         return ParserError::ERROR_HEADER_VALUE;
175                     }
176                     state = State::HEADER_FIELD_START;
177                     break;
178                 case State::HEADERS_ALMOST_DONE:
179                     if (c != lf)
180                     {
181                         return ParserError::ERROR_HEADER_ENDING;
182                     }
183                     state = State::PART_DATA_START;
184                     break;
185                 case State::PART_DATA_START:
186                     state = State::PART_DATA;
187                     partDataMark = i;
188                     [[fallthrough]];
189                 case State::PART_DATA:
190                     if (index == 0)
191                     {
192                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
193                         c = buffer[i];
194                     }
195                     processPartData(prevIndex, index, buffer, i, c, state);
196                     break;
197                 case State::END:
198                     break;
199             }
200         }
201         return ParserError::PARSER_SUCCESS;
202     }
203     std::vector<FormPart> mime_fields;
204     std::string boundary;
205 
206   private:
207     void indexBoundary()
208     {
209         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
210         for (const char current : boundary)
211         {
212             boundaryIndex[static_cast<unsigned char>(current)] = true;
213         }
214     }
215 
216     char lower(char c) const
217     {
218         return static_cast<char>(c | 0x20);
219     }
220 
221     inline bool isBoundaryChar(char c) const
222     {
223         return boundaryIndex[static_cast<unsigned char>(c)];
224     }
225 
226     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
227                          size_t& i)
228     {
229         // boyer-moore derived algorithm to safely skip non-boundary data
230         while (i + boundary.size() <= len)
231         {
232             if (isBoundaryChar(buffer[i + boundaryEnd]))
233             {
234                 break;
235             }
236             i += boundary.size();
237         }
238     }
239 
240     void processPartData(size_t& prevIndex, size_t& index, const char* buffer,
241                          size_t& i, char c, State& state)
242     {
243         prevIndex = index;
244 
245         if (index < boundary.size())
246         {
247             if (boundary[index] == c)
248             {
249                 if (index == 0)
250                 {
251                     mime_fields.rbegin()->content += std::string_view(
252                         buffer + partDataMark, i - partDataMark);
253                 }
254                 index++;
255             }
256             else
257             {
258                 index = 0;
259             }
260         }
261         else if (index == boundary.size())
262         {
263             index++;
264             if (c == cr)
265             {
266                 // cr = part boundary
267                 flags = Boundary::PART_BOUNDARY;
268             }
269             else if (c == hyphen)
270             {
271                 // hyphen = end boundary
272                 flags = Boundary::END_BOUNDARY;
273             }
274             else
275             {
276                 index = 0;
277             }
278         }
279         else
280         {
281             if (flags == Boundary::PART_BOUNDARY)
282             {
283                 index = 0;
284                 if (c == lf)
285                 {
286                     // unset the PART_BOUNDARY flag
287                     flags = Boundary::NON_BOUNDARY;
288                     mime_fields.push_back({});
289                     state = State::HEADER_FIELD_START;
290                     return;
291                 }
292             }
293             if (flags == Boundary::END_BOUNDARY)
294             {
295                 if (c == hyphen)
296                 {
297                     state = State::END;
298                 }
299             }
300         }
301 
302         if (index > 0)
303         {
304             lookbehind[index - 1] = c;
305         }
306         else if (prevIndex > 0)
307         {
308             // if our boundary turned out to be rubbish, the captured
309             // lookbehind belongs to partData
310 
311             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
312             prevIndex = 0;
313             partDataMark = i;
314 
315             // reconsider the current character even so it interrupted
316             // the sequence it could be the beginning of a new sequence
317             i--;
318         }
319     }
320 
321     std::string currentHeaderName;
322     std::string currentHeaderValue;
323 
324     static constexpr char cr = '\r';
325     static constexpr char lf = '\n';
326     static constexpr char space = ' ';
327     static constexpr char hyphen = '-';
328     static constexpr char colon = ':';
329 
330     std::array<bool, 256> boundaryIndex;
331     std::string lookbehind;
332     State state;
333     Boundary flags;
334     size_t index = 0;
335     size_t partDataMark = 0;
336     size_t headerFieldMark = 0;
337     size_t headerValueMark = 0;
338 };
339