#pragma once

#include <boost/algorithm/string/predicate.hpp>
#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>
// Result codes returned by MultipartParser::parse().
enum class ParserError
{
    // The whole request body was consumed without a syntax error.
    PARSER_SUCCESS,
    // Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    // The opening delimiter is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    // The carriage return after the opening delimiter is not followed by a
    // line feed.
    ERROR_BOUNDARY_LF,
    // The body does not start with "--" followed by the boundary string.
    ERROR_BOUNDARY_DATA,
    // A header line starts with ':' (empty header name).
    ERROR_EMPTY_HEADER,
    // A header name contains a character other than a letter or '-'.
    ERROR_HEADER_NAME,
    // The carriage return ending a header value is not followed by a line
    // feed.
    ERROR_HEADER_VALUE,
    // The carriage return ending the header section is not followed by a
    // line feed.
    ERROR_HEADER_ENDING
};
// States of the character-driven state machine in MultipartParser::parse().
enum class State
{
    // Before the first body character; resets the match index and falls
    // through to START_BOUNDARY.
    START,
    // Matching the opening "--<boundary>" delimiter and its trailing CR LF.
    START_BOUNDARY,
    // First character of a header line; records where the name begins.
    HEADER_FIELD_START,
    // Inside a header name, up to the ':' separator.
    HEADER_FIELD,
    // After ':'; skips spaces until the value begins.
    HEADER_VALUE_START,
    // Inside a header value, up to the terminating CR.
    HEADER_VALUE,
    // CR of a header value seen; an LF must follow.
    HEADER_VALUE_ALMOST_DONE,
    // CR of the empty line ending the headers seen; an LF must follow.
    HEADERS_ALMOST_DONE,
    // First character of a part's payload; records where the payload begins.
    PART_DATA_START,
    // Inside a part's payload, watching for the next delimiter.
    PART_DATA,
    // Closing delimiter seen; remaining input is ignored.
    END
};
// Kind of delimiter candidate currently being matched inside part data.
enum class Boundary
{
    // No delimiter candidate is pending.
    NON_BOUNDARY,
    // The boundary string was followed by CR: another part may follow.
    PART_BOUNDARY,
    // The boundary string was followed by '-': this may be the closing
    // delimiter.
    END_BOUNDARY,
};
45af4edf68SEd Tanous struct FormPart
46af4edf68SEd Tanous {
47af4edf68SEd Tanous     boost::beast::http::fields fields;
48af4edf68SEd Tanous     std::string content;
49af4edf68SEd Tanous };
51af4edf68SEd Tanous class MultipartParser
52af4edf68SEd Tanous {
53af4edf68SEd Tanous   public:
54af4edf68SEd Tanous     MultipartParser() = default;
55af4edf68SEd Tanous 
56af4edf68SEd Tanous     [[nodiscard]] ParserError parse(const crow::Request& req)
57af4edf68SEd Tanous     {
58af4edf68SEd Tanous         std::string_view contentType = req.getHeaderValue("content-type");
59af4edf68SEd Tanous 
60af4edf68SEd Tanous         const std::string boundaryFormat = "multipart/form-data; boundary=";
61af4edf68SEd Tanous         if (!boost::starts_with(req.getHeaderValue("content-type"),
62af4edf68SEd Tanous                                 boundaryFormat))
63af4edf68SEd Tanous         {
64af4edf68SEd Tanous             return ParserError::ERROR_BOUNDARY_FORMAT;
65af4edf68SEd Tanous         }
66af4edf68SEd Tanous 
67af4edf68SEd Tanous         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
68af4edf68SEd Tanous 
69af4edf68SEd Tanous         boundary = "\r\n--";
70af4edf68SEd Tanous         boundary += ctBoundary;
71af4edf68SEd Tanous         indexBoundary();
72af4edf68SEd Tanous         lookbehind.resize(boundary.size() + 8);
73af4edf68SEd Tanous         state = State::START;
74af4edf68SEd Tanous 
75af4edf68SEd Tanous         const char* buffer = req.body.data();
76af4edf68SEd Tanous         size_t len = req.body.size();
77af4edf68SEd Tanous         size_t prevIndex = index;
78af4edf68SEd Tanous         char cl = 0;
79af4edf68SEd Tanous 
80af4edf68SEd Tanous         for (size_t i = 0; i < len; i++)
81af4edf68SEd Tanous         {
82*ca45aa3cSEd Tanous             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
83af4edf68SEd Tanous             char c = buffer[i];
84af4edf68SEd Tanous             switch (state)
85af4edf68SEd Tanous             {
86af4edf68SEd Tanous                 case State::START:
87af4edf68SEd Tanous                     index = 0;
88af4edf68SEd Tanous                     state = State::START_BOUNDARY;
89af4edf68SEd Tanous                     [[fallthrough]];
90af4edf68SEd Tanous                 case State::START_BOUNDARY:
91af4edf68SEd Tanous                     if (index == boundary.size() - 2)
92af4edf68SEd Tanous                     {
93af4edf68SEd Tanous                         if (c != cr)
94af4edf68SEd Tanous                         {
95af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_CR;
96af4edf68SEd Tanous                         }
97af4edf68SEd Tanous                         index++;
98af4edf68SEd Tanous                         break;
99af4edf68SEd Tanous                     }
100af4edf68SEd Tanous                     else if (index - 1 == boundary.size() - 2)
101af4edf68SEd Tanous                     {
102af4edf68SEd Tanous                         if (c != lf)
103af4edf68SEd Tanous                         {
104af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_LF;
105af4edf68SEd Tanous                         }
106af4edf68SEd Tanous                         index = 0;
107af4edf68SEd Tanous                         mime_fields.push_back({});
108af4edf68SEd Tanous                         state = State::HEADER_FIELD_START;
109af4edf68SEd Tanous                         break;
110af4edf68SEd Tanous                     }
111af4edf68SEd Tanous                     if (c != boundary[index + 2])
112af4edf68SEd Tanous                     {
113af4edf68SEd Tanous                         return ParserError::ERROR_BOUNDARY_DATA;
114af4edf68SEd Tanous                     }
115af4edf68SEd Tanous                     index++;
116af4edf68SEd Tanous                     break;
117af4edf68SEd Tanous                 case State::HEADER_FIELD_START:
118af4edf68SEd Tanous                     currentHeaderName.resize(0);
119af4edf68SEd Tanous                     state = State::HEADER_FIELD;
120af4edf68SEd Tanous                     headerFieldMark = i;
121af4edf68SEd Tanous                     index = 0;
122af4edf68SEd Tanous                     [[fallthrough]];
123af4edf68SEd Tanous                 case State::HEADER_FIELD:
124af4edf68SEd Tanous                     if (c == cr)
125af4edf68SEd Tanous                     {
126af4edf68SEd Tanous                         headerFieldMark = 0;
127af4edf68SEd Tanous                         state = State::HEADERS_ALMOST_DONE;
128af4edf68SEd Tanous                         break;
129af4edf68SEd Tanous                     }
130af4edf68SEd Tanous 
131af4edf68SEd Tanous                     index++;
132af4edf68SEd Tanous                     if (c == hyphen)
133af4edf68SEd Tanous                     {
134af4edf68SEd Tanous                         break;
135af4edf68SEd Tanous                     }
136af4edf68SEd Tanous 
137af4edf68SEd Tanous                     if (c == colon)
138af4edf68SEd Tanous                     {
139af4edf68SEd Tanous                         if (index == 1)
140af4edf68SEd Tanous                         {
141af4edf68SEd Tanous                             return ParserError::ERROR_EMPTY_HEADER;
142af4edf68SEd Tanous                         }
143*ca45aa3cSEd Tanous 
144*ca45aa3cSEd Tanous                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
145af4edf68SEd Tanous                         currentHeaderName.append(buffer + headerFieldMark,
146af4edf68SEd Tanous                                                  i - headerFieldMark);
147af4edf68SEd Tanous                         state = State::HEADER_VALUE_START;
148af4edf68SEd Tanous                         break;
149af4edf68SEd Tanous                     }
150af4edf68SEd Tanous                     cl = lower(c);
151af4edf68SEd Tanous                     if (cl < 'a' || cl > 'z')
152af4edf68SEd Tanous                     {
153af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_NAME;
154af4edf68SEd Tanous                     }
155af4edf68SEd Tanous                     break;
156af4edf68SEd Tanous                 case State::HEADER_VALUE_START:
157af4edf68SEd Tanous                     if (c == space)
158af4edf68SEd Tanous                     {
159af4edf68SEd Tanous                         break;
160af4edf68SEd Tanous                     }
161af4edf68SEd Tanous                     headerValueMark = i;
162af4edf68SEd Tanous                     state = State::HEADER_VALUE;
163af4edf68SEd Tanous                     [[fallthrough]];
164af4edf68SEd Tanous                 case State::HEADER_VALUE:
165af4edf68SEd Tanous                     if (c == cr)
166af4edf68SEd Tanous                     {
167*ca45aa3cSEd Tanous                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
168af4edf68SEd Tanous                         std::string_view value(buffer + headerValueMark,
169af4edf68SEd Tanous                                                i - headerValueMark);
170af4edf68SEd Tanous                         mime_fields.rbegin()->fields.set(currentHeaderName,
171af4edf68SEd Tanous                                                          value);
172af4edf68SEd Tanous                         state = State::HEADER_VALUE_ALMOST_DONE;
173af4edf68SEd Tanous                     }
174af4edf68SEd Tanous                     break;
175af4edf68SEd Tanous                 case State::HEADER_VALUE_ALMOST_DONE:
176af4edf68SEd Tanous                     if (c != lf)
177af4edf68SEd Tanous                     {
178af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_VALUE;
179af4edf68SEd Tanous                     }
180af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
181af4edf68SEd Tanous                     break;
182af4edf68SEd Tanous                 case State::HEADERS_ALMOST_DONE:
183af4edf68SEd Tanous                     if (c != lf)
184af4edf68SEd Tanous                     {
185af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_ENDING;
186af4edf68SEd Tanous                     }
187af4edf68SEd Tanous                     state = State::PART_DATA_START;
188af4edf68SEd Tanous                     break;
189af4edf68SEd Tanous                 case State::PART_DATA_START:
190af4edf68SEd Tanous                     state = State::PART_DATA;
191af4edf68SEd Tanous                     partDataMark = i;
192af4edf68SEd Tanous                     [[fallthrough]];
193af4edf68SEd Tanous                 case State::PART_DATA:
194af4edf68SEd Tanous                     if (index == 0)
195af4edf68SEd Tanous                     {
196af4edf68SEd Tanous                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
197*ca45aa3cSEd Tanous 
198*ca45aa3cSEd Tanous                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
199af4edf68SEd Tanous                         c = buffer[i];
200af4edf68SEd Tanous                     }
201af4edf68SEd Tanous                     processPartData(prevIndex, index, buffer, i, c, state);
202af4edf68SEd Tanous                     break;
203af4edf68SEd Tanous                 case State::END:
204af4edf68SEd Tanous                     break;
205af4edf68SEd Tanous             }
206af4edf68SEd Tanous         }
207af4edf68SEd Tanous         return ParserError::PARSER_SUCCESS;
208af4edf68SEd Tanous     }
209af4edf68SEd Tanous     std::vector<FormPart> mime_fields;
210af4edf68SEd Tanous     std::string boundary;
211af4edf68SEd Tanous 
212af4edf68SEd Tanous   private:
213af4edf68SEd Tanous     void indexBoundary()
214af4edf68SEd Tanous     {
215af4edf68SEd Tanous         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
216af4edf68SEd Tanous         for (const char current : boundary)
217af4edf68SEd Tanous         {
218af4edf68SEd Tanous             boundaryIndex[static_cast<unsigned char>(current)] = true;
219af4edf68SEd Tanous         }
220af4edf68SEd Tanous     }
221af4edf68SEd Tanous 
222af4edf68SEd Tanous     char lower(char c) const
223af4edf68SEd Tanous     {
224af4edf68SEd Tanous         return static_cast<char>(c | 0x20);
225af4edf68SEd Tanous     }
226af4edf68SEd Tanous 
227af4edf68SEd Tanous     inline bool isBoundaryChar(char c) const
228af4edf68SEd Tanous     {
229af4edf68SEd Tanous         return boundaryIndex[static_cast<unsigned char>(c)];
230af4edf68SEd Tanous     }
231af4edf68SEd Tanous 
232af4edf68SEd Tanous     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
233af4edf68SEd Tanous                          size_t& i)
234af4edf68SEd Tanous     {
235af4edf68SEd Tanous         // boyer-moore derived algorithm to safely skip non-boundary data
236af4edf68SEd Tanous         while (i + boundary.size() <= len)
237af4edf68SEd Tanous         {
238*ca45aa3cSEd Tanous             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
239af4edf68SEd Tanous             if (isBoundaryChar(buffer[i + boundaryEnd]))
240af4edf68SEd Tanous             {
241af4edf68SEd Tanous                 break;
242af4edf68SEd Tanous             }
243af4edf68SEd Tanous             i += boundary.size();
244af4edf68SEd Tanous         }
245af4edf68SEd Tanous     }
246af4edf68SEd Tanous 
247af4edf68SEd Tanous     void processPartData(size_t& prevIndex, size_t& index, const char* buffer,
248af4edf68SEd Tanous                          size_t& i, char c, State& state)
249af4edf68SEd Tanous     {
250af4edf68SEd Tanous         prevIndex = index;
251af4edf68SEd Tanous 
252af4edf68SEd Tanous         if (index < boundary.size())
253af4edf68SEd Tanous         {
254af4edf68SEd Tanous             if (boundary[index] == c)
255af4edf68SEd Tanous             {
256af4edf68SEd Tanous                 if (index == 0)
257af4edf68SEd Tanous                 {
258*ca45aa3cSEd Tanous                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
259*ca45aa3cSEd Tanous                     const char* start = buffer + partDataMark;
260*ca45aa3cSEd Tanous                     size_t size = i - partDataMark;
261*ca45aa3cSEd Tanous                     mime_fields.rbegin()->content +=
262*ca45aa3cSEd Tanous                         std::string_view(start, size);
263af4edf68SEd Tanous                 }
264af4edf68SEd Tanous                 index++;
265af4edf68SEd Tanous             }
266af4edf68SEd Tanous             else
267af4edf68SEd Tanous             {
268af4edf68SEd Tanous                 index = 0;
269af4edf68SEd Tanous             }
270af4edf68SEd Tanous         }
271af4edf68SEd Tanous         else if (index == boundary.size())
272af4edf68SEd Tanous         {
273af4edf68SEd Tanous             index++;
274af4edf68SEd Tanous             if (c == cr)
275af4edf68SEd Tanous             {
276af4edf68SEd Tanous                 // cr = part boundary
277af4edf68SEd Tanous                 flags = Boundary::PART_BOUNDARY;
278af4edf68SEd Tanous             }
279af4edf68SEd Tanous             else if (c == hyphen)
280af4edf68SEd Tanous             {
281af4edf68SEd Tanous                 // hyphen = end boundary
282af4edf68SEd Tanous                 flags = Boundary::END_BOUNDARY;
283af4edf68SEd Tanous             }
284af4edf68SEd Tanous             else
285af4edf68SEd Tanous             {
286af4edf68SEd Tanous                 index = 0;
287af4edf68SEd Tanous             }
288af4edf68SEd Tanous         }
289af4edf68SEd Tanous         else
290af4edf68SEd Tanous         {
291af4edf68SEd Tanous             if (flags == Boundary::PART_BOUNDARY)
292af4edf68SEd Tanous             {
293af4edf68SEd Tanous                 index = 0;
294af4edf68SEd Tanous                 if (c == lf)
295af4edf68SEd Tanous                 {
296af4edf68SEd Tanous                     // unset the PART_BOUNDARY flag
297af4edf68SEd Tanous                     flags = Boundary::NON_BOUNDARY;
298af4edf68SEd Tanous                     mime_fields.push_back({});
299af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
300af4edf68SEd Tanous                     return;
301af4edf68SEd Tanous                 }
302af4edf68SEd Tanous             }
303af4edf68SEd Tanous             if (flags == Boundary::END_BOUNDARY)
304af4edf68SEd Tanous             {
305af4edf68SEd Tanous                 if (c == hyphen)
306af4edf68SEd Tanous                 {
307af4edf68SEd Tanous                     state = State::END;
308af4edf68SEd Tanous                 }
309af4edf68SEd Tanous             }
310af4edf68SEd Tanous         }
311af4edf68SEd Tanous 
312af4edf68SEd Tanous         if (index > 0)
313af4edf68SEd Tanous         {
314af4edf68SEd Tanous             lookbehind[index - 1] = c;
315af4edf68SEd Tanous         }
316af4edf68SEd Tanous         else if (prevIndex > 0)
317af4edf68SEd Tanous         {
318af4edf68SEd Tanous             // if our boundary turned out to be rubbish, the captured
319af4edf68SEd Tanous             // lookbehind belongs to partData
320af4edf68SEd Tanous 
321af4edf68SEd Tanous             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
322af4edf68SEd Tanous             prevIndex = 0;
323af4edf68SEd Tanous             partDataMark = i;
324af4edf68SEd Tanous 
325af4edf68SEd Tanous             // reconsider the current character even so it interrupted
326af4edf68SEd Tanous             // the sequence it could be the beginning of a new sequence
327af4edf68SEd Tanous             i--;
328af4edf68SEd Tanous         }
329af4edf68SEd Tanous     }
330af4edf68SEd Tanous 
331af4edf68SEd Tanous     std::string currentHeaderName;
332af4edf68SEd Tanous     std::string currentHeaderValue;
333af4edf68SEd Tanous 
334af4edf68SEd Tanous     static constexpr char cr = '\r';
335af4edf68SEd Tanous     static constexpr char lf = '\n';
336af4edf68SEd Tanous     static constexpr char space = ' ';
337af4edf68SEd Tanous     static constexpr char hyphen = '-';
338af4edf68SEd Tanous     static constexpr char colon = ':';
339af4edf68SEd Tanous 
340af4edf68SEd Tanous     std::array<bool, 256> boundaryIndex;
341af4edf68SEd Tanous     std::string lookbehind;
342af4edf68SEd Tanous     State state;
343af4edf68SEd Tanous     Boundary flags;
344af4edf68SEd Tanous     size_t index = 0;
345af4edf68SEd Tanous     size_t partDataMark = 0;
346af4edf68SEd Tanous     size_t headerFieldMark = 0;
347af4edf68SEd Tanous     size_t headerValueMark = 0;
348af4edf68SEd Tanous };