1af4edf68SEd Tanous #pragma once
2af4edf68SEd Tanous 
#include "http_request.hpp"

#include <boost/beast/http/fields.hpp>

#include <array>
#include <cstddef>
#include <ranges>
#include <string>
#include <string_view>
#include <vector>
10af4edf68SEd Tanous 
/**
 * @brief Result codes returned by MultipartParser::parse().
 */
enum class ParserError
{
    /// The body was consumed up to and including the closing boundary.
    PARSER_SUCCESS,
    /// Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line is not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary lacks a line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with the expected "--<boundary>" bytes.
    ERROR_BOUNDARY_DATA,
    /// A header line starts with ':' (nothing precedes the colon).
    ERROR_EMPTY_HEADER,
    /// A header name contains a byte other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value lacks a line feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section lacks a line feed.
    ERROR_HEADER_ENDING,
    /// A header line ended (CR LF) after name bytes but before any ':'.
    ERROR_UNEXPECTED_END_OF_HEADER,
    /// The body ended before the closing boundary was seen.
    ERROR_UNEXPECTED_END_OF_INPUT,
    /// Guard: a boundary candidate outgrew the lookbehind buffer.
    ERROR_OUT_OF_RANGE
};
26af4edf68SEd Tanous 
/**
 * @brief States of the byte-driven state machine in MultipartParser::parse().
 */
enum class State
{
    /// Initial state; resets the match index and enters START_BOUNDARY.
    START,
    /// Matching the opening "--<boundary>" line and its trailing CR LF.
    START_BOUNDARY,
    /// First byte of a header line; records where the name begins.
    HEADER_FIELD_START,
    /// Inside a header name, up to ':' (or CR for the blank line).
    HEADER_FIELD,
    /// After ':'; skipping spaces in front of the header value.
    HEADER_VALUE_START,
    /// Inside a header value, up to the terminating CR.
    HEADER_VALUE,
    /// Saw the CR that ends a header value; LF is expected next.
    HEADER_VALUE_ALMOST_DONE,
    /// Saw a CR where a header name would start; LF is expected next.
    HEADERS_ALMOST_DONE,
    /// First byte of a part's payload; records where the payload begins.
    PART_DATA_START,
    /// Inside a part's payload, watching for the next boundary.
    PART_DATA,
    /// The closing boundary was consumed; remaining bytes are ignored.
    END
};
41af4edf68SEd Tanous 
/**
 * @brief Kind of boundary the parser believes it is currently finishing,
 *        decided by the byte that follows a full boundary match.
 */
enum class Boundary
{
    /// No boundary is being completed.
    NON_BOUNDARY,
    /// Boundary followed by CR: another part is expected after the LF.
    PART_BOUNDARY,
    /// Boundary followed by '-': a second '-' closes the whole body.
    END_BOUNDARY,
};
48af4edf68SEd Tanous 
49af4edf68SEd Tanous struct FormPart
50af4edf68SEd Tanous {
51af4edf68SEd Tanous     boost::beast::http::fields fields;
52af4edf68SEd Tanous     std::string content;
53af4edf68SEd Tanous };
54af4edf68SEd Tanous 
55af4edf68SEd Tanous class MultipartParser
56af4edf68SEd Tanous {
57af4edf68SEd Tanous   public:
58af4edf68SEd Tanous     MultipartParser() = default;
59af4edf68SEd Tanous 
parse(const crow::Request & req)60af4edf68SEd Tanous     [[nodiscard]] ParserError parse(const crow::Request& req)
61af4edf68SEd Tanous     {
62af4edf68SEd Tanous         std::string_view contentType = req.getHeaderValue("content-type");
63af4edf68SEd Tanous 
64af4edf68SEd Tanous         const std::string boundaryFormat = "multipart/form-data; boundary=";
6511ba3979SEd Tanous         if (!contentType.starts_with(boundaryFormat))
66af4edf68SEd Tanous         {
67af4edf68SEd Tanous             return ParserError::ERROR_BOUNDARY_FORMAT;
68af4edf68SEd Tanous         }
69af4edf68SEd Tanous 
70af4edf68SEd Tanous         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
71af4edf68SEd Tanous 
72af4edf68SEd Tanous         boundary = "\r\n--";
73af4edf68SEd Tanous         boundary += ctBoundary;
74af4edf68SEd Tanous         indexBoundary();
75af4edf68SEd Tanous         lookbehind.resize(boundary.size() + 8);
76af4edf68SEd Tanous         state = State::START;
77af4edf68SEd Tanous 
780e31e952SPatrick Williams         const std::string& buffer = req.body();
790e31e952SPatrick Williams         size_t len = buffer.size();
80af4edf68SEd Tanous         char cl = 0;
81af4edf68SEd Tanous 
82af4edf68SEd Tanous         for (size_t i = 0; i < len; i++)
83af4edf68SEd Tanous         {
84af4edf68SEd Tanous             char c = buffer[i];
85af4edf68SEd Tanous             switch (state)
86af4edf68SEd Tanous             {
87af4edf68SEd Tanous                 case State::START:
88af4edf68SEd Tanous                     index = 0;
89af4edf68SEd Tanous                     state = State::START_BOUNDARY;
90af4edf68SEd Tanous                     [[fallthrough]];
91af4edf68SEd Tanous                 case State::START_BOUNDARY:
92af4edf68SEd Tanous                     if (index == boundary.size() - 2)
93af4edf68SEd Tanous                     {
94af4edf68SEd Tanous                         if (c != cr)
95af4edf68SEd Tanous                         {
96af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_CR;
97af4edf68SEd Tanous                         }
98af4edf68SEd Tanous                         index++;
99af4edf68SEd Tanous                         break;
100af4edf68SEd Tanous                     }
101af4edf68SEd Tanous                     else if (index - 1 == boundary.size() - 2)
102af4edf68SEd Tanous                     {
103af4edf68SEd Tanous                         if (c != lf)
104af4edf68SEd Tanous                         {
105af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_LF;
106af4edf68SEd Tanous                         }
107af4edf68SEd Tanous                         index = 0;
10826eee3a1SPatrick Williams                         mime_fields.emplace_back();
109af4edf68SEd Tanous                         state = State::HEADER_FIELD_START;
110af4edf68SEd Tanous                         break;
111af4edf68SEd Tanous                     }
112af4edf68SEd Tanous                     if (c != boundary[index + 2])
113af4edf68SEd Tanous                     {
114af4edf68SEd Tanous                         return ParserError::ERROR_BOUNDARY_DATA;
115af4edf68SEd Tanous                     }
116af4edf68SEd Tanous                     index++;
117af4edf68SEd Tanous                     break;
118af4edf68SEd Tanous                 case State::HEADER_FIELD_START:
119af4edf68SEd Tanous                     currentHeaderName.resize(0);
120af4edf68SEd Tanous                     state = State::HEADER_FIELD;
121af4edf68SEd Tanous                     headerFieldMark = i;
122af4edf68SEd Tanous                     index = 0;
123af4edf68SEd Tanous                     [[fallthrough]];
124af4edf68SEd Tanous                 case State::HEADER_FIELD:
125af4edf68SEd Tanous                     if (c == cr)
126af4edf68SEd Tanous                     {
127af4edf68SEd Tanous                         headerFieldMark = 0;
128af4edf68SEd Tanous                         state = State::HEADERS_ALMOST_DONE;
129af4edf68SEd Tanous                         break;
130af4edf68SEd Tanous                     }
131af4edf68SEd Tanous 
132af4edf68SEd Tanous                     index++;
133af4edf68SEd Tanous                     if (c == hyphen)
134af4edf68SEd Tanous                     {
135af4edf68SEd Tanous                         break;
136af4edf68SEd Tanous                     }
137af4edf68SEd Tanous 
138af4edf68SEd Tanous                     if (c == colon)
139af4edf68SEd Tanous                     {
140af4edf68SEd Tanous                         if (index == 1)
141af4edf68SEd Tanous                         {
142af4edf68SEd Tanous                             return ParserError::ERROR_EMPTY_HEADER;
143af4edf68SEd Tanous                         }
144ca45aa3cSEd Tanous 
1450e31e952SPatrick Williams                         currentHeaderName.append(&buffer[headerFieldMark],
146af4edf68SEd Tanous                                                  i - headerFieldMark);
147af4edf68SEd Tanous                         state = State::HEADER_VALUE_START;
148af4edf68SEd Tanous                         break;
149af4edf68SEd Tanous                     }
150af4edf68SEd Tanous                     cl = lower(c);
151af4edf68SEd Tanous                     if (cl < 'a' || cl > 'z')
152af4edf68SEd Tanous                     {
153af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_NAME;
154af4edf68SEd Tanous                     }
155af4edf68SEd Tanous                     break;
156af4edf68SEd Tanous                 case State::HEADER_VALUE_START:
157af4edf68SEd Tanous                     if (c == space)
158af4edf68SEd Tanous                     {
159af4edf68SEd Tanous                         break;
160af4edf68SEd Tanous                     }
161af4edf68SEd Tanous                     headerValueMark = i;
162af4edf68SEd Tanous                     state = State::HEADER_VALUE;
163af4edf68SEd Tanous                     [[fallthrough]];
164af4edf68SEd Tanous                 case State::HEADER_VALUE:
165af4edf68SEd Tanous                     if (c == cr)
166af4edf68SEd Tanous                     {
1670e31e952SPatrick Williams                         std::string_view value(&buffer[headerValueMark],
168af4edf68SEd Tanous                                                i - headerValueMark);
169af4edf68SEd Tanous                         mime_fields.rbegin()->fields.set(currentHeaderName,
170af4edf68SEd Tanous                                                          value);
171af4edf68SEd Tanous                         state = State::HEADER_VALUE_ALMOST_DONE;
172af4edf68SEd Tanous                     }
173af4edf68SEd Tanous                     break;
174af4edf68SEd Tanous                 case State::HEADER_VALUE_ALMOST_DONE:
175af4edf68SEd Tanous                     if (c != lf)
176af4edf68SEd Tanous                     {
177af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_VALUE;
178af4edf68SEd Tanous                     }
179af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
180af4edf68SEd Tanous                     break;
181af4edf68SEd Tanous                 case State::HEADERS_ALMOST_DONE:
182af4edf68SEd Tanous                     if (c != lf)
183af4edf68SEd Tanous                     {
184af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_ENDING;
185af4edf68SEd Tanous                     }
18618e3f7fbSKrzysztof Grobelny                     if (index > 0)
18718e3f7fbSKrzysztof Grobelny                     {
18818e3f7fbSKrzysztof Grobelny                         return ParserError::ERROR_UNEXPECTED_END_OF_HEADER;
18918e3f7fbSKrzysztof Grobelny                     }
190af4edf68SEd Tanous                     state = State::PART_DATA_START;
191af4edf68SEd Tanous                     break;
192af4edf68SEd Tanous                 case State::PART_DATA_START:
193af4edf68SEd Tanous                     state = State::PART_DATA;
194af4edf68SEd Tanous                     partDataMark = i;
195af4edf68SEd Tanous                     [[fallthrough]];
196af4edf68SEd Tanous                 case State::PART_DATA:
19718e3f7fbSKrzysztof Grobelny                 {
198af4edf68SEd Tanous                     if (index == 0)
199af4edf68SEd Tanous                     {
2000e31e952SPatrick Williams                         skipNonBoundary(buffer, boundary.size() - 1, i);
201af4edf68SEd Tanous                         c = buffer[i];
202af4edf68SEd Tanous                     }
2030e31e952SPatrick Williams                     if (auto ec = processPartData(buffer, i, c);
2040e31e952SPatrick Williams                         ec != ParserError::PARSER_SUCCESS)
20518e3f7fbSKrzysztof Grobelny                     {
20618e3f7fbSKrzysztof Grobelny                         return ec;
20718e3f7fbSKrzysztof Grobelny                     }
208af4edf68SEd Tanous                     break;
20918e3f7fbSKrzysztof Grobelny                 }
210af4edf68SEd Tanous                 case State::END:
211af4edf68SEd Tanous                     break;
2124da0490bSEd Tanous                 default:
2134da0490bSEd Tanous                     return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
214af4edf68SEd Tanous             }
215af4edf68SEd Tanous         }
21618e3f7fbSKrzysztof Grobelny 
21718e3f7fbSKrzysztof Grobelny         if (state != State::END)
21818e3f7fbSKrzysztof Grobelny         {
21918e3f7fbSKrzysztof Grobelny             return ParserError::ERROR_UNEXPECTED_END_OF_INPUT;
22018e3f7fbSKrzysztof Grobelny         }
22118e3f7fbSKrzysztof Grobelny 
222af4edf68SEd Tanous         return ParserError::PARSER_SUCCESS;
223af4edf68SEd Tanous     }
224af4edf68SEd Tanous     std::vector<FormPart> mime_fields;
225af4edf68SEd Tanous     std::string boundary;
226af4edf68SEd Tanous 
227af4edf68SEd Tanous   private:
indexBoundary()228af4edf68SEd Tanous     void indexBoundary()
229af4edf68SEd Tanous     {
2303544d2a7SEd Tanous         std::ranges::fill(boundaryIndex, 0);
231af4edf68SEd Tanous         for (const char current : boundary)
232af4edf68SEd Tanous         {
233af4edf68SEd Tanous             boundaryIndex[static_cast<unsigned char>(current)] = true;
234af4edf68SEd Tanous         }
235af4edf68SEd Tanous     }
236af4edf68SEd Tanous 
lower(char c)23756d2396dSEd Tanous     static char lower(char c)
238af4edf68SEd Tanous     {
239af4edf68SEd Tanous         return static_cast<char>(c | 0x20);
240af4edf68SEd Tanous     }
241af4edf68SEd Tanous 
isBoundaryChar(char c) const242*9de65b34SEd Tanous     bool isBoundaryChar(char c) const
243af4edf68SEd Tanous     {
244af4edf68SEd Tanous         return boundaryIndex[static_cast<unsigned char>(c)];
245af4edf68SEd Tanous     }
246af4edf68SEd Tanous 
skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)2470e31e952SPatrick Williams     void skipNonBoundary(const std::string& buffer, size_t boundaryEnd,
248af4edf68SEd Tanous                          size_t& i)
249af4edf68SEd Tanous     {
250af4edf68SEd Tanous         // boyer-moore derived algorithm to safely skip non-boundary data
2510e31e952SPatrick Williams         while (i + boundary.size() <= buffer.length())
252af4edf68SEd Tanous         {
253af4edf68SEd Tanous             if (isBoundaryChar(buffer[i + boundaryEnd]))
254af4edf68SEd Tanous             {
255af4edf68SEd Tanous                 break;
256af4edf68SEd Tanous             }
257af4edf68SEd Tanous             i += boundary.size();
258af4edf68SEd Tanous         }
259af4edf68SEd Tanous     }
260af4edf68SEd Tanous 
processPartData(const std::string & buffer,size_t & i,char c)2610e31e952SPatrick Williams     ParserError processPartData(const std::string& buffer, size_t& i, char c)
262af4edf68SEd Tanous     {
26318e3f7fbSKrzysztof Grobelny         size_t prevIndex = index;
264af4edf68SEd Tanous 
265af4edf68SEd Tanous         if (index < boundary.size())
266af4edf68SEd Tanous         {
267af4edf68SEd Tanous             if (boundary[index] == c)
268af4edf68SEd Tanous             {
269af4edf68SEd Tanous                 if (index == 0)
270af4edf68SEd Tanous                 {
2710e31e952SPatrick Williams                     const char* start = &buffer[partDataMark];
272ca45aa3cSEd Tanous                     size_t size = i - partDataMark;
27389492a15SPatrick Williams                     mime_fields.rbegin()->content += std::string_view(start,
27489492a15SPatrick Williams                                                                       size);
275af4edf68SEd Tanous                 }
276af4edf68SEd Tanous                 index++;
277af4edf68SEd Tanous             }
278af4edf68SEd Tanous             else
279af4edf68SEd Tanous             {
280af4edf68SEd Tanous                 index = 0;
281af4edf68SEd Tanous             }
282af4edf68SEd Tanous         }
283af4edf68SEd Tanous         else if (index == boundary.size())
284af4edf68SEd Tanous         {
285af4edf68SEd Tanous             index++;
286af4edf68SEd Tanous             if (c == cr)
287af4edf68SEd Tanous             {
288af4edf68SEd Tanous                 // cr = part boundary
289af4edf68SEd Tanous                 flags = Boundary::PART_BOUNDARY;
290af4edf68SEd Tanous             }
291af4edf68SEd Tanous             else if (c == hyphen)
292af4edf68SEd Tanous             {
293af4edf68SEd Tanous                 // hyphen = end boundary
294af4edf68SEd Tanous                 flags = Boundary::END_BOUNDARY;
295af4edf68SEd Tanous             }
296af4edf68SEd Tanous             else
297af4edf68SEd Tanous             {
298af4edf68SEd Tanous                 index = 0;
299af4edf68SEd Tanous             }
300af4edf68SEd Tanous         }
301af4edf68SEd Tanous         else
302af4edf68SEd Tanous         {
303af4edf68SEd Tanous             if (flags == Boundary::PART_BOUNDARY)
304af4edf68SEd Tanous             {
305af4edf68SEd Tanous                 index = 0;
306af4edf68SEd Tanous                 if (c == lf)
307af4edf68SEd Tanous                 {
308af4edf68SEd Tanous                     // unset the PART_BOUNDARY flag
309af4edf68SEd Tanous                     flags = Boundary::NON_BOUNDARY;
31026eee3a1SPatrick Williams                     mime_fields.emplace_back();
311af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
31218e3f7fbSKrzysztof Grobelny                     return ParserError::PARSER_SUCCESS;
313af4edf68SEd Tanous                 }
314af4edf68SEd Tanous             }
315af4edf68SEd Tanous             if (flags == Boundary::END_BOUNDARY)
316af4edf68SEd Tanous             {
317af4edf68SEd Tanous                 if (c == hyphen)
318af4edf68SEd Tanous                 {
319af4edf68SEd Tanous                     state = State::END;
320af4edf68SEd Tanous                 }
32118e3f7fbSKrzysztof Grobelny                 else
32218e3f7fbSKrzysztof Grobelny                 {
32318e3f7fbSKrzysztof Grobelny                     flags = Boundary::NON_BOUNDARY;
32418e3f7fbSKrzysztof Grobelny                     index = 0;
32518e3f7fbSKrzysztof Grobelny                 }
326af4edf68SEd Tanous             }
327af4edf68SEd Tanous         }
328af4edf68SEd Tanous 
329af4edf68SEd Tanous         if (index > 0)
330af4edf68SEd Tanous         {
33118e3f7fbSKrzysztof Grobelny             if ((index - 1) >= lookbehind.size())
33218e3f7fbSKrzysztof Grobelny             {
33318e3f7fbSKrzysztof Grobelny                 // Should never happen, but when it does it won't cause crash
33418e3f7fbSKrzysztof Grobelny                 return ParserError::ERROR_OUT_OF_RANGE;
33518e3f7fbSKrzysztof Grobelny             }
336af4edf68SEd Tanous             lookbehind[index - 1] = c;
337af4edf68SEd Tanous         }
338af4edf68SEd Tanous         else if (prevIndex > 0)
339af4edf68SEd Tanous         {
340af4edf68SEd Tanous             // if our boundary turned out to be rubbish, the captured
341af4edf68SEd Tanous             // lookbehind belongs to partData
342af4edf68SEd Tanous 
343af4edf68SEd Tanous             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
344af4edf68SEd Tanous             partDataMark = i;
345af4edf68SEd Tanous 
346af4edf68SEd Tanous             // reconsider the current character even so it interrupted
347af4edf68SEd Tanous             // the sequence it could be the beginning of a new sequence
348af4edf68SEd Tanous             i--;
349af4edf68SEd Tanous         }
35018e3f7fbSKrzysztof Grobelny         return ParserError::PARSER_SUCCESS;
351af4edf68SEd Tanous     }
352af4edf68SEd Tanous 
353af4edf68SEd Tanous     std::string currentHeaderName;
354af4edf68SEd Tanous     std::string currentHeaderValue;
355af4edf68SEd Tanous 
356af4edf68SEd Tanous     static constexpr char cr = '\r';
357af4edf68SEd Tanous     static constexpr char lf = '\n';
358af4edf68SEd Tanous     static constexpr char space = ' ';
359af4edf68SEd Tanous     static constexpr char hyphen = '-';
360af4edf68SEd Tanous     static constexpr char colon = ':';
361af4edf68SEd Tanous 
362d3a9e084SEd Tanous     std::array<bool, 256> boundaryIndex{};
363af4edf68SEd Tanous     std::string lookbehind;
364d3a9e084SEd Tanous     State state{State::START};
365d3a9e084SEd Tanous     Boundary flags{Boundary::NON_BOUNDARY};
366af4edf68SEd Tanous     size_t index = 0;
367af4edf68SEd Tanous     size_t partDataMark = 0;
368af4edf68SEd Tanous     size_t headerFieldMark = 0;
369af4edf68SEd Tanous     size_t headerValueMark = 0;
370af4edf68SEd Tanous };
371