#pragma once

#include <boost/beast/http/fields.hpp>
#include <http_request.hpp>

#include <array>
#include <cstddef>
#include <string>
#include <string_view>
#include <vector>

/**
 * @brief Result codes returned by MultipartParser::parse().
 *
 * Enumerator order is part of the interface; do not reorder.
 */
enum class ParserError
{
    /// The whole body was consumed without hitting one of the errors below.
    PARSER_SUCCESS,
    /// Content-Type does not start with "multipart/form-data; boundary=".
    ERROR_BOUNDARY_FORMAT,
    /// The opening boundary line was not followed by a carriage return.
    ERROR_BOUNDARY_CR,
    /// The carriage return after the opening boundary was not followed by a
    /// line feed.
    ERROR_BOUNDARY_LF,
    /// The body does not begin with "--" followed by the boundary token.
    ERROR_BOUNDARY_DATA,
    /// A ':' was found as the very first character of a header line.
    ERROR_EMPTY_HEADER,
    /// A header name contained a character other than an ASCII letter or '-'.
    ERROR_HEADER_NAME,
    /// The carriage return ending a header value was not followed by a line
    /// feed.
    ERROR_HEADER_VALUE,
    /// The carriage return ending the header section was not followed by a
    /// line feed.
    ERROR_HEADER_ENDING
};

/**
 * @brief States of the MultipartParser byte-by-byte state machine.
 */
enum class State
{
    /// Initial state; resets the match index and falls into START_BOUNDARY.
    START,
    /// Matching the opening "--<token>" line and its trailing CR LF.
    START_BOUNDARY,
    /// At the first byte of a header line; marks where the name begins.
    HEADER_FIELD_START,
    /// Inside a header name, up to the ':' (or a CR that ends the headers).
    HEADER_FIELD,
    /// After the ':'; skipping spaces before the header value.
    HEADER_VALUE_START,
    /// Inside a header value, up to the terminating CR.
    HEADER_VALUE,
    /// CR seen after a header value; an LF must follow.
    HEADER_VALUE_ALMOST_DONE,
    /// CR seen at the start of a header line; an LF must follow.
    HEADERS_ALMOST_DONE,
    /// At the first byte of a part body; marks where the content begins.
    PART_DATA_START,
    /// Inside a part body, searching for the next boundary.
    PART_DATA,
    /// The closing boundary has been seen; remaining bytes are ignored.
    END
};

/**
 * @brief Kind of boundary the parser believes it has just matched inside a
 *        part body, decided by the byte that follows the boundary text.
 */
enum class Boundary
{
    /// No boundary match is pending.
    NON_BOUNDARY,
    /// Boundary text followed by CR: another part is expected to follow.
    PART_BOUNDARY,
    /// Boundary text followed by '-': this is expected to be the final
    /// boundary.
    END_BOUNDARY,
};

44af4edf68SEd Tanous struct FormPart
45af4edf68SEd Tanous {
46af4edf68SEd Tanous     boost::beast::http::fields fields;
47af4edf68SEd Tanous     std::string content;
48af4edf68SEd Tanous };
49af4edf68SEd Tanous 
50af4edf68SEd Tanous class MultipartParser
51af4edf68SEd Tanous {
52af4edf68SEd Tanous   public:
53af4edf68SEd Tanous     MultipartParser() = default;
54af4edf68SEd Tanous 
55af4edf68SEd Tanous     [[nodiscard]] ParserError parse(const crow::Request& req)
56af4edf68SEd Tanous     {
57af4edf68SEd Tanous         std::string_view contentType = req.getHeaderValue("content-type");
58af4edf68SEd Tanous 
59af4edf68SEd Tanous         const std::string boundaryFormat = "multipart/form-data; boundary=";
60*11ba3979SEd Tanous         if (!contentType.starts_with(boundaryFormat))
61af4edf68SEd Tanous         {
62af4edf68SEd Tanous             return ParserError::ERROR_BOUNDARY_FORMAT;
63af4edf68SEd Tanous         }
64af4edf68SEd Tanous 
65af4edf68SEd Tanous         std::string_view ctBoundary = contentType.substr(boundaryFormat.size());
66af4edf68SEd Tanous 
67af4edf68SEd Tanous         boundary = "\r\n--";
68af4edf68SEd Tanous         boundary += ctBoundary;
69af4edf68SEd Tanous         indexBoundary();
70af4edf68SEd Tanous         lookbehind.resize(boundary.size() + 8);
71af4edf68SEd Tanous         state = State::START;
72af4edf68SEd Tanous 
73af4edf68SEd Tanous         const char* buffer = req.body.data();
74af4edf68SEd Tanous         size_t len = req.body.size();
75af4edf68SEd Tanous         size_t prevIndex = index;
76af4edf68SEd Tanous         char cl = 0;
77af4edf68SEd Tanous 
78af4edf68SEd Tanous         for (size_t i = 0; i < len; i++)
79af4edf68SEd Tanous         {
80ca45aa3cSEd Tanous             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
81af4edf68SEd Tanous             char c = buffer[i];
82af4edf68SEd Tanous             switch (state)
83af4edf68SEd Tanous             {
84af4edf68SEd Tanous                 case State::START:
85af4edf68SEd Tanous                     index = 0;
86af4edf68SEd Tanous                     state = State::START_BOUNDARY;
87af4edf68SEd Tanous                     [[fallthrough]];
88af4edf68SEd Tanous                 case State::START_BOUNDARY:
89af4edf68SEd Tanous                     if (index == boundary.size() - 2)
90af4edf68SEd Tanous                     {
91af4edf68SEd Tanous                         if (c != cr)
92af4edf68SEd Tanous                         {
93af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_CR;
94af4edf68SEd Tanous                         }
95af4edf68SEd Tanous                         index++;
96af4edf68SEd Tanous                         break;
97af4edf68SEd Tanous                     }
98af4edf68SEd Tanous                     else if (index - 1 == boundary.size() - 2)
99af4edf68SEd Tanous                     {
100af4edf68SEd Tanous                         if (c != lf)
101af4edf68SEd Tanous                         {
102af4edf68SEd Tanous                             return ParserError::ERROR_BOUNDARY_LF;
103af4edf68SEd Tanous                         }
104af4edf68SEd Tanous                         index = 0;
105af4edf68SEd Tanous                         mime_fields.push_back({});
106af4edf68SEd Tanous                         state = State::HEADER_FIELD_START;
107af4edf68SEd Tanous                         break;
108af4edf68SEd Tanous                     }
109af4edf68SEd Tanous                     if (c != boundary[index + 2])
110af4edf68SEd Tanous                     {
111af4edf68SEd Tanous                         return ParserError::ERROR_BOUNDARY_DATA;
112af4edf68SEd Tanous                     }
113af4edf68SEd Tanous                     index++;
114af4edf68SEd Tanous                     break;
115af4edf68SEd Tanous                 case State::HEADER_FIELD_START:
116af4edf68SEd Tanous                     currentHeaderName.resize(0);
117af4edf68SEd Tanous                     state = State::HEADER_FIELD;
118af4edf68SEd Tanous                     headerFieldMark = i;
119af4edf68SEd Tanous                     index = 0;
120af4edf68SEd Tanous                     [[fallthrough]];
121af4edf68SEd Tanous                 case State::HEADER_FIELD:
122af4edf68SEd Tanous                     if (c == cr)
123af4edf68SEd Tanous                     {
124af4edf68SEd Tanous                         headerFieldMark = 0;
125af4edf68SEd Tanous                         state = State::HEADERS_ALMOST_DONE;
126af4edf68SEd Tanous                         break;
127af4edf68SEd Tanous                     }
128af4edf68SEd Tanous 
129af4edf68SEd Tanous                     index++;
130af4edf68SEd Tanous                     if (c == hyphen)
131af4edf68SEd Tanous                     {
132af4edf68SEd Tanous                         break;
133af4edf68SEd Tanous                     }
134af4edf68SEd Tanous 
135af4edf68SEd Tanous                     if (c == colon)
136af4edf68SEd Tanous                     {
137af4edf68SEd Tanous                         if (index == 1)
138af4edf68SEd Tanous                         {
139af4edf68SEd Tanous                             return ParserError::ERROR_EMPTY_HEADER;
140af4edf68SEd Tanous                         }
141ca45aa3cSEd Tanous 
142ca45aa3cSEd Tanous                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
143af4edf68SEd Tanous                         currentHeaderName.append(buffer + headerFieldMark,
144af4edf68SEd Tanous                                                  i - headerFieldMark);
145af4edf68SEd Tanous                         state = State::HEADER_VALUE_START;
146af4edf68SEd Tanous                         break;
147af4edf68SEd Tanous                     }
148af4edf68SEd Tanous                     cl = lower(c);
149af4edf68SEd Tanous                     if (cl < 'a' || cl > 'z')
150af4edf68SEd Tanous                     {
151af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_NAME;
152af4edf68SEd Tanous                     }
153af4edf68SEd Tanous                     break;
154af4edf68SEd Tanous                 case State::HEADER_VALUE_START:
155af4edf68SEd Tanous                     if (c == space)
156af4edf68SEd Tanous                     {
157af4edf68SEd Tanous                         break;
158af4edf68SEd Tanous                     }
159af4edf68SEd Tanous                     headerValueMark = i;
160af4edf68SEd Tanous                     state = State::HEADER_VALUE;
161af4edf68SEd Tanous                     [[fallthrough]];
162af4edf68SEd Tanous                 case State::HEADER_VALUE:
163af4edf68SEd Tanous                     if (c == cr)
164af4edf68SEd Tanous                     {
165ca45aa3cSEd Tanous                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
166af4edf68SEd Tanous                         std::string_view value(buffer + headerValueMark,
167af4edf68SEd Tanous                                                i - headerValueMark);
168af4edf68SEd Tanous                         mime_fields.rbegin()->fields.set(currentHeaderName,
169af4edf68SEd Tanous                                                          value);
170af4edf68SEd Tanous                         state = State::HEADER_VALUE_ALMOST_DONE;
171af4edf68SEd Tanous                     }
172af4edf68SEd Tanous                     break;
173af4edf68SEd Tanous                 case State::HEADER_VALUE_ALMOST_DONE:
174af4edf68SEd Tanous                     if (c != lf)
175af4edf68SEd Tanous                     {
176af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_VALUE;
177af4edf68SEd Tanous                     }
178af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
179af4edf68SEd Tanous                     break;
180af4edf68SEd Tanous                 case State::HEADERS_ALMOST_DONE:
181af4edf68SEd Tanous                     if (c != lf)
182af4edf68SEd Tanous                     {
183af4edf68SEd Tanous                         return ParserError::ERROR_HEADER_ENDING;
184af4edf68SEd Tanous                     }
185af4edf68SEd Tanous                     state = State::PART_DATA_START;
186af4edf68SEd Tanous                     break;
187af4edf68SEd Tanous                 case State::PART_DATA_START:
188af4edf68SEd Tanous                     state = State::PART_DATA;
189af4edf68SEd Tanous                     partDataMark = i;
190af4edf68SEd Tanous                     [[fallthrough]];
191af4edf68SEd Tanous                 case State::PART_DATA:
192af4edf68SEd Tanous                     if (index == 0)
193af4edf68SEd Tanous                     {
194af4edf68SEd Tanous                         skipNonBoundary(buffer, len, boundary.size() - 1, i);
195ca45aa3cSEd Tanous 
196ca45aa3cSEd Tanous                         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
197af4edf68SEd Tanous                         c = buffer[i];
198af4edf68SEd Tanous                     }
1998a592810SEd Tanous                     processPartData(prevIndex, buffer, i, c);
200af4edf68SEd Tanous                     break;
201af4edf68SEd Tanous                 case State::END:
202af4edf68SEd Tanous                     break;
203af4edf68SEd Tanous             }
204af4edf68SEd Tanous         }
205af4edf68SEd Tanous         return ParserError::PARSER_SUCCESS;
206af4edf68SEd Tanous     }
207af4edf68SEd Tanous     std::vector<FormPart> mime_fields;
208af4edf68SEd Tanous     std::string boundary;
209af4edf68SEd Tanous 
210af4edf68SEd Tanous   private:
211af4edf68SEd Tanous     void indexBoundary()
212af4edf68SEd Tanous     {
213af4edf68SEd Tanous         std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0);
214af4edf68SEd Tanous         for (const char current : boundary)
215af4edf68SEd Tanous         {
216af4edf68SEd Tanous             boundaryIndex[static_cast<unsigned char>(current)] = true;
217af4edf68SEd Tanous         }
218af4edf68SEd Tanous     }
219af4edf68SEd Tanous 
22056d2396dSEd Tanous     static char lower(char c)
221af4edf68SEd Tanous     {
222af4edf68SEd Tanous         return static_cast<char>(c | 0x20);
223af4edf68SEd Tanous     }
224af4edf68SEd Tanous 
225af4edf68SEd Tanous     inline bool isBoundaryChar(char c) const
226af4edf68SEd Tanous     {
227af4edf68SEd Tanous         return boundaryIndex[static_cast<unsigned char>(c)];
228af4edf68SEd Tanous     }
229af4edf68SEd Tanous 
230af4edf68SEd Tanous     void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd,
231af4edf68SEd Tanous                          size_t& i)
232af4edf68SEd Tanous     {
233af4edf68SEd Tanous         // boyer-moore derived algorithm to safely skip non-boundary data
234af4edf68SEd Tanous         while (i + boundary.size() <= len)
235af4edf68SEd Tanous         {
236ca45aa3cSEd Tanous             // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
237af4edf68SEd Tanous             if (isBoundaryChar(buffer[i + boundaryEnd]))
238af4edf68SEd Tanous             {
239af4edf68SEd Tanous                 break;
240af4edf68SEd Tanous             }
241af4edf68SEd Tanous             i += boundary.size();
242af4edf68SEd Tanous         }
243af4edf68SEd Tanous     }
244af4edf68SEd Tanous 
2458a592810SEd Tanous     void processPartData(size_t& prevIndex, const char* buffer, size_t& i,
2468a592810SEd Tanous                          char c)
247af4edf68SEd Tanous     {
248af4edf68SEd Tanous         prevIndex = index;
249af4edf68SEd Tanous 
250af4edf68SEd Tanous         if (index < boundary.size())
251af4edf68SEd Tanous         {
252af4edf68SEd Tanous             if (boundary[index] == c)
253af4edf68SEd Tanous             {
254af4edf68SEd Tanous                 if (index == 0)
255af4edf68SEd Tanous                 {
256ca45aa3cSEd Tanous                     // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
257ca45aa3cSEd Tanous                     const char* start = buffer + partDataMark;
258ca45aa3cSEd Tanous                     size_t size = i - partDataMark;
259ca45aa3cSEd Tanous                     mime_fields.rbegin()->content +=
260ca45aa3cSEd Tanous                         std::string_view(start, size);
261af4edf68SEd Tanous                 }
262af4edf68SEd Tanous                 index++;
263af4edf68SEd Tanous             }
264af4edf68SEd Tanous             else
265af4edf68SEd Tanous             {
266af4edf68SEd Tanous                 index = 0;
267af4edf68SEd Tanous             }
268af4edf68SEd Tanous         }
269af4edf68SEd Tanous         else if (index == boundary.size())
270af4edf68SEd Tanous         {
271af4edf68SEd Tanous             index++;
272af4edf68SEd Tanous             if (c == cr)
273af4edf68SEd Tanous             {
274af4edf68SEd Tanous                 // cr = part boundary
275af4edf68SEd Tanous                 flags = Boundary::PART_BOUNDARY;
276af4edf68SEd Tanous             }
277af4edf68SEd Tanous             else if (c == hyphen)
278af4edf68SEd Tanous             {
279af4edf68SEd Tanous                 // hyphen = end boundary
280af4edf68SEd Tanous                 flags = Boundary::END_BOUNDARY;
281af4edf68SEd Tanous             }
282af4edf68SEd Tanous             else
283af4edf68SEd Tanous             {
284af4edf68SEd Tanous                 index = 0;
285af4edf68SEd Tanous             }
286af4edf68SEd Tanous         }
287af4edf68SEd Tanous         else
288af4edf68SEd Tanous         {
289af4edf68SEd Tanous             if (flags == Boundary::PART_BOUNDARY)
290af4edf68SEd Tanous             {
291af4edf68SEd Tanous                 index = 0;
292af4edf68SEd Tanous                 if (c == lf)
293af4edf68SEd Tanous                 {
294af4edf68SEd Tanous                     // unset the PART_BOUNDARY flag
295af4edf68SEd Tanous                     flags = Boundary::NON_BOUNDARY;
296af4edf68SEd Tanous                     mime_fields.push_back({});
297af4edf68SEd Tanous                     state = State::HEADER_FIELD_START;
298af4edf68SEd Tanous                     return;
299af4edf68SEd Tanous                 }
300af4edf68SEd Tanous             }
301af4edf68SEd Tanous             if (flags == Boundary::END_BOUNDARY)
302af4edf68SEd Tanous             {
303af4edf68SEd Tanous                 if (c == hyphen)
304af4edf68SEd Tanous                 {
305af4edf68SEd Tanous                     state = State::END;
306af4edf68SEd Tanous                 }
307af4edf68SEd Tanous             }
308af4edf68SEd Tanous         }
309af4edf68SEd Tanous 
310af4edf68SEd Tanous         if (index > 0)
311af4edf68SEd Tanous         {
312af4edf68SEd Tanous             lookbehind[index - 1] = c;
313af4edf68SEd Tanous         }
314af4edf68SEd Tanous         else if (prevIndex > 0)
315af4edf68SEd Tanous         {
316af4edf68SEd Tanous             // if our boundary turned out to be rubbish, the captured
317af4edf68SEd Tanous             // lookbehind belongs to partData
318af4edf68SEd Tanous 
319af4edf68SEd Tanous             mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex);
320af4edf68SEd Tanous             prevIndex = 0;
321af4edf68SEd Tanous             partDataMark = i;
322af4edf68SEd Tanous 
323af4edf68SEd Tanous             // reconsider the current character even so it interrupted
324af4edf68SEd Tanous             // the sequence it could be the beginning of a new sequence
325af4edf68SEd Tanous             i--;
326af4edf68SEd Tanous         }
327af4edf68SEd Tanous     }
328af4edf68SEd Tanous 
329af4edf68SEd Tanous     std::string currentHeaderName;
330af4edf68SEd Tanous     std::string currentHeaderValue;
331af4edf68SEd Tanous 
332af4edf68SEd Tanous     static constexpr char cr = '\r';
333af4edf68SEd Tanous     static constexpr char lf = '\n';
334af4edf68SEd Tanous     static constexpr char space = ' ';
335af4edf68SEd Tanous     static constexpr char hyphen = '-';
336af4edf68SEd Tanous     static constexpr char colon = ':';
337af4edf68SEd Tanous 
338d3a9e084SEd Tanous     std::array<bool, 256> boundaryIndex{};
339af4edf68SEd Tanous     std::string lookbehind;
340d3a9e084SEd Tanous     State state{State::START};
341d3a9e084SEd Tanous     Boundary flags{Boundary::NON_BOUNDARY};
342af4edf68SEd Tanous     size_t index = 0;
343af4edf68SEd Tanous     size_t partDataMark = 0;
344af4edf68SEd Tanous     size_t headerFieldMark = 0;
345af4edf68SEd Tanous     size_t headerValueMark = 0;
346af4edf68SEd Tanous };
347