1 #pragma once 2 3 #include "http_request.hpp" 4 5 #include <boost/beast/http/fields.hpp> 6 7 #include <ranges> 8 #include <string> 9 #include <string_view> 10 11 enum class ParserError 12 { 13 PARSER_SUCCESS, 14 ERROR_BOUNDARY_FORMAT, 15 ERROR_BOUNDARY_CR, 16 ERROR_BOUNDARY_LF, 17 ERROR_BOUNDARY_DATA, 18 ERROR_EMPTY_HEADER, 19 ERROR_HEADER_NAME, 20 ERROR_HEADER_VALUE, 21 ERROR_HEADER_ENDING, 22 ERROR_UNEXPECTED_END_OF_HEADER, 23 ERROR_UNEXPECTED_END_OF_INPUT, 24 ERROR_OUT_OF_RANGE 25 }; 26 27 enum class State 28 { 29 START, 30 START_BOUNDARY, 31 HEADER_FIELD_START, 32 HEADER_FIELD, 33 HEADER_VALUE_START, 34 HEADER_VALUE, 35 HEADER_VALUE_ALMOST_DONE, 36 HEADERS_ALMOST_DONE, 37 PART_DATA_START, 38 PART_DATA, 39 END 40 }; 41 42 enum class Boundary 43 { 44 NON_BOUNDARY, 45 PART_BOUNDARY, 46 END_BOUNDARY, 47 }; 48 49 struct FormPart 50 { 51 boost::beast::http::fields fields; 52 std::string content; 53 }; 54 55 class MultipartParser 56 { 57 public: 58 MultipartParser() = default; 59 60 [[nodiscard]] ParserError parse(const crow::Request& req) 61 { 62 std::string_view contentType = req.getHeaderValue("content-type"); 63 64 const std::string boundaryFormat = "multipart/form-data; boundary="; 65 if (!contentType.starts_with(boundaryFormat)) 66 { 67 return ParserError::ERROR_BOUNDARY_FORMAT; 68 } 69 70 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 71 72 boundary = "\r\n--"; 73 boundary += ctBoundary; 74 indexBoundary(); 75 lookbehind.resize(boundary.size() + 8); 76 state = State::START; 77 78 const std::string& buffer = req.body(); 79 size_t len = buffer.size(); 80 char cl = 0; 81 82 for (size_t i = 0; i < len; i++) 83 { 84 char c = buffer[i]; 85 switch (state) 86 { 87 case State::START: 88 index = 0; 89 state = State::START_BOUNDARY; 90 [[fallthrough]]; 91 case State::START_BOUNDARY: 92 if (index == boundary.size() - 2) 93 { 94 if (c != cr) 95 { 96 return ParserError::ERROR_BOUNDARY_CR; 97 } 98 index++; 99 break; 100 } 101 else if (index - 1 == boundary.size() - 2) 102 { 103 if (c != lf) 104 { 105 return ParserError::ERROR_BOUNDARY_LF; 106 } 107 index = 0; 108 mime_fields.push_back({}); 109 state = State::HEADER_FIELD_START; 110 break; 111 } 112 if (c != boundary[index + 2]) 113 { 114 return ParserError::ERROR_BOUNDARY_DATA; 115 } 116 index++; 117 break; 118 case State::HEADER_FIELD_START: 119 currentHeaderName.resize(0); 120 state = State::HEADER_FIELD; 121 headerFieldMark = i; 122 index = 0; 123 [[fallthrough]]; 124 case State::HEADER_FIELD: 125 if (c == cr) 126 { 127 headerFieldMark = 0; 128 state = State::HEADERS_ALMOST_DONE; 129 break; 130 } 131 132 index++; 133 if (c == hyphen) 134 { 135 break; 136 } 137 138 if (c == colon) 139 { 140 if (index == 1) 141 { 142 return ParserError::ERROR_EMPTY_HEADER; 143 } 144 145 currentHeaderName.append(&buffer[headerFieldMark], 146 i - headerFieldMark); 147 state = State::HEADER_VALUE_START; 148 break; 149 } 150 cl = lower(c); 151 if (cl < 'a' || cl > 'z') 152 { 153 return ParserError::ERROR_HEADER_NAME; 154 } 155 break; 156 case State::HEADER_VALUE_START: 157 if (c == space) 158 { 159 break; 160 } 161 headerValueMark = i; 162 state = State::HEADER_VALUE; 163 [[fallthrough]]; 164 case State::HEADER_VALUE: 165 if (c == cr) 166 { 167 std::string_view value(&buffer[headerValueMark], 168 i - headerValueMark); 169 mime_fields.rbegin()->fields.set(currentHeaderName, 170 value); 171 state = State::HEADER_VALUE_ALMOST_DONE; 172 } 173 break; 174 case State::HEADER_VALUE_ALMOST_DONE: 175 if (c != lf) 176 { 177 return ParserError::ERROR_HEADER_VALUE; 178 } 179 state = State::HEADER_FIELD_START; 180 break; 181 case State::HEADERS_ALMOST_DONE: 182 if (c != lf) 183 { 184 return ParserError::ERROR_HEADER_ENDING; 185 } 186 if (index > 0) 187 { 188 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 189 } 190 state = State::PART_DATA_START; 191 break; 192 case State::PART_DATA_START: 193 state = State::PART_DATA; 194 partDataMark = i; 195 [[fallthrough]]; 196 case State::PART_DATA: 197 { 198 if (index == 0) 199 { 200 skipNonBoundary(buffer, boundary.size() - 1, i); 201 c = buffer[i]; 202 } 203 if (auto ec = processPartData(buffer, i, c); 204 ec != ParserError::PARSER_SUCCESS) 205 { 206 return ec; 207 } 208 break; 209 } 210 case State::END: 211 break; 212 } 213 } 214 215 if (state != State::END) 216 { 217 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 218 } 219 220 return ParserError::PARSER_SUCCESS; 221 } 222 std::vector<FormPart> mime_fields; 223 std::string boundary; 224 225 private: 226 void indexBoundary() 227 { 228 std::ranges::fill(boundaryIndex, 0); 229 for (const char current : boundary) 230 { 231 boundaryIndex[static_cast<unsigned char>(current)] = true; 232 } 233 } 234 235 static char lower(char c) 236 { 237 return static_cast<char>(c | 0x20); 238 } 239 240 inline bool isBoundaryChar(char c) const 241 { 242 return boundaryIndex[static_cast<unsigned char>(c)]; 243 } 244 245 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, 246 size_t& i) 247 { 248 // boyer-moore derived algorithm to safely skip non-boundary data 249 while (i + boundary.size() <= buffer.length()) 250 { 251 if (isBoundaryChar(buffer[i + boundaryEnd])) 252 { 253 break; 254 } 255 i += boundary.size(); 256 } 257 } 258 259 ParserError processPartData(const std::string& buffer, size_t& i, char c) 260 { 261 size_t prevIndex = index; 262 263 if (index < boundary.size()) 264 { 265 if (boundary[index] == c) 266 { 267 if (index == 0) 268 { 269 const char* start = &buffer[partDataMark]; 270 size_t size = i - partDataMark; 271 mime_fields.rbegin()->content += std::string_view(start, 272 size); 273 } 274 index++; 275 } 276 else 277 { 278 index = 0; 279 } 280 } 281 else if (index == boundary.size()) 282 { 283 index++; 284 if (c == cr) 285 { 286 // cr = part boundary 287 flags = Boundary::PART_BOUNDARY; 288 } 289 else if (c == hyphen) 290 { 291 // hyphen = end boundary 292 flags = Boundary::END_BOUNDARY; 293 } 294 else 295 { 296 index = 0; 297 } 298 } 299 else 300 { 301 if (flags == Boundary::PART_BOUNDARY) 302 { 303 index = 0; 304 if (c == lf) 305 { 306 // unset the PART_BOUNDARY flag 307 flags = Boundary::NON_BOUNDARY; 308 mime_fields.push_back({}); 309 state = State::HEADER_FIELD_START; 310 return ParserError::PARSER_SUCCESS; 311 } 312 } 313 if (flags == Boundary::END_BOUNDARY) 314 { 315 if (c == hyphen) 316 { 317 state = State::END; 318 } 319 else 320 { 321 flags = Boundary::NON_BOUNDARY; 322 index = 0; 323 } 324 } 325 } 326 327 if (index > 0) 328 { 329 if ((index - 1) >= lookbehind.size()) 330 { 331 // Should never happen, but when it does it won't cause crash 332 return ParserError::ERROR_OUT_OF_RANGE; 333 } 334 lookbehind[index - 1] = c; 335 } 336 else if (prevIndex > 0) 337 { 338 // if our boundary turned out to be rubbish, the captured 339 // lookbehind belongs to partData 340 341 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 342 partDataMark = i; 343 344 // reconsider the current character even so it interrupted 345 // the sequence it could be the beginning of a new sequence 346 i--; 347 } 348 return ParserError::PARSER_SUCCESS; 349 } 350 351 std::string currentHeaderName; 352 std::string currentHeaderValue; 353 354 static constexpr char cr = '\r'; 355 static constexpr char lf = '\n'; 356 static constexpr char space = ' '; 357 static constexpr char hyphen = '-'; 358 static constexpr char colon = ':'; 359 360 std::array<bool, 256> boundaryIndex{}; 361 std::string lookbehind; 362 State state{State::START}; 363 Boundary flags{Boundary::NON_BOUNDARY}; 364 size_t index = 0; 365 size_t partDataMark = 0; 366 size_t headerFieldMark = 0; 367 size_t headerValueMark = 0; 368 }; 369