1 #pragma once 2 3 #include "http_request.hpp" 4 5 #include <boost/beast/http/fields.hpp> 6 7 #include <ranges> 8 #include <string> 9 #include <string_view> 10 11 enum class ParserError 12 { 13 PARSER_SUCCESS, 14 ERROR_BOUNDARY_FORMAT, 15 ERROR_BOUNDARY_CR, 16 ERROR_BOUNDARY_LF, 17 ERROR_BOUNDARY_DATA, 18 ERROR_EMPTY_HEADER, 19 ERROR_HEADER_NAME, 20 ERROR_HEADER_VALUE, 21 ERROR_HEADER_ENDING, 22 ERROR_UNEXPECTED_END_OF_HEADER, 23 ERROR_UNEXPECTED_END_OF_INPUT, 24 ERROR_OUT_OF_RANGE 25 }; 26 27 enum class State 28 { 29 START, 30 START_BOUNDARY, 31 HEADER_FIELD_START, 32 HEADER_FIELD, 33 HEADER_VALUE_START, 34 HEADER_VALUE, 35 HEADER_VALUE_ALMOST_DONE, 36 HEADERS_ALMOST_DONE, 37 PART_DATA_START, 38 PART_DATA, 39 END 40 }; 41 42 enum class Boundary 43 { 44 NON_BOUNDARY, 45 PART_BOUNDARY, 46 END_BOUNDARY, 47 }; 48 49 struct FormPart 50 { 51 boost::beast::http::fields fields; 52 std::string content; 53 }; 54 55 class MultipartParser 56 { 57 public: 58 MultipartParser() = default; 59 parse(const crow::Request & req)60 [[nodiscard]] ParserError parse(const crow::Request& req) 61 { 62 std::string_view contentType = req.getHeaderValue("content-type"); 63 64 const std::string boundaryFormat = "multipart/form-data; boundary="; 65 if (!contentType.starts_with(boundaryFormat)) 66 { 67 return ParserError::ERROR_BOUNDARY_FORMAT; 68 } 69 70 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 71 72 boundary = "\r\n--"; 73 boundary += ctBoundary; 74 indexBoundary(); 75 lookbehind.resize(boundary.size() + 8); 76 state = State::START; 77 78 const std::string& buffer = req.body(); 79 size_t len = buffer.size(); 80 char cl = 0; 81 82 for (size_t i = 0; i < len; i++) 83 { 84 char c = buffer[i]; 85 switch (state) 86 { 87 case State::START: 88 index = 0; 89 state = State::START_BOUNDARY; 90 [[fallthrough]]; 91 case State::START_BOUNDARY: 92 if (index == boundary.size() - 2) 93 { 94 if (c != cr) 95 { 96 return ParserError::ERROR_BOUNDARY_CR; 97 } 98 index++; 99 break; 100 } 101 else if (index - 1 == boundary.size() - 2) 102 { 103 if (c != lf) 104 { 105 return ParserError::ERROR_BOUNDARY_LF; 106 } 107 index = 0; 108 mime_fields.emplace_back(); 109 state = State::HEADER_FIELD_START; 110 break; 111 } 112 if (c != boundary[index + 2]) 113 { 114 return ParserError::ERROR_BOUNDARY_DATA; 115 } 116 index++; 117 break; 118 case State::HEADER_FIELD_START: 119 currentHeaderName.resize(0); 120 state = State::HEADER_FIELD; 121 headerFieldMark = i; 122 index = 0; 123 [[fallthrough]]; 124 case State::HEADER_FIELD: 125 if (c == cr) 126 { 127 headerFieldMark = 0; 128 state = State::HEADERS_ALMOST_DONE; 129 break; 130 } 131 132 index++; 133 if (c == hyphen) 134 { 135 break; 136 } 137 138 if (c == colon) 139 { 140 if (index == 1) 141 { 142 return ParserError::ERROR_EMPTY_HEADER; 143 } 144 145 currentHeaderName.append(&buffer[headerFieldMark], 146 i - headerFieldMark); 147 state = State::HEADER_VALUE_START; 148 break; 149 } 150 cl = lower(c); 151 if (cl < 'a' || cl > 'z') 152 { 153 return ParserError::ERROR_HEADER_NAME; 154 } 155 break; 156 case State::HEADER_VALUE_START: 157 if (c == space) 158 { 159 break; 160 } 161 headerValueMark = i; 162 state = State::HEADER_VALUE; 163 [[fallthrough]]; 164 case State::HEADER_VALUE: 165 if (c == cr) 166 { 167 std::string_view value(&buffer[headerValueMark], 168 i - headerValueMark); 169 mime_fields.rbegin()->fields.set(currentHeaderName, 170 value); 171 state = State::HEADER_VALUE_ALMOST_DONE; 172 } 173 break; 174 case State::HEADER_VALUE_ALMOST_DONE: 175 if (c != lf) 176 { 177 return ParserError::ERROR_HEADER_VALUE; 178 } 179 state = State::HEADER_FIELD_START; 180 break; 181 case State::HEADERS_ALMOST_DONE: 182 if (c != lf) 183 { 184 return ParserError::ERROR_HEADER_ENDING; 185 } 186 if (index > 0) 187 { 188 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 189 } 190 state = State::PART_DATA_START; 191 break; 192 case State::PART_DATA_START: 193 state = State::PART_DATA; 194 partDataMark = i; 195 [[fallthrough]]; 196 case State::PART_DATA: 197 { 198 if (index == 0) 199 { 200 skipNonBoundary(buffer, boundary.size() - 1, i); 201 c = buffer[i]; 202 } 203 if (auto ec = processPartData(buffer, i, c); 204 ec != ParserError::PARSER_SUCCESS) 205 { 206 return ec; 207 } 208 break; 209 } 210 case State::END: 211 break; 212 default: 213 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 214 } 215 } 216 217 if (state != State::END) 218 { 219 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 220 } 221 222 return ParserError::PARSER_SUCCESS; 223 } 224 std::vector<FormPart> mime_fields; 225 std::string boundary; 226 227 private: indexBoundary()228 void indexBoundary() 229 { 230 std::ranges::fill(boundaryIndex, 0); 231 for (const char current : boundary) 232 { 233 boundaryIndex[static_cast<unsigned char>(current)] = true; 234 } 235 } 236 lower(char c)237 static char lower(char c) 238 { 239 return static_cast<char>(c | 0x20); 240 } 241 isBoundaryChar(char c) const242 bool isBoundaryChar(char c) const 243 { 244 return boundaryIndex[static_cast<unsigned char>(c)]; 245 } 246 skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)247 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, 248 size_t& i) 249 { 250 // boyer-moore derived algorithm to safely skip non-boundary data 251 while (i + boundary.size() <= buffer.length()) 252 { 253 if (isBoundaryChar(buffer[i + boundaryEnd])) 254 { 255 break; 256 } 257 i += boundary.size(); 258 } 259 } 260 processPartData(const std::string & buffer,size_t & i,char c)261 ParserError processPartData(const std::string& buffer, size_t& i, char c) 262 { 263 size_t prevIndex = index; 264 265 if (index < boundary.size()) 266 { 267 if (boundary[index] == c) 268 { 269 if (index == 0) 270 { 271 const char* start = &buffer[partDataMark]; 272 size_t size = i - partDataMark; 273 mime_fields.rbegin()->content += 274 std::string_view(start, size); 275 } 276 index++; 277 } 278 else 279 { 280 index = 0; 281 } 282 } 283 else if (index == boundary.size()) 284 { 285 index++; 286 if (c == cr) 287 { 288 // cr = part boundary 289 flags = Boundary::PART_BOUNDARY; 290 } 291 else if (c == hyphen) 292 { 293 // hyphen = end boundary 294 flags = Boundary::END_BOUNDARY; 295 } 296 else 297 { 298 index = 0; 299 } 300 } 301 else 302 { 303 if (flags == Boundary::PART_BOUNDARY) 304 { 305 index = 0; 306 if (c == lf) 307 { 308 // unset the PART_BOUNDARY flag 309 flags = Boundary::NON_BOUNDARY; 310 mime_fields.emplace_back(); 311 state = State::HEADER_FIELD_START; 312 return ParserError::PARSER_SUCCESS; 313 } 314 } 315 if (flags == Boundary::END_BOUNDARY) 316 { 317 if (c == hyphen) 318 { 319 state = State::END; 320 } 321 else 322 { 323 flags = Boundary::NON_BOUNDARY; 324 index = 0; 325 } 326 } 327 } 328 329 if (index > 0) 330 { 331 if ((index - 1) >= lookbehind.size()) 332 { 333 // Should never happen, but when it does it won't cause crash 334 return ParserError::ERROR_OUT_OF_RANGE; 335 } 336 lookbehind[index - 1] = c; 337 } 338 else if (prevIndex > 0) 339 { 340 // if our boundary turned out to be rubbish, the captured 341 // lookbehind belongs to partData 342 343 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 344 partDataMark = i; 345 346 // reconsider the current character even so it interrupted 347 // the sequence it could be the beginning of a new sequence 348 i--; 349 } 350 return ParserError::PARSER_SUCCESS; 351 } 352 353 std::string currentHeaderName; 354 std::string currentHeaderValue; 355 356 static constexpr char cr = '\r'; 357 static constexpr char lf = '\n'; 358 static constexpr char space = ' '; 359 static constexpr char hyphen = '-'; 360 static constexpr char colon = ':'; 361 362 std::array<bool, 256> boundaryIndex{}; 363 std::string lookbehind; 364 State state{State::START}; 365 Boundary flags{Boundary::NON_BOUNDARY}; 366 size_t index = 0; 367 size_t partDataMark = 0; 368 size_t headerFieldMark = 0; 369 size_t headerValueMark = 0; 370 }; 371