1 #pragma once 2 3 #include <boost/beast/http/fields.hpp> 4 #include <http_request.hpp> 5 6 #include <string> 7 #include <string_view> 8 9 enum class ParserError 10 { 11 PARSER_SUCCESS, 12 ERROR_BOUNDARY_FORMAT, 13 ERROR_BOUNDARY_CR, 14 ERROR_BOUNDARY_LF, 15 ERROR_BOUNDARY_DATA, 16 ERROR_EMPTY_HEADER, 17 ERROR_HEADER_NAME, 18 ERROR_HEADER_VALUE, 19 ERROR_HEADER_ENDING 20 }; 21 22 enum class State 23 { 24 START, 25 START_BOUNDARY, 26 HEADER_FIELD_START, 27 HEADER_FIELD, 28 HEADER_VALUE_START, 29 HEADER_VALUE, 30 HEADER_VALUE_ALMOST_DONE, 31 HEADERS_ALMOST_DONE, 32 PART_DATA_START, 33 PART_DATA, 34 END 35 }; 36 37 enum class Boundary 38 { 39 NON_BOUNDARY, 40 PART_BOUNDARY, 41 END_BOUNDARY, 42 }; 43 44 struct FormPart 45 { 46 boost::beast::http::fields fields; 47 std::string content; 48 }; 49 50 class MultipartParser 51 { 52 public: 53 MultipartParser() = default; 54 55 [[nodiscard]] ParserError parse(const crow::Request& req) 56 { 57 std::string_view contentType = req.getHeaderValue("content-type"); 58 59 const std::string boundaryFormat = "multipart/form-data; boundary="; 60 if (!contentType.starts_with(boundaryFormat)) 61 { 62 return ParserError::ERROR_BOUNDARY_FORMAT; 63 } 64 65 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 66 67 boundary = "\r\n--"; 68 boundary += ctBoundary; 69 indexBoundary(); 70 lookbehind.resize(boundary.size() + 8); 71 state = State::START; 72 73 const char* buffer = req.body.data(); 74 size_t len = req.body.size(); 75 size_t prevIndex = index; 76 char cl = 0; 77 78 for (size_t i = 0; i < len; i++) 79 { 80 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 81 char c = buffer[i]; 82 switch (state) 83 { 84 case State::START: 85 index = 0; 86 state = State::START_BOUNDARY; 87 [[fallthrough]]; 88 case State::START_BOUNDARY: 89 if (index == boundary.size() - 2) 90 { 91 if (c != cr) 92 { 93 return ParserError::ERROR_BOUNDARY_CR; 94 } 95 index++; 96 break; 97 } 98 else if (index - 1 == boundary.size() - 2) 99 { 100 if (c != lf) 101 { 102 return ParserError::ERROR_BOUNDARY_LF; 103 } 104 index = 0; 105 mime_fields.push_back({}); 106 state = State::HEADER_FIELD_START; 107 break; 108 } 109 if (c != boundary[index + 2]) 110 { 111 return ParserError::ERROR_BOUNDARY_DATA; 112 } 113 index++; 114 break; 115 case State::HEADER_FIELD_START: 116 currentHeaderName.resize(0); 117 state = State::HEADER_FIELD; 118 headerFieldMark = i; 119 index = 0; 120 [[fallthrough]]; 121 case State::HEADER_FIELD: 122 if (c == cr) 123 { 124 headerFieldMark = 0; 125 state = State::HEADERS_ALMOST_DONE; 126 break; 127 } 128 129 index++; 130 if (c == hyphen) 131 { 132 break; 133 } 134 135 if (c == colon) 136 { 137 if (index == 1) 138 { 139 return ParserError::ERROR_EMPTY_HEADER; 140 } 141 142 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 143 currentHeaderName.append(buffer + headerFieldMark, 144 i - headerFieldMark); 145 state = State::HEADER_VALUE_START; 146 break; 147 } 148 cl = lower(c); 149 if (cl < 'a' || cl > 'z') 150 { 151 return ParserError::ERROR_HEADER_NAME; 152 } 153 break; 154 case State::HEADER_VALUE_START: 155 if (c == space) 156 { 157 break; 158 } 159 headerValueMark = i; 160 state = State::HEADER_VALUE; 161 [[fallthrough]]; 162 case State::HEADER_VALUE: 163 if (c == cr) 164 { 165 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 166 std::string_view value(buffer + headerValueMark, 167 i - headerValueMark); 168 mime_fields.rbegin()->fields.set(currentHeaderName, 169 value); 170 state = State::HEADER_VALUE_ALMOST_DONE; 171 } 172 break; 173 case State::HEADER_VALUE_ALMOST_DONE: 174 if (c != lf) 175 { 176 return ParserError::ERROR_HEADER_VALUE; 177 } 178 state = State::HEADER_FIELD_START; 179 break; 180 case State::HEADERS_ALMOST_DONE: 181 if (c != lf) 182 { 183 return ParserError::ERROR_HEADER_ENDING; 184 } 185 state = State::PART_DATA_START; 186 break; 187 case State::PART_DATA_START: 188 state = State::PART_DATA; 189 partDataMark = i; 190 [[fallthrough]]; 191 case State::PART_DATA: 192 if (index == 0) 193 { 194 skipNonBoundary(buffer, len, boundary.size() - 1, i); 195 196 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 197 c = buffer[i]; 198 } 199 processPartData(prevIndex, buffer, i, c); 200 break; 201 case State::END: 202 break; 203 } 204 } 205 return ParserError::PARSER_SUCCESS; 206 } 207 std::vector<FormPart> mime_fields; 208 std::string boundary; 209 210 private: 211 void indexBoundary() 212 { 213 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 214 for (const char current : boundary) 215 { 216 boundaryIndex[static_cast<unsigned char>(current)] = true; 217 } 218 } 219 220 static char lower(char c) 221 { 222 return static_cast<char>(c | 0x20); 223 } 224 225 inline bool isBoundaryChar(char c) const 226 { 227 return boundaryIndex[static_cast<unsigned char>(c)]; 228 } 229 230 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 231 size_t& i) 232 { 233 // boyer-moore derived algorithm to safely skip non-boundary data 234 while (i + boundary.size() <= len) 235 { 236 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 237 if (isBoundaryChar(buffer[i + boundaryEnd])) 238 { 239 break; 240 } 241 i += boundary.size(); 242 } 243 } 244 245 void processPartData(size_t& prevIndex, const char* buffer, size_t& i, 246 char c) 247 { 248 prevIndex = index; 249 250 if (index < boundary.size()) 251 { 252 if (boundary[index] == c) 253 { 254 if (index == 0) 255 { 256 // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) 257 const char* start = buffer + partDataMark; 258 size_t size = i - partDataMark; 259 mime_fields.rbegin()->content += 260 std::string_view(start, size); 261 } 262 index++; 263 } 264 else 265 { 266 index = 0; 267 } 268 } 269 else if (index == boundary.size()) 270 { 271 index++; 272 if (c == cr) 273 { 274 // cr = part boundary 275 flags = Boundary::PART_BOUNDARY; 276 } 277 else if (c == hyphen) 278 { 279 // hyphen = end boundary 280 flags = Boundary::END_BOUNDARY; 281 } 282 else 283 { 284 index = 0; 285 } 286 } 287 else 288 { 289 if (flags == Boundary::PART_BOUNDARY) 290 { 291 index = 0; 292 if (c == lf) 293 { 294 // unset the PART_BOUNDARY flag 295 flags = Boundary::NON_BOUNDARY; 296 mime_fields.push_back({}); 297 state = State::HEADER_FIELD_START; 298 return; 299 } 300 } 301 if (flags == Boundary::END_BOUNDARY) 302 { 303 if (c == hyphen) 304 { 305 state = State::END; 306 } 307 } 308 } 309 310 if (index > 0) 311 { 312 lookbehind[index - 1] = c; 313 } 314 else if (prevIndex > 0) 315 { 316 // if our boundary turned out to be rubbish, the captured 317 // lookbehind belongs to partData 318 319 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 320 prevIndex = 0; 321 partDataMark = i; 322 323 // reconsider the current character even so it interrupted 324 // the sequence it could be the beginning of a new sequence 325 i--; 326 } 327 } 328 329 std::string currentHeaderName; 330 std::string currentHeaderValue; 331 332 static constexpr char cr = '\r'; 333 static constexpr char lf = '\n'; 334 static constexpr char space = ' '; 335 static constexpr char hyphen = '-'; 336 static constexpr char colon = ':'; 337 338 std::array<bool, 256> boundaryIndex{}; 339 std::string lookbehind; 340 State state{State::START}; 341 Boundary flags{Boundary::NON_BOUNDARY}; 342 size_t index = 0; 343 size_t partDataMark = 0; 344 size_t headerFieldMark = 0; 345 size_t headerValueMark = 0; 346 }; 347