1 #pragma once 2 3 #include <boost/algorithm/string/predicate.hpp> 4 #include <boost/beast/http/fields.hpp> 5 #include <http_request.hpp> 6 7 #include <string> 8 #include <string_view> 9 10 enum class ParserError 11 { 12 PARSER_SUCCESS, 13 ERROR_BOUNDARY_FORMAT, 14 ERROR_BOUNDARY_CR, 15 ERROR_BOUNDARY_LF, 16 ERROR_BOUNDARY_DATA, 17 ERROR_EMPTY_HEADER, 18 ERROR_HEADER_NAME, 19 ERROR_HEADER_VALUE, 20 ERROR_HEADER_ENDING 21 }; 22 23 enum class State 24 { 25 START, 26 START_BOUNDARY, 27 HEADER_FIELD_START, 28 HEADER_FIELD, 29 HEADER_VALUE_START, 30 HEADER_VALUE, 31 HEADER_VALUE_ALMOST_DONE, 32 HEADERS_ALMOST_DONE, 33 PART_DATA_START, 34 PART_DATA, 35 END 36 }; 37 38 enum class Boundary 39 { 40 NON_BOUNDARY, 41 PART_BOUNDARY, 42 END_BOUNDARY, 43 }; 44 45 struct FormPart 46 { 47 boost::beast::http::fields fields; 48 std::string content; 49 }; 50 51 class MultipartParser 52 { 53 public: 54 MultipartParser() = default; 55 56 [[nodiscard]] ParserError parse(const crow::Request& req) 57 { 58 std::string_view contentType = req.getHeaderValue("content-type"); 59 60 const std::string boundaryFormat = "multipart/form-data; boundary="; 61 if (!boost::starts_with(req.getHeaderValue("content-type"), 62 boundaryFormat)) 63 { 64 return ParserError::ERROR_BOUNDARY_FORMAT; 65 } 66 67 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 68 69 boundary = "\r\n--"; 70 boundary += ctBoundary; 71 indexBoundary(); 72 lookbehind.resize(boundary.size() + 8); 73 state = State::START; 74 75 const char* buffer = req.body.data(); 76 size_t len = req.body.size(); 77 size_t prevIndex = index; 78 char cl = 0; 79 80 for (size_t i = 0; i < len; i++) 81 { 82 char c = buffer[i]; 83 switch (state) 84 { 85 case State::START: 86 index = 0; 87 state = State::START_BOUNDARY; 88 [[fallthrough]]; 89 case State::START_BOUNDARY: 90 if (index == boundary.size() - 2) 91 { 92 if (c != cr) 93 { 94 return ParserError::ERROR_BOUNDARY_CR; 95 } 96 index++; 97 break; 98 } 99 else if (index - 1 == boundary.size() - 2) 100 { 101 if (c != lf) 102 { 103 return ParserError::ERROR_BOUNDARY_LF; 104 } 105 index = 0; 106 mime_fields.push_back({}); 107 state = State::HEADER_FIELD_START; 108 break; 109 } 110 if (c != boundary[index + 2]) 111 { 112 return ParserError::ERROR_BOUNDARY_DATA; 113 } 114 index++; 115 break; 116 case State::HEADER_FIELD_START: 117 currentHeaderName.resize(0); 118 state = State::HEADER_FIELD; 119 headerFieldMark = i; 120 index = 0; 121 [[fallthrough]]; 122 case State::HEADER_FIELD: 123 if (c == cr) 124 { 125 headerFieldMark = 0; 126 state = State::HEADERS_ALMOST_DONE; 127 break; 128 } 129 130 index++; 131 if (c == hyphen) 132 { 133 break; 134 } 135 136 if (c == colon) 137 { 138 if (index == 1) 139 { 140 return ParserError::ERROR_EMPTY_HEADER; 141 } 142 currentHeaderName.append(buffer + headerFieldMark, 143 i - headerFieldMark); 144 state = State::HEADER_VALUE_START; 145 break; 146 } 147 cl = lower(c); 148 if (cl < 'a' || cl > 'z') 149 { 150 return ParserError::ERROR_HEADER_NAME; 151 } 152 break; 153 case State::HEADER_VALUE_START: 154 if (c == space) 155 { 156 break; 157 } 158 headerValueMark = i; 159 state = State::HEADER_VALUE; 160 [[fallthrough]]; 161 case State::HEADER_VALUE: 162 if (c == cr) 163 { 164 std::string_view value(buffer + headerValueMark, 165 i - headerValueMark); 166 mime_fields.rbegin()->fields.set(currentHeaderName, 167 value); 168 state = State::HEADER_VALUE_ALMOST_DONE; 169 } 170 break; 171 case State::HEADER_VALUE_ALMOST_DONE: 172 if (c != lf) 173 { 174 return ParserError::ERROR_HEADER_VALUE; 175 } 176 state = State::HEADER_FIELD_START; 177 break; 178 case State::HEADERS_ALMOST_DONE: 179 if (c != lf) 180 { 181 return ParserError::ERROR_HEADER_ENDING; 182 } 183 state = State::PART_DATA_START; 184 break; 185 case State::PART_DATA_START: 186 state = State::PART_DATA; 187 partDataMark = i; 188 [[fallthrough]]; 189 case State::PART_DATA: 190 if (index == 0) 191 { 192 skipNonBoundary(buffer, len, boundary.size() - 1, i); 193 c = buffer[i]; 194 } 195 processPartData(prevIndex, index, buffer, i, c, state); 196 break; 197 case State::END: 198 break; 199 } 200 } 201 return ParserError::PARSER_SUCCESS; 202 } 203 std::vector<FormPart> mime_fields; 204 std::string boundary; 205 206 private: 207 void indexBoundary() 208 { 209 std::fill(boundaryIndex.begin(), boundaryIndex.end(), 0); 210 for (const char current : boundary) 211 { 212 boundaryIndex[static_cast<unsigned char>(current)] = true; 213 } 214 } 215 216 char lower(char c) const 217 { 218 return static_cast<char>(c | 0x20); 219 } 220 221 inline bool isBoundaryChar(char c) const 222 { 223 return boundaryIndex[static_cast<unsigned char>(c)]; 224 } 225 226 void skipNonBoundary(const char* buffer, size_t len, size_t boundaryEnd, 227 size_t& i) 228 { 229 // boyer-moore derived algorithm to safely skip non-boundary data 230 while (i + boundary.size() <= len) 231 { 232 if (isBoundaryChar(buffer[i + boundaryEnd])) 233 { 234 break; 235 } 236 i += boundary.size(); 237 } 238 } 239 240 void processPartData(size_t& prevIndex, size_t& index, const char* buffer, 241 size_t& i, char c, State& state) 242 { 243 prevIndex = index; 244 245 if (index < boundary.size()) 246 { 247 if (boundary[index] == c) 248 { 249 if (index == 0) 250 { 251 mime_fields.rbegin()->content += std::string_view( 252 buffer + partDataMark, i - partDataMark); 253 } 254 index++; 255 } 256 else 257 { 258 index = 0; 259 } 260 } 261 else if (index == boundary.size()) 262 { 263 index++; 264 if (c == cr) 265 { 266 // cr = part boundary 267 flags = Boundary::PART_BOUNDARY; 268 } 269 else if (c == hyphen) 270 { 271 // hyphen = end boundary 272 flags = Boundary::END_BOUNDARY; 273 } 274 else 275 { 276 index = 0; 277 } 278 } 279 else 280 { 281 if (flags == Boundary::PART_BOUNDARY) 282 { 283 index = 0; 284 if (c == lf) 285 { 286 // unset the PART_BOUNDARY flag 287 flags = Boundary::NON_BOUNDARY; 288 mime_fields.push_back({}); 289 state = State::HEADER_FIELD_START; 290 return; 291 } 292 } 293 if (flags == Boundary::END_BOUNDARY) 294 { 295 if (c == hyphen) 296 { 297 state = State::END; 298 } 299 } 300 } 301 302 if (index > 0) 303 { 304 lookbehind[index - 1] = c; 305 } 306 else if (prevIndex > 0) 307 { 308 // if our boundary turned out to be rubbish, the captured 309 // lookbehind belongs to partData 310 311 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 312 prevIndex = 0; 313 partDataMark = i; 314 315 // reconsider the current character even so it interrupted 316 // the sequence it could be the beginning of a new sequence 317 i--; 318 } 319 } 320 321 std::string currentHeaderName; 322 std::string currentHeaderValue; 323 324 static constexpr char cr = '\r'; 325 static constexpr char lf = '\n'; 326 static constexpr char space = ' '; 327 static constexpr char hyphen = '-'; 328 static constexpr char colon = ':'; 329 330 std::array<bool, 256> boundaryIndex; 331 std::string lookbehind; 332 State state; 333 Boundary flags; 334 size_t index = 0; 335 size_t partDataMark = 0; 336 size_t headerFieldMark = 0; 337 size_t headerValueMark = 0; 338 }; 339