1 // SPDX-License-Identifier: Apache-2.0 2 // SPDX-FileCopyrightText: Copyright OpenBMC Authors 3 #pragma once 4 5 #include "http_request.hpp" 6 7 #include <boost/beast/http/fields.hpp> 8 9 #include <algorithm> 10 #include <array> 11 #include <cstddef> 12 #include <ranges> 13 #include <string> 14 #include <string_view> 15 #include <vector> 16 17 enum class ParserError 18 { 19 PARSER_SUCCESS, 20 ERROR_BOUNDARY_FORMAT, 21 ERROR_BOUNDARY_CR, 22 ERROR_BOUNDARY_LF, 23 ERROR_BOUNDARY_DATA, 24 ERROR_EMPTY_HEADER, 25 ERROR_HEADER_NAME, 26 ERROR_HEADER_VALUE, 27 ERROR_HEADER_ENDING, 28 ERROR_UNEXPECTED_END_OF_HEADER, 29 ERROR_UNEXPECTED_END_OF_INPUT, 30 ERROR_OUT_OF_RANGE 31 }; 32 33 enum class State 34 { 35 START, 36 START_BOUNDARY, 37 HEADER_FIELD_START, 38 HEADER_FIELD, 39 HEADER_VALUE_START, 40 HEADER_VALUE, 41 HEADER_VALUE_ALMOST_DONE, 42 HEADERS_ALMOST_DONE, 43 PART_DATA_START, 44 PART_DATA, 45 END 46 }; 47 48 enum class Boundary 49 { 50 NON_BOUNDARY, 51 PART_BOUNDARY, 52 END_BOUNDARY, 53 }; 54 55 struct FormPart 56 { 57 boost::beast::http::fields fields; 58 std::string content; 59 }; 60 61 class MultipartParser 62 { 63 public: 64 MultipartParser() = default; 65 parse(const crow::Request & req)66 [[nodiscard]] ParserError parse(const crow::Request& req) 67 { 68 std::string_view contentType = req.getHeaderValue("content-type"); 69 70 const std::string boundaryFormat = "multipart/form-data; boundary="; 71 if (!contentType.starts_with(boundaryFormat)) 72 { 73 return ParserError::ERROR_BOUNDARY_FORMAT; 74 } 75 76 std::string_view ctBoundary = contentType.substr(boundaryFormat.size()); 77 78 boundary = "\r\n--"; 79 boundary += ctBoundary; 80 indexBoundary(); 81 lookbehind.resize(boundary.size() + 8); 82 state = State::START; 83 84 const std::string& buffer = req.body(); 85 size_t len = buffer.size(); 86 char cl = 0; 87 88 for (size_t i = 0; i < len; i++) 89 { 90 char c = buffer[i]; 91 switch (state) 92 { 93 case State::START: 94 index = 0; 95 state = State::START_BOUNDARY; 96 [[fallthrough]]; 97 case State::START_BOUNDARY: 98 if (index == boundary.size() - 2) 99 { 100 if (c != cr) 101 { 102 return ParserError::ERROR_BOUNDARY_CR; 103 } 104 index++; 105 break; 106 } 107 else if (index - 1 == boundary.size() - 2) 108 { 109 if (c != lf) 110 { 111 return ParserError::ERROR_BOUNDARY_LF; 112 } 113 index = 0; 114 mime_fields.emplace_back(); 115 state = State::HEADER_FIELD_START; 116 break; 117 } 118 if (c != boundary[index + 2]) 119 { 120 return ParserError::ERROR_BOUNDARY_DATA; 121 } 122 index++; 123 break; 124 case State::HEADER_FIELD_START: 125 currentHeaderName.resize(0); 126 state = State::HEADER_FIELD; 127 headerFieldMark = i; 128 index = 0; 129 [[fallthrough]]; 130 case State::HEADER_FIELD: 131 if (c == cr) 132 { 133 headerFieldMark = 0; 134 state = State::HEADERS_ALMOST_DONE; 135 break; 136 } 137 138 index++; 139 if (c == hyphen) 140 { 141 break; 142 } 143 144 if (c == colon) 145 { 146 if (index == 1) 147 { 148 return ParserError::ERROR_EMPTY_HEADER; 149 } 150 151 currentHeaderName.append(&buffer[headerFieldMark], 152 i - headerFieldMark); 153 state = State::HEADER_VALUE_START; 154 break; 155 } 156 cl = lower(c); 157 if (cl < 'a' || cl > 'z') 158 { 159 return ParserError::ERROR_HEADER_NAME; 160 } 161 break; 162 case State::HEADER_VALUE_START: 163 if (c == space) 164 { 165 break; 166 } 167 headerValueMark = i; 168 state = State::HEADER_VALUE; 169 [[fallthrough]]; 170 case State::HEADER_VALUE: 171 if (c == cr) 172 { 173 std::string_view value(&buffer[headerValueMark], 174 i - headerValueMark); 175 mime_fields.rbegin()->fields.set(currentHeaderName, 176 value); 177 state = State::HEADER_VALUE_ALMOST_DONE; 178 } 179 break; 180 case State::HEADER_VALUE_ALMOST_DONE: 181 if (c != lf) 182 { 183 return ParserError::ERROR_HEADER_VALUE; 184 } 185 state = State::HEADER_FIELD_START; 186 break; 187 case State::HEADERS_ALMOST_DONE: 188 if (c != lf) 189 { 190 return ParserError::ERROR_HEADER_ENDING; 191 } 192 if (index > 0) 193 { 194 return ParserError::ERROR_UNEXPECTED_END_OF_HEADER; 195 } 196 state = State::PART_DATA_START; 197 break; 198 case State::PART_DATA_START: 199 state = State::PART_DATA; 200 partDataMark = i; 201 [[fallthrough]]; 202 case State::PART_DATA: 203 { 204 if (index == 0) 205 { 206 skipNonBoundary(buffer, boundary.size() - 1, i); 207 c = buffer[i]; 208 } 209 if (auto ec = processPartData(buffer, i, c); 210 ec != ParserError::PARSER_SUCCESS) 211 { 212 return ec; 213 } 214 break; 215 } 216 case State::END: 217 break; 218 default: 219 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 220 } 221 } 222 223 if (state != State::END) 224 { 225 return ParserError::ERROR_UNEXPECTED_END_OF_INPUT; 226 } 227 228 return ParserError::PARSER_SUCCESS; 229 } 230 std::vector<FormPart> mime_fields; 231 std::string boundary; 232 233 private: indexBoundary()234 void indexBoundary() 235 { 236 std::ranges::fill(boundaryIndex, 0); 237 for (const char current : boundary) 238 { 239 boundaryIndex[static_cast<unsigned char>(current)] = true; 240 } 241 } 242 lower(char c)243 static char lower(char c) 244 { 245 return static_cast<char>(c | 0x20); 246 } 247 isBoundaryChar(char c) const248 bool isBoundaryChar(char c) const 249 { 250 return boundaryIndex[static_cast<unsigned char>(c)]; 251 } 252 skipNonBoundary(const std::string & buffer,size_t boundaryEnd,size_t & i)253 void skipNonBoundary(const std::string& buffer, size_t boundaryEnd, 254 size_t& i) 255 { 256 // boyer-moore derived algorithm to safely skip non-boundary data 257 while (i + boundary.size() <= buffer.length()) 258 { 259 if (isBoundaryChar(buffer[i + boundaryEnd])) 260 { 261 break; 262 } 263 i += boundary.size(); 264 } 265 } 266 processPartData(const std::string & buffer,size_t & i,char c)267 ParserError processPartData(const std::string& buffer, size_t& i, char c) 268 { 269 size_t prevIndex = index; 270 271 if (index < boundary.size()) 272 { 273 if (boundary[index] == c) 274 { 275 if (index == 0) 276 { 277 const char* start = &buffer[partDataMark]; 278 size_t size = i - partDataMark; 279 mime_fields.rbegin()->content += 280 std::string_view(start, size); 281 } 282 index++; 283 } 284 else 285 { 286 index = 0; 287 } 288 } 289 else if (index == boundary.size()) 290 { 291 index++; 292 if (c == cr) 293 { 294 // cr = part boundary 295 flags = Boundary::PART_BOUNDARY; 296 } 297 else if (c == hyphen) 298 { 299 // hyphen = end boundary 300 flags = Boundary::END_BOUNDARY; 301 } 302 else 303 { 304 index = 0; 305 } 306 } 307 else 308 { 309 if (flags == Boundary::PART_BOUNDARY) 310 { 311 index = 0; 312 if (c == lf) 313 { 314 // unset the PART_BOUNDARY flag 315 flags = Boundary::NON_BOUNDARY; 316 mime_fields.emplace_back(); 317 state = State::HEADER_FIELD_START; 318 return ParserError::PARSER_SUCCESS; 319 } 320 } 321 if (flags == Boundary::END_BOUNDARY) 322 { 323 if (c == hyphen) 324 { 325 state = State::END; 326 } 327 else 328 { 329 flags = Boundary::NON_BOUNDARY; 330 index = 0; 331 } 332 } 333 } 334 335 if (index > 0) 336 { 337 if ((index - 1) >= lookbehind.size()) 338 { 339 // Should never happen, but when it does it won't cause crash 340 return ParserError::ERROR_OUT_OF_RANGE; 341 } 342 lookbehind[index - 1] = c; 343 } 344 else if (prevIndex > 0) 345 { 346 // if our boundary turned out to be rubbish, the captured 347 // lookbehind belongs to partData 348 349 mime_fields.rbegin()->content += lookbehind.substr(0, prevIndex); 350 partDataMark = i; 351 352 // reconsider the current character even so it interrupted 353 // the sequence it could be the beginning of a new sequence 354 i--; 355 } 356 return ParserError::PARSER_SUCCESS; 357 } 358 359 std::string currentHeaderName; 360 std::string currentHeaderValue; 361 362 static constexpr char cr = '\r'; 363 static constexpr char lf = '\n'; 364 static constexpr char space = ' '; 365 static constexpr char hyphen = '-'; 366 static constexpr char colon = ':'; 367 368 std::array<bool, 256> boundaryIndex{}; 369 std::string lookbehind; 370 State state{State::START}; 371 Boundary flags{Boundary::NON_BOUNDARY}; 372 size_t index = 0; 373 size_t partDataMark = 0; 374 size_t headerFieldMark = 0; 375 size_t headerValueMark = 0; 376 }; 377