1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * The base64 encode/decode code was copied from fscrypt: 4 * Copyright (C) 2015, Google, Inc. 5 * Copyright (C) 2015, Motorola Mobility 6 * Written by Uday Savagaonkar, 2014. 7 * Modified by Jaegeuk Kim, 2015. 8 */ 9 #include <linux/ceph/ceph_debug.h> 10 #include <linux/xattr.h> 11 #include <linux/fscrypt.h> 12 #include <linux/ceph/striper.h> 13 14 #include "super.h" 15 #include "mds_client.h" 16 #include "crypto.h" 17 18 /* 19 * The base64url encoding used by fscrypt includes the '_' character, which may 20 * cause problems in snapshot names (which can not start with '_'). Thus, we 21 * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead, 22 * which replaces '-' and '_' by '+' and ','. 23 */ 24 static const char base64_table[65] = 25 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; 26 27 int ceph_base64_encode(const u8 *src, int srclen, char *dst) 28 { 29 u32 ac = 0; 30 int bits = 0; 31 int i; 32 char *cp = dst; 33 34 for (i = 0; i < srclen; i++) { 35 ac = (ac << 8) | src[i]; 36 bits += 8; 37 do { 38 bits -= 6; 39 *cp++ = base64_table[(ac >> bits) & 0x3f]; 40 } while (bits >= 6); 41 } 42 if (bits) 43 *cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; 44 return cp - dst; 45 } 46 47 int ceph_base64_decode(const char *src, int srclen, u8 *dst) 48 { 49 u32 ac = 0; 50 int bits = 0; 51 int i; 52 u8 *bp = dst; 53 54 for (i = 0; i < srclen; i++) { 55 const char *p = strchr(base64_table, src[i]); 56 57 if (p == NULL || src[i] == 0) 58 return -1; 59 ac = (ac << 6) | (p - base64_table); 60 bits += 6; 61 if (bits >= 8) { 62 bits -= 8; 63 *bp++ = (u8)(ac >> bits); 64 } 65 } 66 if (ac & ((1 << bits) - 1)) 67 return -1; 68 return bp - dst; 69 } 70 71 static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len) 72 { 73 struct ceph_inode_info *ci = ceph_inode(inode); 74 struct ceph_fscrypt_auth *cfa = (struct ceph_fscrypt_auth *)ci->fscrypt_auth; 75 u32 ctxlen; 76 77 /* Non existent or too short? */ 78 if (!cfa || (ci->fscrypt_auth_len < (offsetof(struct ceph_fscrypt_auth, cfa_blob) + 1))) 79 return -ENOBUFS; 80 81 /* Some format we don't recognize? */ 82 if (le32_to_cpu(cfa->cfa_version) != CEPH_FSCRYPT_AUTH_VERSION) 83 return -ENOBUFS; 84 85 ctxlen = le32_to_cpu(cfa->cfa_blob_len); 86 if (len < ctxlen) 87 return -ERANGE; 88 89 memcpy(ctx, cfa->cfa_blob, ctxlen); 90 return ctxlen; 91 } 92 93 static int ceph_crypt_set_context(struct inode *inode, const void *ctx, 94 size_t len, void *fs_data) 95 { 96 int ret; 97 struct iattr attr = { }; 98 struct ceph_iattr cia = { }; 99 struct ceph_fscrypt_auth *cfa; 100 101 WARN_ON_ONCE(fs_data); 102 103 if (len > FSCRYPT_SET_CONTEXT_MAX_SIZE) 104 return -EINVAL; 105 106 cfa = kzalloc(sizeof(*cfa), GFP_KERNEL); 107 if (!cfa) 108 return -ENOMEM; 109 110 cfa->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION); 111 cfa->cfa_blob_len = cpu_to_le32(len); 112 memcpy(cfa->cfa_blob, ctx, len); 113 114 cia.fscrypt_auth = cfa; 115 116 ret = __ceph_setattr(inode, &attr, &cia); 117 if (ret == 0) 118 inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); 119 kfree(cia.fscrypt_auth); 120 return ret; 121 } 122 123 static bool ceph_crypt_empty_dir(struct inode *inode) 124 { 125 struct ceph_inode_info *ci = ceph_inode(inode); 126 127 return ci->i_rsubdirs + ci->i_rfiles == 1; 128 } 129 130 static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb) 131 { 132 return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy; 133 } 134 135 static struct fscrypt_operations ceph_fscrypt_ops = { 136 .get_context = ceph_crypt_get_context, 137 .set_context = ceph_crypt_set_context, 138 .get_dummy_policy = ceph_get_dummy_policy, 139 .empty_dir = ceph_crypt_empty_dir, 140 }; 141 142 void ceph_fscrypt_set_ops(struct super_block *sb) 143 { 144 fscrypt_set_ops(sb, &ceph_fscrypt_ops); 145 } 146 147 void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc) 148 { 149 fscrypt_free_dummy_policy(&fsc->fsc_dummy_enc_policy); 150 } 151 152 int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode, 153 struct ceph_acl_sec_ctx *as) 154 { 155 int ret, ctxsize; 156 bool encrypted = false; 157 struct ceph_inode_info *ci = ceph_inode(inode); 158 159 ret = fscrypt_prepare_new_inode(dir, inode, &encrypted); 160 if (ret) 161 return ret; 162 if (!encrypted) 163 return 0; 164 165 as->fscrypt_auth = kzalloc(sizeof(*as->fscrypt_auth), GFP_KERNEL); 166 if (!as->fscrypt_auth) 167 return -ENOMEM; 168 169 ctxsize = fscrypt_context_for_new_inode(as->fscrypt_auth->cfa_blob, 170 inode); 171 if (ctxsize < 0) 172 return ctxsize; 173 174 as->fscrypt_auth->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION); 175 as->fscrypt_auth->cfa_blob_len = cpu_to_le32(ctxsize); 176 177 WARN_ON_ONCE(ci->fscrypt_auth); 178 kfree(ci->fscrypt_auth); 179 ci->fscrypt_auth_len = ceph_fscrypt_auth_len(as->fscrypt_auth); 180 ci->fscrypt_auth = kmemdup(as->fscrypt_auth, ci->fscrypt_auth_len, 181 GFP_KERNEL); 182 if (!ci->fscrypt_auth) 183 return -ENOMEM; 184 185 inode->i_flags |= S_ENCRYPTED; 186 187 return 0; 188 } 189 190 void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req, 191 struct ceph_acl_sec_ctx *as) 192 { 193 swap(req->r_fscrypt_auth, as->fscrypt_auth); 194 } 195 196 /* 197 * User-created snapshots can't start with '_'. Snapshots that start with this 198 * character are special (hint: there aren't real snapshots) and use the 199 * following format: 200 * 201 * _<SNAPSHOT-NAME>_<INODE-NUMBER> 202 * 203 * where: 204 * - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted, 205 * - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot 206 * 207 * This function parses these snapshot names and returns the inode 208 * <INODE-NUMBER>. 'name_len' will also bet set with the <SNAPSHOT-NAME> 209 * length. 210 */ 211 static struct inode *parse_longname(const struct inode *parent, 212 const char *name, int *name_len) 213 { 214 struct inode *dir = NULL; 215 struct ceph_vino vino = { .snap = CEPH_NOSNAP }; 216 char *inode_number; 217 char *name_end; 218 int orig_len = *name_len; 219 int ret = -EIO; 220 221 /* Skip initial '_' */ 222 name++; 223 name_end = strrchr(name, '_'); 224 if (!name_end) { 225 dout("Failed to parse long snapshot name: %s\n", name); 226 return ERR_PTR(-EIO); 227 } 228 *name_len = (name_end - name); 229 if (*name_len <= 0) { 230 pr_err("Failed to parse long snapshot name\n"); 231 return ERR_PTR(-EIO); 232 } 233 234 /* Get the inode number */ 235 inode_number = kmemdup_nul(name_end + 1, 236 orig_len - *name_len - 2, 237 GFP_KERNEL); 238 if (!inode_number) 239 return ERR_PTR(-ENOMEM); 240 ret = kstrtou64(inode_number, 10, &vino.ino); 241 if (ret) { 242 dout("Failed to parse inode number: %s\n", name); 243 dir = ERR_PTR(ret); 244 goto out; 245 } 246 247 /* And finally the inode */ 248 dir = ceph_find_inode(parent->i_sb, vino); 249 if (!dir) { 250 /* This can happen if we're not mounting cephfs on the root */ 251 dir = ceph_get_inode(parent->i_sb, vino, NULL); 252 if (IS_ERR(dir)) 253 dout("Can't find inode %s (%s)\n", inode_number, name); 254 } 255 256 out: 257 kfree(inode_number); 258 return dir; 259 } 260 261 int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name, 262 char *buf) 263 { 264 struct inode *dir = parent; 265 struct qstr iname; 266 u32 len; 267 int name_len; 268 int elen; 269 int ret; 270 u8 *cryptbuf = NULL; 271 272 iname.name = d_name->name; 273 name_len = d_name->len; 274 275 /* Handle the special case of snapshot names that start with '_' */ 276 if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) && 277 (iname.name[0] == '_')) { 278 dir = parse_longname(parent, iname.name, &name_len); 279 if (IS_ERR(dir)) 280 return PTR_ERR(dir); 281 iname.name++; /* skip initial '_' */ 282 } 283 iname.len = name_len; 284 285 if (!fscrypt_has_encryption_key(dir)) { 286 memcpy(buf, d_name->name, d_name->len); 287 elen = d_name->len; 288 goto out; 289 } 290 291 /* 292 * Convert cleartext d_name to ciphertext. If result is longer than 293 * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes 294 * 295 * See: fscrypt_setup_filename 296 */ 297 if (!fscrypt_fname_encrypted_size(dir, iname.len, NAME_MAX, &len)) { 298 elen = -ENAMETOOLONG; 299 goto out; 300 } 301 302 /* Allocate a buffer appropriate to hold the result */ 303 cryptbuf = kmalloc(len > CEPH_NOHASH_NAME_MAX ? NAME_MAX : len, 304 GFP_KERNEL); 305 if (!cryptbuf) { 306 elen = -ENOMEM; 307 goto out; 308 } 309 310 ret = fscrypt_fname_encrypt(dir, &iname, cryptbuf, len); 311 if (ret) { 312 elen = ret; 313 goto out; 314 } 315 316 /* hash the end if the name is long enough */ 317 if (len > CEPH_NOHASH_NAME_MAX) { 318 u8 hash[SHA256_DIGEST_SIZE]; 319 u8 *extra = cryptbuf + CEPH_NOHASH_NAME_MAX; 320 321 /* 322 * hash the extra bytes and overwrite crypttext beyond that 323 * point with it 324 */ 325 sha256(extra, len - CEPH_NOHASH_NAME_MAX, hash); 326 memcpy(extra, hash, SHA256_DIGEST_SIZE); 327 len = CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE; 328 } 329 330 /* base64 encode the encrypted name */ 331 elen = ceph_base64_encode(cryptbuf, len, buf); 332 dout("base64-encoded ciphertext name = %.*s\n", elen, buf); 333 334 /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */ 335 WARN_ON(elen > 240); 336 if ((elen > 0) && (dir != parent)) { 337 char tmp_buf[NAME_MAX]; 338 339 elen = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld", 340 elen, buf, dir->i_ino); 341 memcpy(buf, tmp_buf, elen); 342 } 343 344 out: 345 kfree(cryptbuf); 346 if (dir != parent) { 347 if ((dir->i_state & I_NEW)) 348 discard_new_inode(dir); 349 else 350 iput(dir); 351 } 352 return elen; 353 } 354 355 int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry, 356 char *buf) 357 { 358 WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)); 359 360 return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf); 361 } 362 363 /** 364 * ceph_fname_to_usr - convert a filename for userland presentation 365 * @fname: ceph_fname to be converted 366 * @tname: temporary name buffer to use for conversion (may be NULL) 367 * @oname: where converted name should be placed 368 * @is_nokey: set to true if key wasn't available during conversion (may be NULL) 369 * 370 * Given a filename (usually from the MDS), format it for presentation to 371 * userland. If @parent is not encrypted, just pass it back as-is. 372 * 373 * Otherwise, base64 decode the string, and then ask fscrypt to format it 374 * for userland presentation. 375 * 376 * Returns 0 on success or negative error code on error. 377 */ 378 int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname, 379 struct fscrypt_str *oname, bool *is_nokey) 380 { 381 struct inode *dir = fname->dir; 382 struct fscrypt_str _tname = FSTR_INIT(NULL, 0); 383 struct fscrypt_str iname; 384 char *name = fname->name; 385 int name_len = fname->name_len; 386 int ret; 387 388 /* Sanity check that the resulting name will fit in the buffer */ 389 if (fname->name_len > NAME_MAX || fname->ctext_len > NAME_MAX) 390 return -EIO; 391 392 /* Handle the special case of snapshot names that start with '_' */ 393 if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) && 394 (name[0] == '_')) { 395 dir = parse_longname(dir, name, &name_len); 396 if (IS_ERR(dir)) 397 return PTR_ERR(dir); 398 name++; /* skip initial '_' */ 399 } 400 401 if (!IS_ENCRYPTED(dir)) { 402 oname->name = fname->name; 403 oname->len = fname->name_len; 404 ret = 0; 405 goto out_inode; 406 } 407 408 ret = ceph_fscrypt_prepare_readdir(dir); 409 if (ret) 410 goto out_inode; 411 412 /* 413 * Use the raw dentry name as sent by the MDS instead of 414 * generating a nokey name via fscrypt. 415 */ 416 if (!fscrypt_has_encryption_key(dir)) { 417 if (fname->no_copy) 418 oname->name = fname->name; 419 else 420 memcpy(oname->name, fname->name, fname->name_len); 421 oname->len = fname->name_len; 422 if (is_nokey) 423 *is_nokey = true; 424 ret = 0; 425 goto out_inode; 426 } 427 428 if (fname->ctext_len == 0) { 429 int declen; 430 431 if (!tname) { 432 ret = fscrypt_fname_alloc_buffer(NAME_MAX, &_tname); 433 if (ret) 434 goto out_inode; 435 tname = &_tname; 436 } 437 438 declen = ceph_base64_decode(name, name_len, tname->name); 439 if (declen <= 0) { 440 ret = -EIO; 441 goto out; 442 } 443 iname.name = tname->name; 444 iname.len = declen; 445 } else { 446 iname.name = fname->ctext; 447 iname.len = fname->ctext_len; 448 } 449 450 ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname); 451 if (!ret && (dir != fname->dir)) { 452 char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)]; 453 454 name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld", 455 oname->len, oname->name, dir->i_ino); 456 memcpy(oname->name, tmp_buf, name_len); 457 oname->len = name_len; 458 } 459 460 out: 461 fscrypt_fname_free_buffer(&_tname); 462 out_inode: 463 if ((dir != fname->dir) && !IS_ERR(dir)) { 464 if ((dir->i_state & I_NEW)) 465 discard_new_inode(dir); 466 else 467 iput(dir); 468 } 469 return ret; 470 } 471 472 /** 473 * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper 474 * @dir: directory inode for readdir prep 475 * 476 * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as 477 * non-complete if this call results in having the directory unlocked. 478 * 479 * Returns: 480 * 1 - if directory was locked and key is now loaded (i.e. dir is unlocked) 481 * 0 - if directory is still locked 482 * < 0 - if __fscrypt_prepare_readdir() fails 483 */ 484 int ceph_fscrypt_prepare_readdir(struct inode *dir) 485 { 486 bool had_key = fscrypt_has_encryption_key(dir); 487 int err; 488 489 if (!IS_ENCRYPTED(dir)) 490 return 0; 491 492 err = __fscrypt_prepare_readdir(dir); 493 if (err) 494 return err; 495 if (!had_key && fscrypt_has_encryption_key(dir)) { 496 /* directory just got unlocked, mark it as not complete */ 497 ceph_dir_clear_complete(dir); 498 return 1; 499 } 500 return 0; 501 } 502 503 int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode, 504 struct page *page, unsigned int len, 505 unsigned int offs, u64 lblk_num) 506 { 507 dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); 508 return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num); 509 } 510 511 int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode, 512 struct page *page, unsigned int len, 513 unsigned int offs, u64 lblk_num, 514 gfp_t gfp_flags) 515 { 516 dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num); 517 return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num, 518 gfp_flags); 519 } 520 521 /** 522 * ceph_fscrypt_decrypt_pages - decrypt an array of pages 523 * @inode: pointer to inode associated with these pages 524 * @page: pointer to page array 525 * @off: offset into the file that the read data starts 526 * @len: max length to decrypt 527 * 528 * Decrypt an array of fscrypt'ed pages and return the amount of 529 * data decrypted. Any data in the page prior to the start of the 530 * first complete block in the read is ignored. Any incomplete 531 * crypto blocks at the end of the array are ignored (and should 532 * probably be zeroed by the caller). 533 * 534 * Returns the length of the decrypted data or a negative errno. 535 */ 536 int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page, 537 u64 off, int len) 538 { 539 int i, num_blocks; 540 u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT; 541 int ret = 0; 542 543 /* 544 * We can't deal with partial blocks on an encrypted file, so mask off 545 * the last bit. 546 */ 547 num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK); 548 549 /* Decrypt each block */ 550 for (i = 0; i < num_blocks; ++i) { 551 int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT; 552 int pgidx = blkoff >> PAGE_SHIFT; 553 unsigned int pgoffs = offset_in_page(blkoff); 554 int fret; 555 556 fret = ceph_fscrypt_decrypt_block_inplace(inode, page[pgidx], 557 CEPH_FSCRYPT_BLOCK_SIZE, pgoffs, 558 baseblk + i); 559 if (fret < 0) { 560 if (ret == 0) 561 ret = fret; 562 break; 563 } 564 ret += CEPH_FSCRYPT_BLOCK_SIZE; 565 } 566 return ret; 567 } 568 569 /** 570 * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer 571 * @inode: inode associated with pages being decrypted 572 * @page: pointer to page array 573 * @off: offset into the file that the data in page[0] starts 574 * @map: pointer to extent array 575 * @ext_cnt: length of extent array 576 * 577 * Given an extent map and a page array, decrypt the received data in-place, 578 * skipping holes. Returns the offset into buffer of end of last decrypted 579 * block. 580 */ 581 int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page, 582 u64 off, struct ceph_sparse_extent *map, 583 u32 ext_cnt) 584 { 585 int i, ret = 0; 586 struct ceph_inode_info *ci = ceph_inode(inode); 587 u64 objno, objoff; 588 u32 xlen; 589 590 /* Nothing to do for empty array */ 591 if (ext_cnt == 0) { 592 dout("%s: empty array, ret 0\n", __func__); 593 return 0; 594 } 595 596 ceph_calc_file_object_mapping(&ci->i_layout, off, map[0].len, 597 &objno, &objoff, &xlen); 598 599 for (i = 0; i < ext_cnt; ++i) { 600 struct ceph_sparse_extent *ext = &map[i]; 601 int pgsoff = ext->off - objoff; 602 int pgidx = pgsoff >> PAGE_SHIFT; 603 int fret; 604 605 if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) { 606 pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n", 607 __func__, i, ext->off, ext->len); 608 return -EIO; 609 } 610 fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx], 611 off + pgsoff, ext->len); 612 dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i, 613 ext->off, ext->len, fret); 614 if (fret < 0) { 615 if (ret == 0) 616 ret = fret; 617 break; 618 } 619 ret = pgsoff + fret; 620 } 621 dout("%s: ret %d\n", __func__, ret); 622 return ret; 623 } 624 625 /** 626 * ceph_fscrypt_encrypt_pages - encrypt an array of pages 627 * @inode: pointer to inode associated with these pages 628 * @page: pointer to page array 629 * @off: offset into the file that the data starts 630 * @len: max length to encrypt 631 * @gfp: gfp flags to use for allocation 632 * 633 * Decrypt an array of cleartext pages and return the amount of 634 * data encrypted. Any data in the page prior to the start of the 635 * first complete block in the read is ignored. Any incomplete 636 * crypto blocks at the end of the array are ignored. 637 * 638 * Returns the length of the encrypted data or a negative errno. 639 */ 640 int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off, 641 int len, gfp_t gfp) 642 { 643 int i, num_blocks; 644 u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT; 645 int ret = 0; 646 647 /* 648 * We can't deal with partial blocks on an encrypted file, so mask off 649 * the last bit. 650 */ 651 num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK); 652 653 /* Encrypt each block */ 654 for (i = 0; i < num_blocks; ++i) { 655 int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT; 656 int pgidx = blkoff >> PAGE_SHIFT; 657 unsigned int pgoffs = offset_in_page(blkoff); 658 int fret; 659 660 fret = ceph_fscrypt_encrypt_block_inplace(inode, page[pgidx], 661 CEPH_FSCRYPT_BLOCK_SIZE, pgoffs, 662 baseblk + i, gfp); 663 if (fret < 0) { 664 if (ret == 0) 665 ret = fret; 666 break; 667 } 668 ret += CEPH_FSCRYPT_BLOCK_SIZE; 669 } 670 return ret; 671 } 672