1From 544d23dea91b2be793c805b9e4bce8cd1d28121f Mon Sep 17 00:00:00 2001 2From: Jason Wessel <jason.wessel@windriver.com> 3Date: Sat, 7 Dec 2019 09:59:22 -0800 4Subject: [PATCH] localedef: Add hardlink resolver from util-linux 5 6The hard link resolver that is built into localedef cannot be run in 7parallel. It will search sibling directories (which are being processed 8in parallel) and perform a creation of a .tmp file and remove the 9original and move the .tmp file in. The problem is that if a probe 10occurs a hard link can be requested to the file that is being removed. 11This will lead to a stray copy or potentially, on a loaded system, 12cause a race condition which pseudo cannot deal with, where it is left 13with a hard link request to a file that no longer exists. In this 14situation pseudo will inherit the permissions of whatever the target 15inode had to offer. 16 17In short, there are two problems: 18 191) You will be left with stray copies when using the hard link 20resolution that is built in while running in parallel with 21localedef. 22 232) When running under pseudo the possibility exists for uid/gid 24leakage when the source file is removed before the hard link can 25be completed. 26 27The solution is to call localedef with --no-hard-links and separately 28process the hardlinks at a later point. To do this requires the 29inclusion of the hardlink utility found in modern versions of 30util-linux. Most host systems do not have this, so it will be 31included with the cross-localedef binary. 
32 33[YOCTO #11299] 34[YOCTO #12434] 35 36Upstream-Status: Pending 37 38Signed-off-by: Jason Wessel <jason.wessel@windriver.com> 39Signed-off-by: Khem Raj <raj.khem@gmail.com> 40--- 41 locale/programs/c.h | 407 ++++++++++++++++ 42 locale/programs/cross-localedef-hardlink.c | 528 +++++++++++++++++++++ 43 locale/programs/xalloc.h | 129 +++++ 44 3 files changed, 1064 insertions(+) 45 create mode 100644 locale/programs/c.h 46 create mode 100644 locale/programs/cross-localedef-hardlink.c 47 create mode 100644 locale/programs/xalloc.h 48 49diff --git a/locale/programs/c.h b/locale/programs/c.h 50new file mode 100644 51index 0000000000..d0a402e90e 52--- /dev/null 53+++ b/locale/programs/c.h 54@@ -0,0 +1,407 @@ 55+/* 56+ * Fundamental C definitions. 57+ */ 58+ 59+#ifndef UTIL_LINUX_C_H 60+#define UTIL_LINUX_C_H 61+ 62+#include <limits.h> 63+#include <stddef.h> 64+#include <stdint.h> 65+#include <stdio.h> 66+#include <unistd.h> 67+#include <stdarg.h> 68+#include <stdlib.h> 69+#include <string.h> 70+#include <errno.h> 71+ 72+#include <assert.h> 73+ 74+#ifdef HAVE_ERR_H 75+# include <err.h> 76+#endif 77+ 78+#ifdef HAVE_SYS_SYSMACROS_H 79+# include <sys/sysmacros.h> /* for major, minor */ 80+#endif 81+ 82+#ifndef LOGIN_NAME_MAX 83+# define LOGIN_NAME_MAX 256 84+#endif 85+ 86+#ifndef NAME_MAX 87+# define NAME_MAX PATH_MAX 88+#endif 89+ 90+/* 91+ * __GNUC_PREREQ is deprecated in favour of __has_attribute() and 92+ * __has_feature(). The __has macros are supported by clang and gcc>=5. 
93+ */ 94+#ifndef __GNUC_PREREQ 95+# if defined __GNUC__ && defined __GNUC_MINOR__ 96+# define __GNUC_PREREQ(maj, min) \ 97+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) 98+# else 99+# define __GNUC_PREREQ(maj, min) 0 100+# endif 101+#endif 102+ 103+#ifdef __GNUC__ 104+ 105+/* &a[0] degrades to a pointer: a different type from an array */ 106+# define __must_be_array(a) \ 107+ UL_BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(__typeof__(a), __typeof__(&a[0]))) 108+ 109+# define ignore_result(x) __extension__ ({ \ 110+ __typeof__(x) __dummy __attribute__((__unused__)) = (x); (void) __dummy; \ 111+}) 112+ 113+#else /* !__GNUC__ */ 114+# define __must_be_array(a) 0 115+# define __attribute__(_arg_) 116+# define ignore_result(x) ((void) (x)) 117+#endif /* !__GNUC__ */ 118+ 119+/* 120+ * Each macro evaluates to 1 if the attribute/feature is supported by the 121+ * current compilation target. Fallback (always 0) for old compilers. 122+ */ 123+#ifndef __has_attribute 124+ #define __has_attribute(x) 0 125+#endif 126+ 127+#ifndef __has_feature 128+ #define __has_feature(x) 0 129+#endif 130+ 131+/* 132+ * Function attributes 133+ */ 134+#ifndef __ul_alloc_size 135+# if (__has_attribute(alloc_size) && __has_attribute(warn_unused_result)) || __GNUC_PREREQ (4, 3) 136+# define __ul_alloc_size(s) __attribute__((alloc_size(s), warn_unused_result)) 137+# else 138+# define __ul_alloc_size(s) 139+# endif 140+#endif 141+ 142+#ifndef __ul_calloc_size 143+# if (__has_attribute(alloc_size) && __has_attribute(warn_unused_result)) || __GNUC_PREREQ (4, 3) 144+# define __ul_calloc_size(n, s) __attribute__((alloc_size(n, s), warn_unused_result)) 145+# else 146+# define __ul_calloc_size(n, s) 147+# endif 148+#endif 149+ 150+#if __has_attribute(returns_nonnull) || __GNUC_PREREQ (4, 9) 151+# define __ul_returns_nonnull __attribute__((returns_nonnull)) 152+#else 153+# define __ul_returns_nonnull 154+#endif 155+ 156+/* 157+ * Force a compilation error if condition is true, but also produce a 158+ * 
result (of value 0 and type size_t), so the expression can be used 159+ * e.g. in a structure initializer (or wherever else comma expressions 160+ * aren't permitted). 161+ */ 162+#define UL_BUILD_BUG_ON_ZERO(e) __extension__ (sizeof(struct { int:-!!(e); })) 163+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) 164+ 165+#ifndef ARRAY_SIZE 166+# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) 167+#endif 168+ 169+#ifndef PATH_MAX 170+# define PATH_MAX 4096 171+#endif 172+ 173+#ifndef TRUE 174+# define TRUE 1 175+#endif 176+ 177+#ifndef FALSE 178+# define FALSE 0 179+#endif 180+ 181+#ifndef min 182+# define min(x, y) __extension__ ({ \ 183+ __typeof__(x) _min1 = (x); \ 184+ __typeof__(y) _min2 = (y); \ 185+ (void) (&_min1 == &_min2); \ 186+ _min1 < _min2 ? _min1 : _min2; }) 187+#endif 188+ 189+#ifndef max 190+# define max(x, y) __extension__ ({ \ 191+ __typeof__(x) _max1 = (x); \ 192+ __typeof__(y) _max2 = (y); \ 193+ (void) (&_max1 == &_max2); \ 194+ _max1 > _max2 ? _max1 : _max2; }) 195+#endif 196+ 197+#ifndef cmp_numbers 198+# define cmp_numbers(x, y) __extension__ ({ \ 199+ __typeof__(x) _a = (x); \ 200+ __typeof__(y) _b = (y); \ 201+ (void) (&_a == &_b); \ 202+ _a == _b ? 0 : _a > _b ? 1 : -1; }) 203+#endif 204+ 205+#ifndef offsetof 206+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) 207+#endif 208+ 209+/* 210+ * container_of - cast a member of a structure out to the containing structure 211+ * @ptr: the pointer to the member. 212+ * @type: the type of the container struct this is embedded in. 213+ * @member: the name of the member within the struct. 
214+ */ 215+#ifndef container_of 216+#define container_of(ptr, type, member) __extension__ ({ \ 217+ const __typeof__( ((type *)0)->member ) *__mptr = (ptr); \ 218+ (type *)( (char *)__mptr - offsetof(type,member) );}) 219+#endif 220+ 221+#ifndef HAVE_PROGRAM_INVOCATION_SHORT_NAME 222+# ifdef HAVE___PROGNAME 223+extern char *__progname; 224+# define program_invocation_short_name __progname 225+# else 226+# ifdef HAVE_GETEXECNAME 227+# define program_invocation_short_name \ 228+ prog_inv_sh_nm_from_file(getexecname(), 0) 229+# else 230+# define program_invocation_short_name \ 231+ prog_inv_sh_nm_from_file(__FILE__, 1) 232+# endif 233+static char prog_inv_sh_nm_buf[256]; 234+static inline char * 235+prog_inv_sh_nm_from_file(char *f, char stripext) 236+{ 237+ char *t; 238+ 239+ if ((t = strrchr(f, '/')) != NULL) 240+ t++; 241+ else 242+ t = f; 243+ 244+ strncpy(prog_inv_sh_nm_buf, t, sizeof(prog_inv_sh_nm_buf) - 1); 245+ prog_inv_sh_nm_buf[sizeof(prog_inv_sh_nm_buf) - 1] = '\0'; 246+ 247+ if (stripext && (t = strrchr(prog_inv_sh_nm_buf, '.')) != NULL) 248+ *t = '\0'; 249+ 250+ return prog_inv_sh_nm_buf; 251+} 252+# endif 253+#endif 254+ 255+ 256+#ifndef HAVE_ERR_H 257+static inline void 258+errmsg(char doexit, int excode, char adderr, const char *fmt, ...) 259+{ 260+ fprintf(stderr, "%s: ", program_invocation_short_name); 261+ if (fmt != NULL) { 262+ va_list argp; 263+ va_start(argp, fmt); 264+ vfprintf(stderr, fmt, argp); 265+ va_end(argp); 266+ if (adderr) 267+ fprintf(stderr, ": "); 268+ } 269+ if (adderr) 270+ fprintf(stderr, "%m"); 271+ fprintf(stderr, "\n"); 272+ if (doexit) 273+ exit(excode); 274+} 275+ 276+#ifndef HAVE_ERR 277+# define err(E, FMT...) errmsg(1, E, 1, FMT) 278+#endif 279+ 280+#ifndef HAVE_ERRX 281+# define errx(E, FMT...) errmsg(1, E, 0, FMT) 282+#endif 283+ 284+#ifndef HAVE_WARN 285+# define warn(FMT...) errmsg(0, 0, 1, FMT) 286+#endif 287+ 288+#ifndef HAVE_WARNX 289+# define warnx(FMT...) 
errmsg(0, 0, 0, FMT) 290+#endif 291+#endif /* !HAVE_ERR_H */ 292+ 293+ 294+/* Don't use inline function to avoid '#include "nls.h"' in c.h 295+ */ 296+#define errtryhelp(eval) __extension__ ({ \ 297+ fprintf(stderr, _("Try '%s --help' for more information.\n"), \ 298+ program_invocation_short_name); \ 299+ exit(eval); \ 300+}) 301+ 302+/* After failed execvp() */ 303+#define EX_EXEC_FAILED 126 /* Program located, but not usable. */ 304+#define EX_EXEC_ENOENT 127 /* Could not find program to exec. */ 305+#define errexec(name) err(errno == ENOENT ? EX_EXEC_ENOENT : EX_EXEC_FAILED, \ 306+ _("failed to execute %s"), name) 307+ 308+ 309+static inline __attribute__((const)) int is_power_of_2(unsigned long num) 310+{ 311+ return (num != 0 && ((num & (num - 1)) == 0)); 312+} 313+ 314+#ifndef HAVE_LOFF_T 315+typedef int64_t loff_t; 316+#endif 317+ 318+#if !defined(HAVE_DIRFD) && (!defined(HAVE_DECL_DIRFD) || HAVE_DECL_DIRFD == 0) && defined(HAVE_DIR_DD_FD) 319+#include <sys/types.h> 320+#include <dirent.h> 321+static inline int dirfd(DIR *d) 322+{ 323+ return d->dd_fd; 324+} 325+#endif 326+ 327+/* 328+ * Fallback defines for old versions of glibc 329+ */ 330+#include <fcntl.h> 331+ 332+#ifdef O_CLOEXEC 333+#define UL_CLOEXECSTR "e" 334+#else 335+#define UL_CLOEXECSTR "" 336+#endif 337+ 338+#ifndef O_CLOEXEC 339+#define O_CLOEXEC 0 340+#endif 341+ 342+#ifdef __FreeBSD_kernel__ 343+#ifndef F_DUPFD_CLOEXEC 344+#define F_DUPFD_CLOEXEC 17 /* Like F_DUPFD, but FD_CLOEXEC is set */ 345+#endif 346+#endif 347+ 348+ 349+#ifndef AI_ADDRCONFIG 350+#define AI_ADDRCONFIG 0x0020 351+#endif 352+ 353+#ifndef IUTF8 354+#define IUTF8 0040000 355+#endif 356+ 357+/* 358+ * MAXHOSTNAMELEN replacement 359+ */ 360+static inline size_t get_hostname_max(void) 361+{ 362+ long len = sysconf(_SC_HOST_NAME_MAX); 363+ 364+ if (0 < len) 365+ return len; 366+ 367+#ifdef MAXHOSTNAMELEN 368+ return MAXHOSTNAMELEN; 369+#elif HOST_NAME_MAX 370+ return HOST_NAME_MAX; 371+#endif 372+ return 64; 373+} 374+ 375+ 
376+/* 377+ * Constant strings for usage() functions. For more info see 378+ * Documentation/{howto-usage-function.txt,boilerplate.c} 379+ */ 380+#define USAGE_HEADER ("\nUsage:\n") 381+#define USAGE_OPTIONS ("\nOptions:\n") 382+#define USAGE_FUNCTIONS ("\nFunctions:\n") 383+#define USAGE_COMMANDS ("\nCommands:\n") 384+#define USAGE_COLUMNS ("\nAvailable output columns:\n") 385+#define USAGE_SEPARATOR "\n" 386+ 387+#define USAGE_OPTSTR_HELP ("display this help") 388+#define USAGE_OPTSTR_VERSION ("display version") 389+ 390+#define USAGE_HELP_OPTIONS(marg_dsc) \ 391+ "%-" #marg_dsc "s%s\n" \ 392+ "%-" #marg_dsc "s%s\n" \ 393+ , " -h, --help", USAGE_OPTSTR_HELP \ 394+ , " -V, --version", USAGE_OPTSTR_VERSION 395+ 396+#define USAGE_MAN_TAIL(_man) ("\nFor more details see %s.\n"), _man 397+ 398+#define UTIL_LINUX_VERSION ("%s from %s\n"), program_invocation_short_name, PACKAGE_STRING 399+ 400+#define print_version(eval) __extension__ ({ \ 401+ printf(UTIL_LINUX_VERSION); \ 402+ exit(eval); \ 403+}) 404+ 405+/* 406+ * scanf modifiers for "strings allocation" 407+ */ 408+#ifdef HAVE_SCANF_MS_MODIFIER 409+#define UL_SCNsA "%ms" 410+#elif defined(HAVE_SCANF_AS_MODIFIER) 411+#define UL_SCNsA "%as" 412+#endif 413+ 414+/* 415+ * seek stuff 416+ */ 417+#ifndef SEEK_DATA 418+# define SEEK_DATA 3 419+#endif 420+#ifndef SEEK_HOLE 421+# define SEEK_HOLE 4 422+#endif 423+ 424+ 425+/* 426+ * Macros to convert #define'itions to strings, for example 427+ * #define XYXXY 42 428+ * printf ("%s=%s\n", stringify(XYXXY), stringify_value(XYXXY)); 429+ */ 430+#define stringify_value(s) stringify(s) 431+#define stringify(s) #s 432+ 433+/* 434+ * UL_ASAN_BLACKLIST is a macro to tell AddressSanitizer (a compile-time 435+ * instrumentation shipped with Clang and GCC) to not instrument the 436+ * annotated function. Furthermore, it will prevent the compiler from 437+ * inlining the function because inlining currently breaks the blacklisting 438+ * mechanism of AddressSanitizer. 
439+ */ 440+#if __has_feature(address_sanitizer) && __has_attribute(no_sanitize_memory) && __has_attribute(no_sanitize_address) 441+# define UL_ASAN_BLACKLIST __attribute__((noinline)) __attribute__((no_sanitize_memory)) __attribute__((no_sanitize_address)) 442+#else 443+# define UL_ASAN_BLACKLIST /* nothing */ 444+#endif 445+ 446+/* 447+ * Note that sysconf(_SC_GETPW_R_SIZE_MAX) returns *initial* suggested size for 448+ * pwd buffer and in some cases it is not large enough. See POSIX and 449+ * getpwnam_r man page for more details. 450+ */ 451+#define UL_GETPW_BUFSIZ (16 * 1024) 452+ 453+/* 454+ * Darwin or other BSDs may only have MAP_ANON. To get it on Darwin we must 455+ * define _DARWIN_C_SOURCE before including sys/mman.h. We do this in config.h. 456+ */ 457+#if !defined MAP_ANONYMOUS && defined MAP_ANON 458+# define MAP_ANONYMOUS (MAP_ANON) 459+#endif 460+ 461+#endif /* UTIL_LINUX_C_H */ 462diff --git a/locale/programs/cross-localedef-hardlink.c b/locale/programs/cross-localedef-hardlink.c 463new file mode 100644 464index 0000000000..63615896b0 465--- /dev/null 466+++ b/locale/programs/cross-localedef-hardlink.c 467@@ -0,0 +1,528 @@ 468+/* 469+ * hardlink - consolidate duplicate files via hardlinks 470+ * 471+ * Copyright (C) 2018 Red Hat, Inc. All rights reserved. 472+ * Written by Jakub Jelinek <jakub@redhat.com> 473+ * 474+ * Copyright (C) 2019 Karel Zak <kzak@redhat.com> 475+ * 476+ * This program is free software; you can redistribute it and/or modify 477+ * it under the terms of the GNU General Public License as published by 478+ * the Free Software Foundation; either version 2 of the License, or 479+ * (at your option) any later version. 480+ * 481+ * This program is distributed in the hope that it would be useful, 482+ * but WITHOUT ANY WARRANTY; without even the implied warranty of 483+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 484+ * GNU General Public License for more details. 
485+ * 486+ * You should have received a copy of the GNU General Public License along 487+ * with this program; if not, write to the Free Software Foundation, Inc., 488+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 489+ */ 490+#include <sys/types.h> 491+#include <stdlib.h> 492+#include <getopt.h> 493+#include <stdio.h> 494+#include <unistd.h> 495+#include <sys/stat.h> 496+#include <sys/mman.h> 497+#include <string.h> 498+#include <dirent.h> 499+#include <fcntl.h> 500+#include <errno.h> 501+#ifdef HAVE_PCRE 502+# define PCRE2_CODE_UNIT_WIDTH 8 503+# include <pcre2.h> 504+#endif 505+ 506+#include "c.h" 507+#include "xalloc.h" 508+#include "nls.h" 509+#include "closestream.h" 510+ 511+#define NHASH (1<<17) /* Must be a power of 2: hash() masks with NHASH - 1 */ 512+#define NBUF 64 513+ 514+struct hardlink_file; 515+ 516+struct hardlink_hash { 517+ struct hardlink_hash *next; 518+ struct hardlink_file *chain; 519+ off_t size; 520+ time_t mtime; 521+}; 522+ 523+struct hardlink_dir { 524+ struct hardlink_dir *next; 525+ char name[]; 526+}; 527+ 528+struct hardlink_file { 529+ struct hardlink_file *next; 530+ ino_t ino; 531+ dev_t dev; 532+ unsigned int cksum; 533+ char name[]; 534+}; 535+ 536+struct hardlink_dynstr { 537+ char *buf; 538+ size_t alloc; 539+}; 540+ 541+struct hardlink_ctl { 542+ struct hardlink_dir *dirs; 543+ struct hardlink_hash *hps[NHASH]; 544+ char iobuf1[BUFSIZ]; 545+ char iobuf2[BUFSIZ]; 546+ /* summary counters */ 547+ unsigned long long ndirs; 548+ unsigned long long nobjects; 549+ unsigned long long nregfiles; 550+ unsigned long long ncomp; 551+ unsigned long long nlinks; 552+ unsigned long long nsaved; 553+ /* current device */ 554+ dev_t dev; 555+ /* flags */ 556+ unsigned int verbose; 557+ unsigned int 558+ no_link:1, 559+ content_only:1, 560+ force:1; 561+}; 562+/* ctl is in global scope because the atexit() handler print_summary() reads it */ 563+struct hardlink_ctl global_ctl; 564+ 565+__attribute__ ((always_inline)) 566+static inline unsigned int hash(off_t size, time_t mtime) 567+{ 
568+ return (size ^ mtime) & (NHASH - 1); 569+} 570+ 571+__attribute__ ((always_inline)) 572+static inline int stcmp(struct stat *st1, struct stat *st2, int content_scope) 573+{ 574+ if (content_scope) 575+ return st1->st_size != st2->st_size; 576+ 577+ return st1->st_mode != st2->st_mode 578+ || st1->st_uid != st2->st_uid 579+ || st1->st_gid != st2->st_gid 580+ || st1->st_size != st2->st_size 581+ || st1->st_mtime != st2->st_mtime; 582+} 583+/* atexit() handler: prints the run statistics, but only when -v was given. */ 584+static void print_summary(void) 585+{ 586+ struct hardlink_ctl const *const ctl = &global_ctl; 587+ 588+ if (!ctl->verbose) 589+ return; 590+ 591+ if (ctl->verbose > 1 && ctl->nlinks) 592+ fputc('\n', stdout); 593+ /* The counters are unsigned long long, so they are printed with %llu. */ 594+ printf(_("Directories: %9llu\n"), ctl->ndirs); 595+ printf(_("Objects: %9llu\n"), ctl->nobjects); 596+ printf(_("Regular files: %9llu\n"), ctl->nregfiles); 597+ printf(_("Comparisons: %9llu\n"), ctl->ncomp); 598+ printf( "%s%9llu\n", (ctl->no_link ? 599+ _("Would link: ") : 600+ _("Linked: ")), ctl->nlinks); 601+ printf( "%s %9llu\n", (ctl->no_link ? 
602+ _("Would save: ") : 603+ _("Saved: ")), ctl->nsaved); 604+} 605+ 606+static void __attribute__((__noreturn__)) usage(void) 607+{ 608+ fputs(USAGE_HEADER, stdout); 609+ printf(_(" %s [options] directory...\n"), program_invocation_short_name); 610+ 611+ fputs(USAGE_SEPARATOR, stdout); 612+ puts(_("Consolidate duplicate files using hardlinks.")); 613+ 614+ fputs(USAGE_OPTIONS, stdout); 615+ puts(_(" -c, --content compare only contents, ignore permission, etc.")); 616+ puts(_(" -n, --dry-run don't actually link anything")); 617+ puts(_(" -v, --verbose print summary after hardlinking")); 618+ puts(_(" -vv print every hardlinked file and summary")); 619+ puts(_(" -f, --force force hardlinking across filesystems")); 620+ puts(_(" -x, --exclude <regex> exclude files matching pattern")); 621+ 622+ fputs(USAGE_SEPARATOR, stdout); 623+ printf(USAGE_HELP_OPTIONS(16)); /* char offset to align option descriptions */ 624+ printf(USAGE_MAN_TAIL("hardlink(1)")); 625+ exit(EXIT_SUCCESS); 626+} 627+ 628+__attribute__ ((always_inline)) 629+static inline size_t add2(size_t a, size_t b) 630+{ 631+ size_t sum = a + b; 632+ 633+ if (sum < a) 634+ errx(EXIT_FAILURE, _("integer overflow")); 635+ return sum; 636+} 637+ 638+__attribute__ ((always_inline)) 639+static inline size_t add3(size_t a, size_t b, size_t c) 640+{ 641+ return add2(add2(a, b), c); 642+} 643+ 644+static void growstr(struct hardlink_dynstr *str, size_t newlen) 645+{ 646+ if (newlen < str->alloc) 647+ return; 648+ str->buf = xrealloc(str->buf, str->alloc = add2(newlen, 1)); 649+} 650+ 651+static void process_path(struct hardlink_ctl *ctl, const char *name) 652+{ 653+ struct stat st, st2, st3; 654+ const size_t namelen = strlen(name); 655+ 656+ ctl->nobjects++; 657+ if (lstat(name, &st)) 658+ return; 659+ 660+ if (st.st_dev != ctl->dev && !ctl->force) { 661+ if (ctl->dev) 662+ errx(EXIT_FAILURE, 663+ _("%s is on different filesystem than the rest " 664+ "(use -f option to override)."), name); 665+ ctl->dev = st.st_dev; 
666+ } 667+ if (S_ISDIR(st.st_mode)) { 668+ struct hardlink_dir *dp = xmalloc(add3(sizeof(*dp), namelen, 1)); 669+ memcpy(dp->name, name, namelen + 1); 670+ dp->next = ctl->dirs; 671+ ctl->dirs = dp; 672+ 673+ } else if (S_ISREG(st.st_mode)) { 674+ int fd, i; 675+ struct hardlink_file *fp, *fp2; 676+ struct hardlink_hash *hp; 677+ const char *n1, *n2; 678+ unsigned int buf[NBUF]; 679+ int cksumsize = sizeof(buf); 680+ unsigned int cksum; 681+ time_t mtime = ctl->content_only ? 0 : st.st_mtime; 682+ unsigned int hsh = hash(st.st_size, mtime); 683+ off_t fsize; 684+ 685+ ctl->nregfiles++; 686+ if (ctl->verbose > 1) 687+ printf("%s\n", name); 688+ 689+ fd = open(name, O_RDONLY); 690+ if (fd < 0) 691+ return; 692+ 693+ if ((size_t)st.st_size < sizeof(buf)) { 694+ cksumsize = st.st_size; 695+ memset(((char *)buf) + cksumsize, 0, 696+ (sizeof(buf) - cksumsize) % sizeof(buf[0])); 697+ } 698+ if (read(fd, buf, cksumsize) != cksumsize) { 699+ close(fd); 700+ return; 701+ } 702+ cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]); 703+ for (i = 0, cksum = 0; i < cksumsize; i++) { 704+ if (cksum + buf[i] < cksum) 705+ cksum += buf[i] + 1; 706+ else 707+ cksum += buf[i]; 708+ } 709+ for (hp = ctl->hps[hsh]; hp; hp = hp->next) { 710+ if (hp->size == st.st_size && hp->mtime == mtime) 711+ break; 712+ } 713+ if (!hp) { 714+ hp = xmalloc(sizeof(*hp)); 715+ hp->size = st.st_size; 716+ hp->mtime = mtime; 717+ hp->chain = NULL; 718+ hp->next = ctl->hps[hsh]; 719+ ctl->hps[hsh] = hp; 720+ } 721+ for (fp = hp->chain; fp; fp = fp->next) { 722+ if (fp->cksum == cksum) 723+ break; 724+ } 725+ for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) { 726+ if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) { 727+ close(fd); 728+ return; 729+ } 730+ } 731+ for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) { 732+ 733+ if (!lstat(fp2->name, &st2) && S_ISREG(st2.st_mode) && 734+ !stcmp(&st, &st2, ctl->content_only) && 735+ st2.st_ino != st.st_ino && 736+ st2.st_dev == 
st.st_dev) { 737+ 738+ int fd2 = open(fp2->name, O_RDONLY); 739+ if (fd2 < 0) 740+ continue; 741+ 742+ if (fstat(fd2, &st2) || !S_ISREG(st2.st_mode) 743+ || st2.st_size == 0) { 744+ close(fd2); 745+ continue; 746+ } 747+ ctl->ncomp++; 748+ lseek(fd, 0, SEEK_SET); 749+ 750+ for (fsize = st.st_size; fsize > 0; 751+ fsize -= (off_t)sizeof(ctl->iobuf1)) { 752+ ssize_t xsz; 753+ ssize_t rsize = fsize > (ssize_t) sizeof(ctl->iobuf1) ? 754+ (ssize_t) sizeof(ctl->iobuf1) : fsize; 755+ 756+ if ((xsz = read(fd, ctl->iobuf1, rsize)) != rsize) 757+ warn(_("cannot read %s"), name); 758+ else if ((xsz = read(fd2, ctl->iobuf2, rsize)) != rsize) 759+ warn(_("cannot read %s"), fp2->name); 760+ 761+ if (xsz != rsize) { 762+ close(fd); 763+ close(fd2); 764+ return; 765+ } 766+ if (memcmp(ctl->iobuf1, ctl->iobuf2, rsize)) 767+ break; 768+ } 769+ close(fd2); 770+ if (fsize > 0) 771+ continue; 772+ if (lstat(name, &st3)) { 773+ warn(_("cannot stat %s"), name); 774+ close(fd); 775+ return; 776+ } 777+ st3.st_atime = st.st_atime; 778+ if (stcmp(&st, &st3, 0)) { 779+ warnx(_("file %s changed underneath us"), name); 780+ close(fd); 781+ return; 782+ } 783+ n1 = fp2->name; 784+ n2 = name; 785+ 786+ if (!ctl->no_link) { 787+ const char *suffix = 788+ ".$$$___cleanit___$$$"; 789+ const size_t suffixlen = strlen(suffix); 790+ size_t n2len = strlen(n2); 791+ struct hardlink_dynstr nam2 = { NULL, 0 }; 792+ 793+ growstr(&nam2, add2(n2len, suffixlen)); 794+ memcpy(nam2.buf, n2, n2len); 795+ memcpy(&nam2.buf[n2len], suffix, 796+ suffixlen + 1); 797+ /* First create a temporary link to n1 under a new name */ 798+ if (link(n1, nam2.buf)) { 799+ warn(_("failed to hardlink %s to %s (create temporary link as %s failed)"), 800+ n1, n2, nam2.buf); 801+ free(nam2.buf); 802+ continue; 803+ } 804+ /* Then rename into place over the existing n2 */ 805+ if (rename(nam2.buf, n2)) { 806+ warn(_("failed to hardlink %s to %s (rename temporary link to %s failed)"), 807+ n1, n2, n2); 808+ /* Something went wrong, try 
to remove the now redundant temporary link */ 809+ if (unlink(nam2.buf)) 810+ warn(_("failed to remove temporary link %s"), nam2.buf); 811+ free(nam2.buf); 812+ continue; 813+ } 814+ free(nam2.buf); 815+ } 816+ ctl->nlinks++; 817+ if (st3.st_nlink > 1) { 818+ /* We actually did not save anything this time, since the link second argument 819+ had some other links as well. */ 820+ if (ctl->verbose > 1) 821+ printf(_(" %s %s to %s\n"), 822+ (ctl->no_link ? _("Would link") : _("Linked")), 823+ n1, n2); 824+ } else { 825+ ctl->nsaved += ((st.st_size + 4095) / 4096) * 4096; 826+ if (ctl->verbose > 1) 827+ printf(_(" %s %s to %s, %s %jd\n"), 828+ (ctl->no_link ? _("Would link") : _("Linked")), 829+ n1, n2, 830+ (ctl->no_link ? _("would save") : _("saved")), 831+ (intmax_t)st.st_size); 832+ } 833+ close(fd); 834+ return; 835+ } 836+ } 837+ fp2 = xmalloc(add3(sizeof(*fp2), namelen, 1)); 838+ close(fd); 839+ fp2->ino = st.st_ino; 840+ fp2->dev = st.st_dev; 841+ fp2->cksum = cksum; 842+ memcpy(fp2->name, name, namelen + 1); 843+ 844+ if (fp) { 845+ fp2->next = fp->next; 846+ fp->next = fp2; 847+ } else { 848+ fp2->next = hp->chain; 849+ hp->chain = fp2; 850+ } 851+ return; 852+ } 853+} 854+ 855+int main(int argc, char **argv) 856+{ 857+ int ch; 858+ int i; 859+#ifdef HAVE_PCRE 860+ int errornumber; 861+ PCRE2_SIZE erroroffset; 862+ pcre2_code *re = NULL; 863+ PCRE2_SPTR exclude_pattern = NULL; 864+ pcre2_match_data *match_data = NULL; 865+#endif 866+ struct hardlink_dynstr nam1 = { NULL, 0 }; 867+ struct hardlink_ctl *ctl = &global_ctl; 868+ 869+ static const struct option longopts[] = { 870+ { "content", no_argument, NULL, 'c' }, 871+ { "dry-run", no_argument, NULL, 'n' }, 872+ { "exclude", required_argument, NULL, 'x' }, 873+ { "force", no_argument, NULL, 'f' }, 874+ { "help", no_argument, NULL, 'h' }, 875+ { "verbose", no_argument, NULL, 'v' }, 876+ { "version", no_argument, NULL, 'V' }, 877+ { NULL, 0, NULL, 0 }, 878+ }; 879+ 880+ setlocale(LC_ALL, ""); 881+ 
bindtextdomain(PACKAGE, LOCALEDIR); 882+ textdomain(PACKAGE); 883+ close_stdout_atexit(); 884+ 885+ while ((ch = getopt_long(argc, argv, "cnvfx:Vh", longopts, NULL)) != -1) { 886+ switch (ch) { 887+ case 'n': 888+ ctl->no_link = 1; 889+ break; 890+ case 'v': 891+ ctl->verbose++; 892+ break; 893+ case 'c': 894+ ctl->content_only = 1; 895+ break; 896+ case 'f': 897+ ctl->force = 1; 898+ break; 899+ case 'x': 900+#ifdef HAVE_PCRE 901+ exclude_pattern = (PCRE2_SPTR) optarg; 902+#else 903+ errx(EXIT_FAILURE, 904+ _("option --exclude not supported (built without pcre2)")); 905+#endif 906+ break; 907+ case 'V': 908+ print_version(EXIT_SUCCESS); 909+ case 'h': 910+ usage(); 911+ default: 912+ errtryhelp(EXIT_FAILURE); 913+ } 914+ } 915+ 916+ if (optind == argc) { 917+ warnx(_("no directory specified")); 918+ errtryhelp(EXIT_FAILURE); 919+ } 920+ 921+#ifdef HAVE_PCRE 922+ if (exclude_pattern) { 923+ re = pcre2_compile(exclude_pattern, /* the pattern */ 924+ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminate */ 925+ 0, /* default options */ 926+ &errornumber, &erroroffset, NULL); /* use default compile context */ 927+ if (!re) { 928+ PCRE2_UCHAR buffer[256]; 929+ pcre2_get_error_message(errornumber, buffer, 930+ sizeof(buffer)); 931+ errx(EXIT_FAILURE, _("pattern error at offset %d: %s"), 932+ (int)erroroffset, buffer); 933+ } 934+ match_data = pcre2_match_data_create_from_pattern(re, NULL); 935+ } 936+#endif 937+ atexit(print_summary); 938+ 939+ for (i = optind; i < argc; i++) 940+ process_path(ctl, argv[i]); 941+ 942+ while (ctl->dirs) { 943+ DIR *dh; 944+ struct dirent *di; 945+ struct hardlink_dir *dp = ctl->dirs; 946+ size_t nam1baselen = strlen(dp->name); 947+ 948+ ctl->dirs = dp->next; 949+ growstr(&nam1, add2(nam1baselen, 1)); 950+ memcpy(nam1.buf, dp->name, nam1baselen); 951+ free(dp); 952+ nam1.buf[nam1baselen++] = '/'; 953+ nam1.buf[nam1baselen] = 0; 954+ dh = opendir(nam1.buf); 955+ 956+ if (dh == NULL) 957+ continue; 958+ ctl->ndirs++; 959+ 960+ while 
((di = readdir(dh)) != NULL) { 961+ if (!di->d_name[0]) 962+ continue; 963+ if (di->d_name[0] == '.') { 964+ if (!di->d_name[1] || !strcmp(di->d_name, "..")) 965+ continue; 966+ } 967+#ifdef HAVE_PCRE 968+ if (re && pcre2_match(re, /* compiled regex */ 969+ (PCRE2_SPTR) di->d_name, strlen(di->d_name), 0, /* start at offset 0 */ 970+ 0, /* default options */ 971+ match_data, /* block for storing the result */ 972+ NULL) /* use default match context */ 973+ >=0) { 974+ if (ctl->verbose) { 975+ nam1.buf[nam1baselen] = 0; 976+ printf(_("Skipping %s%s\n"), nam1.buf, di->d_name); 977+ } 978+ continue; 979+ } 980+#endif 981+ { 982+ size_t subdirlen; 983+ growstr(&nam1, 984+ add2(nam1baselen, subdirlen = 985+ strlen(di->d_name))); 986+ memcpy(&nam1.buf[nam1baselen], di->d_name, 987+ add2(subdirlen, 1)); 988+ } 989+ process_path(ctl, nam1.buf); 990+ } 991+ closedir(dh); 992+ } 993+ 994+ return 0; 995+} 996diff --git a/locale/programs/xalloc.h b/locale/programs/xalloc.h 997new file mode 100644 998index 0000000000..0129a85e2e 999--- /dev/null 1000+++ b/locale/programs/xalloc.h 1001@@ -0,0 +1,129 @@ 1002+/* 1003+ * Copyright (C) 2010 Davidlohr Bueso <dave@gnu.org> 1004+ * 1005+ * This file may be redistributed under the terms of the 1006+ * GNU Lesser General Public License. 
1007+ * 1008+ * General memory allocation wrappers for malloc, realloc, calloc and strdup 1009+ */ 1010+ 1011+#ifndef UTIL_LINUX_XALLOC_H 1012+#define UTIL_LINUX_XALLOC_H 1013+ 1014+#include <stdlib.h> 1015+#include <string.h> 1016+ 1017+#include "c.h" 1018+ 1019+#ifndef XALLOC_EXIT_CODE 1020+# define XALLOC_EXIT_CODE EXIT_FAILURE 1021+#endif 1022+ 1023+static inline void __attribute__((__noreturn__)) 1024+__err_oom(const char *file, unsigned int line) 1025+{ 1026+ err(XALLOC_EXIT_CODE, "%s: %u: cannot allocate memory", file, line); 1027+} 1028+ 1029+#define err_oom() __err_oom(__FILE__, __LINE__) 1030+ 1031+static inline __ul_alloc_size(1) __ul_returns_nonnull 1032+void *xmalloc(const size_t size) 1033+{ 1034+ void *ret = malloc(size); 1035+ 1036+ if (!ret && size) 1037+ err(XALLOC_EXIT_CODE, "cannot allocate %zu bytes", size); 1038+ return ret; 1039+} 1040+ 1041+static inline __ul_alloc_size(2) __ul_returns_nonnull 1042+void *xrealloc(void *ptr, const size_t size) 1043+{ 1044+ void *ret = realloc(ptr, size); 1045+ 1046+ if (!ret && size) 1047+ err(XALLOC_EXIT_CODE, "cannot allocate %zu bytes", size); 1048+ return ret; 1049+} 1050+ 1051+static inline __ul_calloc_size(1, 2) __ul_returns_nonnull 1052+void *xcalloc(const size_t nelems, const size_t size) 1053+{ 1054+ void *ret = calloc(nelems, size); 1055+ 1056+ if (!ret && size && nelems) 1057+ err(XALLOC_EXIT_CODE, "cannot allocate %zu elements of %zu bytes", nelems, size); 1058+ return ret; 1059+} 1060+/* Returns NULL when str is NULL, so it must not be marked returns_nonnull. */ 1061+static inline char __attribute__((warn_unused_result)) 1062+*xstrdup(const char *str) 1063+{ 1064+ char *ret; 1065+ 1066+ if (!str) 1067+ return NULL; 1068+ 1069+ ret = strdup(str); 1070+ 1071+ if (!ret) 1072+ err(XALLOC_EXIT_CODE, "cannot duplicate string"); 1073+ return ret; 1074+} 1075+/* Returns NULL when str is NULL, so it must not be marked returns_nonnull. */ 1076+static inline char * __attribute__((warn_unused_result)) 1077+xstrndup(const char *str, size_t size) 1078+{ 1079+ char *ret; 1080+ 1081+ if (!str) 1082+ return NULL; 1083+ 1084+ ret = strndup(str, 
size); 1085+ 1086+ if (!ret) 1087+ err(XALLOC_EXIT_CODE, "cannot duplicate string"); 1088+ return ret; 1089+} 1090+ 1091+ 1092+static inline int __attribute__ ((__format__(printf, 2, 3))) 1093+ xasprintf(char **strp, const char *fmt, ...) 1094+{ 1095+ int ret; 1096+ va_list args; 1097+ va_start(args, fmt); 1098+ ret = vasprintf(&(*strp), fmt, args); 1099+ va_end(args); 1100+ if (ret < 0) 1101+ err(XALLOC_EXIT_CODE, "cannot allocate string"); 1102+ return ret; 1103+} 1104+ 1105+static inline int __attribute__ ((__format__(printf, 2, 0))) 1106+xvasprintf(char **strp, const char *fmt, va_list ap) 1107+{ 1108+ int ret = vasprintf(&(*strp), fmt, ap); 1109+ if (ret < 0) 1110+ err(XALLOC_EXIT_CODE, "cannot allocate string"); 1111+ return ret; 1112+} 1113+ 1114+ 1115+static inline char * __attribute__((warn_unused_result)) xgethostname(void) 1116+{ 1117+ char *name; 1118+ size_t sz = get_hostname_max() + 1; 1119+ 1120+ name = xmalloc(sizeof(char) * sz); 1121+ 1122+ if (gethostname(name, sz) != 0) { 1123+ free(name); 1124+ return NULL; 1125+ } 1126+ name[sz - 1] = '\0'; 1127+ return name; 1128+} 1129+ 1130+#endif 1131