1From 544d23dea91b2be793c805b9e4bce8cd1d28121f Mon Sep 17 00:00:00 2001
2From: Jason Wessel <jason.wessel@windriver.com>
3Date: Sat, 7 Dec 2019 09:59:22 -0800
4Subject: [PATCH] localedef: Add hardlink resolver from util-linux
5
6The hard link resolver that is built into localedef cannot be run in
7parallel.  It will search sibling directories (which are being processed
8in parallel) and perform a creation of a .tmp file and remove the
9original and move the .tmp file in.  The problem is that if a probe
10occurs a hard link can be requested to the file that is being removed.
11This will lead to a stray copy or potentially, on a loaded system,
12cause a race condition which pseudo cannot deal with, where it is left
13with a hard link request to a file that no longer exists.  In this
14situation pseudo will inherit the permissions of whatever the target
15inode had to offer.
16
17In short, there are two problems:
18
191) You will be left with stray copies when using the hard link
20resolution that is built in while running in parallel with
21localedef.
22
232) When running under pseudo the possibility exists for uid/gid
24leakage when the source file is removed before the hard link can
25be completed.
26
27The solution is to call localedef with --no-hard-links and separately
28process the hardlinks at a later point.  To do this requires the
29inclusion of the hardlink utility found in modern versions of
30util-linux.  Most host systems do not have this, so it will be
31included with the cross-localedef binary.
32
33[YOCTO #11299]
34[YOCTO #12434]
35
36Upstream-Status: Pending
37
38Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
39Signed-off-by: Khem Raj <raj.khem@gmail.com>
40---
41 locale/programs/c.h                        | 407 ++++++++++++++++
42 locale/programs/cross-localedef-hardlink.c | 528 +++++++++++++++++++++
43 locale/programs/xalloc.h                   | 129 +++++
44 3 files changed, 1064 insertions(+)
45 create mode 100644 locale/programs/c.h
46 create mode 100644 locale/programs/cross-localedef-hardlink.c
47 create mode 100644 locale/programs/xalloc.h
48
49diff --git a/locale/programs/c.h b/locale/programs/c.h
50new file mode 100644
51index 0000000000..d0a402e90e
52--- /dev/null
53+++ b/locale/programs/c.h
54@@ -0,0 +1,407 @@
55+/*
56+ * Fundamental C definitions.
57+ */
58+
59+#ifndef UTIL_LINUX_C_H
60+#define UTIL_LINUX_C_H
61+
62+#include <limits.h>
63+#include <stddef.h>
64+#include <stdint.h>
65+#include <stdio.h>
66+#include <unistd.h>
67+#include <stdarg.h>
68+#include <stdlib.h>
69+#include <string.h>
70+#include <errno.h>
71+
72+#include <assert.h>
73+
74+#ifdef HAVE_ERR_H
75+# include <err.h>
76+#endif
77+
78+#ifdef HAVE_SYS_SYSMACROS_H
79+# include <sys/sysmacros.h>     /* for major, minor */
80+#endif
81+
82+#ifndef LOGIN_NAME_MAX
83+# define LOGIN_NAME_MAX 256
84+#endif
85+
86+#ifndef NAME_MAX
87+# define NAME_MAX PATH_MAX
88+#endif
89+
90+/*
91+ * __GNUC_PREREQ is deprecated in favour of __has_attribute() and
92+ * __has_feature(). The __has macros are supported by clang and gcc>=5.
93+ */
94+#ifndef __GNUC_PREREQ
95+# if defined __GNUC__ && defined __GNUC_MINOR__
96+#  define __GNUC_PREREQ(maj, min) \
97+	((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
98+# else
99+#  define __GNUC_PREREQ(maj, min) 0
100+# endif
101+#endif
102+
103+#ifdef __GNUC__
104+
105+/* &a[0] degrades to a pointer: a different type from an array */
106+# define __must_be_array(a) \
107+	UL_BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(__typeof__(a), __typeof__(&a[0])))
108+
109+# define ignore_result(x) __extension__ ({ \
110+	__typeof__(x) __dummy __attribute__((__unused__)) = (x); (void) __dummy; \
111+})
112+
113+#else /* !__GNUC__ */
114+# define __must_be_array(a)	0
115+# define __attribute__(_arg_)
116+# define ignore_result(x) ((void) (x))
117+#endif /* !__GNUC__ */
118+
119+/*
120+ * It evaluates to 1 if the attribute/feature is supported by the current
121+ * compilation target. Fallback for old compilers.
122+ */
123+#ifndef __has_attribute
124+  #define __has_attribute(x) 0
125+#endif
126+
127+#ifndef __has_feature
128+  #define __has_feature(x) 0
129+#endif
130+
131+/*
132+ * Function attributes
133+ */
134+#ifndef __ul_alloc_size
135+# if (__has_attribute(alloc_size) && __has_attribute(warn_unused_result)) || __GNUC_PREREQ (4, 3)
136+#  define __ul_alloc_size(s) __attribute__((alloc_size(s), warn_unused_result))
137+# else
138+#  define __ul_alloc_size(s)
139+# endif
140+#endif
141+
142+#ifndef __ul_calloc_size
143+# if (__has_attribute(alloc_size) && __has_attribute(warn_unused_result)) || __GNUC_PREREQ (4, 3)
144+#  define __ul_calloc_size(n, s) __attribute__((alloc_size(n, s), warn_unused_result))
145+# else
146+#  define __ul_calloc_size(n, s)
147+# endif
148+#endif
149+
150+#if __has_attribute(returns_nonnull) || __GNUC_PREREQ (4, 9)
151+# define __ul_returns_nonnull __attribute__((returns_nonnull))
152+#else
153+# define __ul_returns_nonnull
154+#endif
155+
156+/*
157+ * Force a compilation error if condition is true, but also produce a
158+ * result (of value 0 and type size_t), so the expression can be used
159+ * e.g. in a structure initializer (or wherever else comma expressions
160+ * aren't permitted).
161+ */
162+#define UL_BUILD_BUG_ON_ZERO(e) __extension__ (sizeof(struct { int:-!!(e); }))
163+#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); }))
164+
165+#ifndef ARRAY_SIZE
166+# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
167+#endif
168+
169+#ifndef PATH_MAX
170+# define PATH_MAX 4096
171+#endif
172+
173+#ifndef TRUE
174+# define TRUE 1
175+#endif
176+
177+#ifndef FALSE
178+# define FALSE 0
179+#endif
180+
181+#ifndef min
182+# define min(x, y) __extension__ ({		\
183+	__typeof__(x) _min1 = (x);		\
184+	__typeof__(y) _min2 = (y);		\
185+	(void) (&_min1 == &_min2);		\
186+	_min1 < _min2 ? _min1 : _min2; })
187+#endif
188+
189+#ifndef max
190+# define max(x, y) __extension__ ({		\
191+	__typeof__(x) _max1 = (x);		\
192+	__typeof__(y) _max2 = (y);		\
193+	(void) (&_max1 == &_max2);		\
194+	_max1 > _max2 ? _max1 : _max2; })
195+#endif
196+
197+#ifndef cmp_numbers
198+# define cmp_numbers(x, y) __extension__ ({	\
199+	__typeof__(x) _a = (x);			\
200+	__typeof__(y) _b = (y);			\
201+	(void) (&_a == &_b);			\
202+	_a == _b ? 0 : _a > _b ? 1 : -1; })
203+#endif
204+
205+#ifndef offsetof
206+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
207+#endif
208+
209+/*
210+ * container_of - cast a member of a structure out to the containing structure
211+ * @ptr:	the pointer to the member.
212+ * @type:	the type of the container struct this is embedded in.
213+ * @member:	the name of the member within the struct.
214+ */
215+#ifndef container_of
216+#define container_of(ptr, type, member) __extension__ ({	\
217+	const __typeof__( ((type *)0)->member ) *__mptr = (ptr); \
218+	(type *)( (char *)__mptr - offsetof(type,member) );})
219+#endif
220+
221+#ifndef HAVE_PROGRAM_INVOCATION_SHORT_NAME
222+# ifdef HAVE___PROGNAME
223+extern char *__progname;
224+#  define program_invocation_short_name __progname
225+# else
226+#  ifdef HAVE_GETEXECNAME
227+#   define program_invocation_short_name \
228+		prog_inv_sh_nm_from_file(getexecname(), 0)
229+#  else
230+#   define program_invocation_short_name \
231+		prog_inv_sh_nm_from_file(__FILE__, 1)
232+#  endif
233+static char prog_inv_sh_nm_buf[256];
234+/* Copy the basename of f (minus its last ".ext" when stripext) into prog_inv_sh_nm_buf and return it. */
235+static inline char *prog_inv_sh_nm_from_file(const char *f, char stripext)
236+{
237+	/* f is only read: callers pass __FILE__ or getexecname(), hence const */
238+	const char *base = strrchr(f, '/');
239+	char *dot;
240+
241+	base = (base != NULL) ? base + 1 : f;
242+
243+	/* bounded copy; the explicit terminator covers strncpy() truncation */
244+	strncpy(prog_inv_sh_nm_buf, base, sizeof(prog_inv_sh_nm_buf) - 1);
245+	prog_inv_sh_nm_buf[sizeof(prog_inv_sh_nm_buf) - 1] = '\0';
246+
247+	if (stripext && (dot = strrchr(prog_inv_sh_nm_buf, '.')) != NULL)
248+		*dot = '\0';
249+
250+	return prog_inv_sh_nm_buf;
251+}
252+# endif
253+#endif
254+
255+
256+#ifndef HAVE_ERR_H
257+/* Print "prog: msg[: strerror(errno)]\n" to stderr (errno text only if adderr); exit(excode) if doexit. */
258+static inline void errmsg(char doexit, int excode, char adderr, const char *fmt, ...)
259+{
260+	const int saved_errno = errno;	/* captured before stdio can overwrite it */
261+
262+	fprintf(stderr, "%s: ", program_invocation_short_name);
263+	if (fmt != NULL) {
264+		va_list argp;
265+		va_start(argp, fmt);
266+		vfprintf(stderr, fmt, argp);
267+		va_end(argp);
268+	}
269+	if (adderr)	/* ": " only when a message precedes the errno text */
270+		fprintf(stderr, "%s%s", fmt != NULL ? ": " : "", strerror(saved_errno));
271+	fprintf(stderr, "\n");
272+	if (doexit)	/* err()/errx() pass 1 here, warn()/warnx() pass 0 */
273+		exit(excode);
274+}
275+
276+#ifndef HAVE_ERR
277+# define err(E, FMT...) errmsg(1, E, 1, FMT)
278+#endif
279+
280+#ifndef HAVE_ERRX
281+# define errx(E, FMT...) errmsg(1, E, 0, FMT)
282+#endif
283+
284+#ifndef HAVE_WARN
285+# define warn(FMT...) errmsg(0, 0, 1, FMT)
286+#endif
287+
288+#ifndef HAVE_WARNX
289+# define warnx(FMT...) errmsg(0, 0, 0, FMT)
290+#endif
291+#endif /* !HAVE_ERR_H */
292+
293+
294+/* Don't use inline function to avoid '#include "nls.h"' in c.h
295+ */
296+#define errtryhelp(eval) __extension__ ({ \
297+	fprintf(stderr, _("Try '%s --help' for more information.\n"), \
298+			program_invocation_short_name); \
299+	exit(eval); \
300+})
301+
302+/* After failed execvp() */
303+#define EX_EXEC_FAILED		126	/* Program located, but not usable. */
304+#define EX_EXEC_ENOENT		127	/* Could not find program to exec.  */
305+#define errexec(name)	err(errno == ENOENT ? EX_EXEC_ENOENT : EX_EXEC_FAILED, \
306+			_("failed to execute %s"), name)
307+
308+
309+static inline __attribute__((const)) int is_power_of_2(unsigned long num)
310+{
311+	return (num != 0 && ((num & (num - 1)) == 0));
312+}
313+
314+#ifndef HAVE_LOFF_T
315+typedef int64_t loff_t;
316+#endif
317+
318+#if !defined(HAVE_DIRFD) && (!defined(HAVE_DECL_DIRFD) || HAVE_DECL_DIRFD == 0) && defined(HAVE_DIR_DD_FD)
319+#include <sys/types.h>
320+#include <dirent.h>
321+static inline int dirfd(DIR *d)
322+{
323+	return d->dd_fd;
324+}
325+#endif
326+
327+/*
328+ * Fallback defines for old versions of glibc
329+ */
330+#include <fcntl.h>
331+
332+#ifdef O_CLOEXEC
333+#define UL_CLOEXECSTR	"e"
334+#else
335+#define UL_CLOEXECSTR	""
336+#endif
337+
338+#ifndef O_CLOEXEC
339+#define O_CLOEXEC 0
340+#endif
341+
342+#ifdef __FreeBSD_kernel__
343+#ifndef F_DUPFD_CLOEXEC
344+#define F_DUPFD_CLOEXEC	17	/* Like F_DUPFD, but FD_CLOEXEC is set */
345+#endif
346+#endif
347+
348+
349+#ifndef AI_ADDRCONFIG
350+#define AI_ADDRCONFIG 0x0020
351+#endif
352+
353+#ifndef IUTF8
354+#define IUTF8 0040000
355+#endif
356+
357+/*
358+ * MAXHOSTNAMELEN replacement
359+ */
360+static inline size_t get_hostname_max(void)
361+{
362+	long len = sysconf(_SC_HOST_NAME_MAX);
363+
364+	if (0 < len)
365+		return len;
366+
367+#ifdef MAXHOSTNAMELEN
368+	return MAXHOSTNAMELEN;
369+#elif HOST_NAME_MAX
370+	return HOST_NAME_MAX;
371+#endif
372+	return 64;
373+}
374+
375+
376+/*
377+ * Constant strings for usage() functions. For more info see
378+ * Documentation/{howto-usage-function.txt,boilerplate.c}
379+ */
380+#define USAGE_HEADER     ("\nUsage:\n")
381+#define USAGE_OPTIONS    ("\nOptions:\n")
382+#define USAGE_FUNCTIONS  ("\nFunctions:\n")
383+#define USAGE_COMMANDS   ("\nCommands:\n")
384+#define USAGE_COLUMNS    ("\nAvailable output columns:\n")
385+#define USAGE_SEPARATOR    "\n"
386+
387+#define USAGE_OPTSTR_HELP     ("display this help")
388+#define USAGE_OPTSTR_VERSION  ("display version")
389+
390+#define USAGE_HELP_OPTIONS(marg_dsc) \
391+		"%-" #marg_dsc "s%s\n" \
392+		"%-" #marg_dsc "s%s\n" \
393+		, " -h, --help",    USAGE_OPTSTR_HELP \
394+		, " -V, --version", USAGE_OPTSTR_VERSION
395+
396+#define USAGE_MAN_TAIL(_man)   ("\nFor more details see %s.\n"), _man
397+
398+#define UTIL_LINUX_VERSION ("%s from %s\n"), program_invocation_short_name, PACKAGE_STRING
399+
400+#define print_version(eval) __extension__ ({ \
401+		printf(UTIL_LINUX_VERSION); \
402+		exit(eval); \
403+})
404+
405+/*
406+ * scanf modifiers for "strings allocation"
407+ */
408+#ifdef HAVE_SCANF_MS_MODIFIER
409+#define UL_SCNsA	"%ms"
410+#elif defined(HAVE_SCANF_AS_MODIFIER)
411+#define UL_SCNsA	"%as"
412+#endif
413+
414+/*
415+ * seek stuff
416+ */
417+#ifndef SEEK_DATA
418+# define SEEK_DATA	3
419+#endif
420+#ifndef SEEK_HOLE
421+# define SEEK_HOLE	4
422+#endif
423+
424+
425+/*
426+ * Macros to convert #define'itions to strings, for example
427+ * #define XYXXY 42
428+ * printf ("%s=%s\n", stringify(XYXXY), stringify_value(XYXXY));
429+ */
430+#define stringify_value(s) stringify(s)
431+#define stringify(s) #s
432+
433+/*
434+ * UL_ASAN_BLACKLIST is a macro to tell AddressSanitizer (a compile-time
435+ * instrumentation shipped with Clang and GCC) to not instrument the
436+ * annotated function.  Furthermore, it will prevent the compiler from
437+ * inlining the function because inlining currently breaks the blacklisting
438+ * mechanism of AddressSanitizer.
439+ */
440+#if __has_feature(address_sanitizer) && __has_attribute(no_sanitize_memory) && __has_attribute(no_sanitize_address)
441+# define UL_ASAN_BLACKLIST __attribute__((noinline)) __attribute__((no_sanitize_memory)) __attribute__((no_sanitize_address))
442+#else
443+# define UL_ASAN_BLACKLIST	/* nothing */
444+#endif
445+
446+/*
447+ * Note that sysconf(_SC_GETPW_R_SIZE_MAX) returns *initial* suggested size for
448+ * pwd buffer and in some cases it is not large enough. See POSIX and
449+ * getpwnam_r man page for more details.
450+ */
451+#define UL_GETPW_BUFSIZ	(16 * 1024)
452+
453+/*
454+ * Darwin or other BSDs may only have MAP_ANON. To get it on Darwin we must
455+ * define _DARWIN_C_SOURCE before including sys/mman.h. We do this in config.h.
456+ */
457+#if !defined MAP_ANONYMOUS && defined MAP_ANON
458+# define MAP_ANONYMOUS  (MAP_ANON)
459+#endif
460+
461+#endif /* UTIL_LINUX_C_H */
462diff --git a/locale/programs/cross-localedef-hardlink.c b/locale/programs/cross-localedef-hardlink.c
463new file mode 100644
464index 0000000000..63615896b0
465--- /dev/null
466+++ b/locale/programs/cross-localedef-hardlink.c
467@@ -0,0 +1,528 @@
468+/*
469+ * hardlink - consolidate duplicate files via hardlinks
470+ *
471+ * Copyright (C) 2018 Red Hat, Inc. All rights reserved.
472+ * Written by Jakub Jelinek <jakub@redhat.com>
473+ *
474+ * Copyright (C) 2019 Karel Zak <kzak@redhat.com>
475+ *
476+ * This program is free software; you can redistribute it and/or modify
477+ * it under the terms of the GNU General Public License as published by
478+ * the Free Software Foundation; either version 2 of the License, or
479+ * (at your option) any later version.
480+ *
481+ * This program is distributed in the hope that it would be useful,
482+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
483+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
484+ * GNU General Public License for more details.
485+ *
486+ * You should have received a copy of the GNU General Public License along
487+ * with this program; if not, write to the Free Software Foundation, Inc.,
488+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
489+ */
490+#include <sys/types.h>
491+#include <stdlib.h>
492+#include <getopt.h>
493+#include <stdio.h>
494+#include <unistd.h>
495+#include <sys/stat.h>
496+#include <sys/mman.h>
497+#include <string.h>
498+#include <dirent.h>
499+#include <fcntl.h>
500+#include <errno.h>
501+#ifdef HAVE_PCRE
502+# define PCRE2_CODE_UNIT_WIDTH 8
503+# include <pcre2.h>
504+#endif
505+
506+#include "c.h"
507+#include "xalloc.h"
508+#include "nls.h"
509+#include "closestream.h"
510+
511+#define NHASH   (1<<17)  /* Must be a power of 2! */
512+#define NBUF    64
513+
514+struct hardlink_file;
515+
516+struct hardlink_hash {
517+	struct hardlink_hash *next;
518+	struct hardlink_file *chain;
519+	off_t size;
520+	time_t mtime;
521+};
522+
523+struct hardlink_dir {
524+	struct hardlink_dir *next;
525+	char name[];
526+};
527+
528+struct hardlink_file {
529+	struct hardlink_file *next;
530+	ino_t ino;
531+	dev_t dev;
532+	unsigned int cksum;
533+	char name[];
534+};
535+
536+struct hardlink_dynstr {
537+	char *buf;
538+	size_t alloc;
539+};
540+
541+struct hardlink_ctl {
542+	struct hardlink_dir *dirs;
543+	struct hardlink_hash *hps[NHASH];
544+	char iobuf1[BUFSIZ];
545+	char iobuf2[BUFSIZ];
546+	/* summary counters */
547+	unsigned long long ndirs;
548+	unsigned long long nobjects;
549+	unsigned long long nregfiles;
550+	unsigned long long ncomp;
551+	unsigned long long nlinks;
552+	unsigned long long nsaved;
553+	/* current device */
554+	dev_t dev;
555+	/* flags */
556+	unsigned int verbose;
557+	unsigned int
558+		no_link:1,
559+		content_only:1,
560+		force:1;
561+};
562+/* ctl is in global scope due to its use in the atexit() handler */
563+struct hardlink_ctl global_ctl;
564+
565+__attribute__ ((always_inline))
566+static inline unsigned int hash(off_t size, time_t mtime)	/* bucket index for the hps[] table */
567+{
568+	return (size ^ mtime) & (NHASH - 1);	/* mask is valid because NHASH is a power of 2 */
569+}
570+
571+__attribute__ ((always_inline))
572+static inline int stcmp(struct stat *st1, struct stat *st2, int content_scope)
573+{
574+	if (content_scope)	/* content-only mode: size is the only field compared */
575+		return st1->st_size != st2->st_size;
576+	/* otherwise mode, uid, gid, size and mtime must all match; 0 means "same" */
577+	return st1->st_mode != st2->st_mode
578+		|| st1->st_uid != st2->st_uid
579+		|| st1->st_gid != st2->st_gid
580+		|| st1->st_size != st2->st_size
581+		|| st1->st_mtime != st2->st_mtime;
582+}
583+
584+/* Print the run counters from global_ctl to stdout; does nothing unless -v was given. */
585+static void print_summary(void)
586+{
587+	struct hardlink_ctl const *const ctl = &global_ctl;
588+
589+	if (!ctl->verbose)
590+		return;
591+	if (ctl->verbose > 1 && ctl->nlinks)	/* blank line after the -vv per-link output */
592+		fputc('\n', stdout);
593+	/* the counters are unsigned long long, so the conversion is %llu, not %lld */
594+	printf(_("Directories:   %9llu\n"), ctl->ndirs);
595+	printf(_("Objects:       %9llu\n"), ctl->nobjects);
596+	printf(_("Regular files: %9llu\n"), ctl->nregfiles);
597+	printf(_("Comparisons:   %9llu\n"), ctl->ncomp);
598+	printf(  "%s%9llu\n", (ctl->no_link ?
599+	       _("Would link:    ") :
600+	       _("Linked:        ")), ctl->nlinks);
601+	printf(  "%s %9llu\n", (ctl->no_link ?
602+	       _("Would save:   ") :
603+	       _("Saved:        ")), ctl->nsaved);
604+}
605+
606+static void __attribute__((__noreturn__)) usage(void)
607+{
608+	fputs(USAGE_HEADER, stdout);
609+	printf(_(" %s [options] directory...\n"), program_invocation_short_name);
610+
611+	fputs(USAGE_SEPARATOR, stdout);
612+	puts(_("Consolidate duplicate files using hardlinks."));
613+
614+	fputs(USAGE_OPTIONS, stdout);
615+	puts(_(" -c, --content          compare only contents, ignore permission, etc."));
616+	puts(_(" -n, --dry-run          don't actually link anything"));
617+	puts(_(" -v, --verbose          print summary after hardlinking"));
618+	puts(_(" -vv                    print every hardlinked file and summary"));
619+	puts(_(" -f, --force            force hardlinking across filesystems"));
620+	puts(_(" -x, --exclude <regex>  exclude files matching pattern"));
621+
622+	fputs(USAGE_SEPARATOR, stdout);
623+	printf(USAGE_HELP_OPTIONS(16)); /* char offset to align option descriptions */
624+	printf(USAGE_MAN_TAIL("hardlink(1)"));
625+	exit(EXIT_SUCCESS);
626+}
627+
628+__attribute__ ((always_inline))
629+static inline size_t add2(size_t a, size_t b)
630+{
631+	size_t sum = a + b;
632+
633+	if (sum < a)
634+		errx(EXIT_FAILURE, _("integer overflow"));
635+	return sum;
636+}
637+
638+__attribute__ ((always_inline))
639+static inline size_t add3(size_t a, size_t b, size_t c)
640+{
641+	return add2(add2(a, b), c);
642+}
643+
644+static void growstr(struct hardlink_dynstr *str, size_t newlen)	/* make str hold newlen chars plus a NUL */
645+{
646+	if (newlen < str->alloc)	/* current allocation already fits newlen + 1 bytes */
647+		return;
648+	str->buf = xrealloc(str->buf, str->alloc = add2(newlen, 1));	/* add2() guards the +1 against overflow */
649+}
650+
651+static void process_path(struct hardlink_ctl *ctl, const char *name)	/* queue directories, try to hardlink regular files */
652+{
653+	struct stat st, st2, st3;
654+	const size_t namelen = strlen(name);
655+
656+	ctl->nobjects++;
657+	if (lstat(name, &st))	/* paths that cannot be lstat()ed are skipped silently */
658+		return;
659+
660+	if (st.st_dev != ctl->dev && !ctl->force) {
661+		if (ctl->dev)	/* a device is already recorded, so this path is on a different one */
662+			errx(EXIT_FAILURE,
663+			     _("%s is on different filesystem than the rest "
664+			       "(use -f option to override)."), name);
665+		ctl->dev = st.st_dev;	/* no device recorded yet (ctl->dev == 0): remember this one */
666+	}
667+	if (S_ISDIR(st.st_mode)) {	/* directories are only pushed onto the ctl->dirs list here */
668+		struct hardlink_dir *dp = xmalloc(add3(sizeof(*dp), namelen, 1));
669+		memcpy(dp->name, name, namelen + 1);
670+		dp->next = ctl->dirs;
671+		ctl->dirs = dp;
672+
673+	} else if (S_ISREG(st.st_mode)) {
674+		int fd, i;
675+		struct hardlink_file *fp, *fp2;
676+		struct hardlink_hash *hp;
677+		const char *n1, *n2;
678+		unsigned int buf[NBUF];
679+		int cksumsize = sizeof(buf);
680+		unsigned int cksum;
681+		time_t mtime = ctl->content_only ? 0 : st.st_mtime;	/* mtime is ignored in content-only mode */
682+		unsigned int hsh = hash(st.st_size, mtime);
683+		off_t fsize;
684+
685+		ctl->nregfiles++;
686+		if (ctl->verbose > 1)
687+			printf("%s\n", name);
688+
689+		fd = open(name, O_RDONLY);
690+		if (fd < 0)
691+			return;
692+
693+		if ((size_t)st.st_size < sizeof(buf)) {	/* file shorter than buf: checksum only what exists */
694+			cksumsize = st.st_size;
695+			memset(((char *)buf) + cksumsize, 0,	/* zero the unused tail of the last partial word */
696+			       (sizeof(buf) - cksumsize) % sizeof(buf[0]));
697+		}
698+		if (read(fd, buf, cksumsize) != cksumsize) {
699+			close(fd);
700+			return;
701+		}
702+		cksumsize = (cksumsize + sizeof(buf[0]) - 1) / sizeof(buf[0]);	/* bytes -> words, rounded up */
703+		for (i = 0, cksum = 0; i < cksumsize; i++) {
704+			if (cksum + buf[i] < cksum)	/* unsigned wrap-around: add one extra for the carry */
705+				cksum += buf[i] + 1;
706+			else
707+				cksum += buf[i];
708+		}
709+		for (hp = ctl->hps[hsh]; hp; hp = hp->next) {	/* look up the (size, mtime) entry in the bucket */
710+			if (hp->size == st.st_size && hp->mtime == mtime)
711+				break;
712+		}
713+		if (!hp) {	/* none yet: create one at the head of the bucket */
714+			hp = xmalloc(sizeof(*hp));
715+			hp->size = st.st_size;
716+			hp->mtime = mtime;
717+			hp->chain = NULL;
718+			hp->next = ctl->hps[hsh];
719+			ctl->hps[hsh] = hp;
720+		}
721+		for (fp = hp->chain; fp; fp = fp->next) {	/* fp: first recorded file with the same checksum */
722+			if (fp->cksum == cksum)
723+				break;
724+		}
725+		for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) {
726+			if (fp2->ino == st.st_ino && fp2->dev == st.st_dev) {	/* this inode is already recorded */
727+				close(fd);
728+				return;
729+			}
730+		}
731+		for (fp2 = fp; fp2 && fp2->cksum == cksum; fp2 = fp2->next) {	/* try each same-checksum candidate */
732+
733+			if (!lstat(fp2->name, &st2) && S_ISREG(st2.st_mode) &&
734+			    !stcmp(&st, &st2, ctl->content_only) &&
735+			    st2.st_ino != st.st_ino &&
736+			    st2.st_dev == st.st_dev) {
737+
738+				int fd2 = open(fp2->name, O_RDONLY);
739+				if (fd2 < 0)
740+					continue;
741+
742+				if (fstat(fd2, &st2) || !S_ISREG(st2.st_mode)
743+				    || st2.st_size == 0) {	/* empty candidates are never linked */
744+					close(fd2);
745+					continue;
746+				}
747+				ctl->ncomp++;
748+				lseek(fd, 0, SEEK_SET);	/* fd was advanced by the checksum read or an earlier comparison */
749+
750+				for (fsize = st.st_size; fsize > 0;	/* compare contents one iobuf-sized chunk at a time */
751+				     fsize -= (off_t)sizeof(ctl->iobuf1)) {
752+					ssize_t xsz;
753+					ssize_t rsize = fsize > (ssize_t) sizeof(ctl->iobuf1) ?
754+							(ssize_t) sizeof(ctl->iobuf1) : fsize;
755+
756+					if ((xsz = read(fd, ctl->iobuf1, rsize)) != rsize)
757+						warn(_("cannot read %s"), name);
758+					else if ((xsz = read(fd2, ctl->iobuf2, rsize)) != rsize)
759+						warn(_("cannot read %s"), fp2->name);
760+
761+					if (xsz != rsize) {	/* a short or failed read on either file abandons this path */
762+						close(fd);
763+						close(fd2);
764+						return;
765+					}
766+					if (memcmp(ctl->iobuf1, ctl->iobuf2, rsize))
767+						break;
768+				}
769+				close(fd2);
770+				if (fsize > 0)	/* loop broke early: contents differ, try the next candidate */
771+					continue;
772+				if (lstat(name, &st3)) {
773+					warn(_("cannot stat %s"), name);
774+					close(fd);
775+					return;
776+				}
777+				st3.st_atime = st.st_atime;
778+				if (stcmp(&st, &st3, 0)) {	/* metadata differs from the first lstat(): file was modified meanwhile */
779+					warnx(_("file %s changed underneath us"), name);
780+					close(fd);
781+					return;
782+				}
783+				n1 = fp2->name;	/* n1: existing file that is kept */
784+				n2 = name;	/* n2: path that gets replaced by a link to n1 */
785+
786+				if (!ctl->no_link) {
787+					const char *suffix =
788+					    ".$$$___cleanit___$$$";
789+					const size_t suffixlen = strlen(suffix);
790+					size_t n2len = strlen(n2);
791+					struct hardlink_dynstr nam2 = { NULL, 0 };
792+
793+					growstr(&nam2, add2(n2len, suffixlen));
794+					memcpy(nam2.buf, n2, n2len);
795+					memcpy(&nam2.buf[n2len], suffix,
796+					       suffixlen + 1);
797+					/* First create a temporary link to n1 under a new name */
798+					if (link(n1, nam2.buf)) {
799+						warn(_("failed to hardlink %s to %s (create temporary link as %s failed)"),
800+							n1, n2, nam2.buf);
801+						free(nam2.buf);
802+						continue;
803+					}
804+					/* Then rename into place over the existing n2 */
805+					if (rename(nam2.buf, n2)) {
806+						warn(_("failed to hardlink %s to %s (rename temporary link to %s failed)"),
807+							n1, n2, n2);
808+						/* Something went wrong, try to remove the now redundant temporary link */
809+						if (unlink(nam2.buf))
810+							warn(_("failed to remove temporary link %s"), nam2.buf);
811+						free(nam2.buf);
812+						continue;
813+					}
814+					free(nam2.buf);
815+				}
816+				ctl->nlinks++;	/* counted in dry-run mode as well */
817+				if (st3.st_nlink > 1) {
818+					/* We actually did not save anything this time, since the link second argument
819+					   had some other links as well.  */
820+					if (ctl->verbose > 1)
821+						printf(_(" %s %s to %s\n"),
822+							(ctl->no_link ? _("Would link") : _("Linked")),
823+							n1, n2);
824+				} else {
825+					ctl->nsaved += ((st.st_size + 4095) / 4096) * 4096;	/* size rounded up to a multiple of 4096 */
826+					if (ctl->verbose > 1)
827+						printf(_(" %s %s to %s, %s %jd\n"),
828+							(ctl->no_link ? _("Would link") : _("Linked")),
829+							n1, n2,
830+							(ctl->no_link ? _("would save") : _("saved")),
831+							(intmax_t)st.st_size);
832+				}
833+				close(fd);
834+				return;
835+			}
836+		}
837+		fp2 = xmalloc(add3(sizeof(*fp2), namelen, 1));	/* nothing was linked: record this file for later matches */
838+		close(fd);
839+		fp2->ino = st.st_ino;
840+		fp2->dev = st.st_dev;
841+		fp2->cksum = cksum;
842+		memcpy(fp2->name, name, namelen + 1);
843+
844+		if (fp) {	/* insert right after the first entry that has this checksum */
845+			fp2->next = fp->next;
846+			fp->next = fp2;
847+		} else {	/* checksum not seen in this chain yet: insert at its head */
848+			fp2->next = hp->chain;
849+			hp->chain = fp2;
850+		}
851+		return;
852+	}
853+}
854+
855+int main(int argc, char **argv)	/* parse options, then process every path and queued directory */
856+{
857+	int ch;
858+	int i;
859+#ifdef HAVE_PCRE
860+	int errornumber;
861+	PCRE2_SIZE erroroffset;
862+	pcre2_code *re = NULL;
863+	PCRE2_SPTR exclude_pattern = NULL;
864+	pcre2_match_data *match_data = NULL;
865+#endif
866+	struct hardlink_dynstr nam1 = { NULL, 0 };	/* reusable buffer for "<dir>/<entry>" paths */
867+	struct hardlink_ctl *ctl = &global_ctl;
868+
869+	static const struct option longopts[] = {
870+		{ "content",    no_argument, NULL, 'c' },
871+		{ "dry-run",    no_argument, NULL, 'n' },
872+		{ "exclude",    required_argument, NULL, 'x' },
873+		{ "force",      no_argument, NULL, 'f' },
874+		{ "help",       no_argument, NULL, 'h' },
875+		{ "verbose",    no_argument, NULL, 'v' },
876+		{ "version",    no_argument, NULL, 'V' },
877+		{ NULL, 0, NULL, 0 },
878+	};
879+
880+	setlocale(LC_ALL, "");
881+	bindtextdomain(PACKAGE, LOCALEDIR);
882+	textdomain(PACKAGE);
883+	close_stdout_atexit();
884+
885+	while ((ch = getopt_long(argc, argv, "cnvfx:Vh", longopts, NULL)) != -1) {
886+		switch (ch) {
887+		case 'n':
888+			ctl->no_link = 1;
889+			break;
890+		case 'v':
891+			ctl->verbose++;	/* a counter: -vv gives verbose == 2 */
892+			break;
893+		case 'c':
894+			ctl->content_only = 1;
895+			break;
896+		case 'f':
897+			ctl->force = 1;
898+			break;
899+		case 'x':
900+#ifdef HAVE_PCRE
901+			exclude_pattern = (PCRE2_SPTR) optarg;
902+#else
903+			errx(EXIT_FAILURE,
904+			     _("option --exclude not supported (built without pcre2)"));
905+#endif
906+			break;
907+		case 'V':
908+			print_version(EXIT_SUCCESS);	/* the macro calls exit(), so no break is needed */
909+		case 'h':
910+			usage();	/* declared noreturn */
911+		default:
912+			errtryhelp(EXIT_FAILURE);
913+		}
914+	}
915+
916+	if (optind == argc) {	/* no path arguments left after the options */
917+		warnx(_("no directory specified"));
918+		errtryhelp(EXIT_FAILURE);
919+	}
920+
921+#ifdef HAVE_PCRE
922+	if (exclude_pattern) {
923+		re = pcre2_compile(exclude_pattern, /* the pattern */
924+				   PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
925+				   0, /* default options */
926+				   &errornumber, &erroroffset, NULL); /* use default compile context */
927+		if (!re) {
928+			PCRE2_UCHAR buffer[256];
929+			pcre2_get_error_message(errornumber, buffer,
930+						sizeof(buffer));
931+			errx(EXIT_FAILURE, _("pattern error at offset %d: %s"),
932+				(int)erroroffset, buffer);
933+		}
934+		match_data = pcre2_match_data_create_from_pattern(re, NULL);
935+	}
936+#endif
937+	atexit(print_summary);	/* registered only after option parsing has succeeded */
938+
939+	for (i = optind; i < argc; i++)	/* command-line paths first; directories land on ctl->dirs */
940+		process_path(ctl, argv[i]);
941+
942+	while (ctl->dirs) {	/* drain the directory list; process_path() may push more entries */
943+		DIR *dh;
944+		struct dirent *di;
945+		struct hardlink_dir *dp = ctl->dirs;
946+		size_t nam1baselen = strlen(dp->name);
947+
948+		ctl->dirs = dp->next;
949+		growstr(&nam1, add2(nam1baselen, 1));	/* room for the directory name plus '/' */
950+		memcpy(nam1.buf, dp->name, nam1baselen);
951+		free(dp);
952+		nam1.buf[nam1baselen++] = '/';	/* nam1baselen now includes the trailing '/' */
953+		nam1.buf[nam1baselen] = 0;
954+		dh = opendir(nam1.buf);
955+
956+		if (dh == NULL)	/* directories that cannot be opened are skipped silently */
957+			continue;
958+		ctl->ndirs++;
959+
960+		while ((di = readdir(dh)) != NULL) {
961+			if (!di->d_name[0])
962+				continue;
963+			if (di->d_name[0] == '.') {	/* skip the "." and ".." entries */
964+				if (!di->d_name[1] || !strcmp(di->d_name, ".."))
965+					continue;
966+			}
967+#ifdef HAVE_PCRE
968+			if (re && pcre2_match(re, /* compiled regex */
969+					      (PCRE2_SPTR) di->d_name, strlen(di->d_name), 0, /* start at offset 0 */
970+					      0, /* default options */
971+					      match_data, /* block for storing the result */
972+					      NULL) /* use default match context */
973+			    >=0) {
974+				if (ctl->verbose) {
975+					nam1.buf[nam1baselen] = 0;	/* cut back to "<dir>/" before printing */
976+					printf(_("Skipping %s%s\n"), nam1.buf, di->d_name);
977+				}
978+				continue;
979+			}
980+#endif
981+			{
982+				size_t subdirlen;
983+				growstr(&nam1,
984+					add2(nam1baselen, subdirlen =
985+					     strlen(di->d_name)));
986+				memcpy(&nam1.buf[nam1baselen], di->d_name,	/* append the entry name and its NUL */
987+				       add2(subdirlen, 1));
988+			}
989+			process_path(ctl, nam1.buf);
990+		}
991+		closedir(dh);
992+	}
993+
994+	return 0;
995+}
996diff --git a/locale/programs/xalloc.h b/locale/programs/xalloc.h
997new file mode 100644
998index 0000000000..0129a85e2e
999--- /dev/null
1000+++ b/locale/programs/xalloc.h
1001@@ -0,0 +1,129 @@
1002+/*
1003+ * Copyright (C) 2010 Davidlohr Bueso <dave@gnu.org>
1004+ *
1005+ * This file may be redistributed under the terms of the
1006+ * GNU Lesser General Public License.
1007+ *
1008+ * General memory allocation wrappers for malloc, realloc, calloc and strdup
1009+ */
1010+
1011+#ifndef UTIL_LINUX_XALLOC_H
1012+#define UTIL_LINUX_XALLOC_H
1013+
1014+#include <stdlib.h>
1015+#include <string.h>
1016+
1017+#include "c.h"
1018+
1019+#ifndef XALLOC_EXIT_CODE
1020+# define XALLOC_EXIT_CODE EXIT_FAILURE
1021+#endif
1022+
1023+static inline void __attribute__((__noreturn__))
1024+__err_oom(const char *file, unsigned int line)
1025+{
1026+	err(XALLOC_EXIT_CODE, "%s: %u: cannot allocate memory", file, line);
1027+}
1028+
1029+#define err_oom()	__err_oom(__FILE__, __LINE__)
1030+
1031+static inline __ul_alloc_size(1) __ul_returns_nonnull
1032+void *xmalloc(const size_t size)
1033+{
1034+        void *ret = malloc(size);
1035+
1036+        if (!ret && size)
1037+                err(XALLOC_EXIT_CODE, "cannot allocate %zu bytes", size);
1038+        return ret;
1039+}
1040+
1041+static inline __ul_alloc_size(2) __ul_returns_nonnull
1042+void *xrealloc(void *ptr, const size_t size)
1043+{
1044+        void *ret = realloc(ptr, size);
1045+
1046+        if (!ret && size)
1047+                err(XALLOC_EXIT_CODE, "cannot allocate %zu bytes", size);
1048+        return ret;
1049+}
1050+
1051+static inline __ul_calloc_size(1, 2) __ul_returns_nonnull
1052+void *xcalloc(const size_t nelems, const size_t size)
1053+{
1054+        void *ret = calloc(nelems, size);
1055+
1056+        if (!ret && size && nelems)
1057+                err(XALLOC_EXIT_CODE, "cannot allocate %zu bytes", size);
1058+        return ret;
1059+}
1060+
1061+/* strdup() that exits on allocation failure; a NULL str yields NULL, so it is not returns_nonnull. */
1062+static inline char * __attribute__((warn_unused_result)) xstrdup(const char *str)
1063+{
1064+        char *ret;
1065+
1066+        if (!str)
1067+                return NULL;	/* nothing to duplicate */
1068+
1069+        ret = strdup(str);
1070+
1071+        if (!ret)
1072+                err(XALLOC_EXIT_CODE, "cannot duplicate string");
1073+        return ret;
1074+}
1075+
1076+/* strndup() that exits on allocation failure; a NULL str yields NULL, so it is not returns_nonnull. */
1077+static inline char * __attribute__((warn_unused_result)) xstrndup(const char *str, size_t size)
1078+{
1079+        char *ret;
1080+
1081+        if (!str)
1082+                return NULL;	/* nothing to duplicate */
1083+
1084+        ret = strndup(str, size);
1085+
1086+        if (!ret)
1087+                err(XALLOC_EXIT_CODE, "cannot duplicate string");
1088+        return ret;
1089+}
1090+
1091+
1092+static inline int __attribute__ ((__format__(printf, 2, 3)))
1093+    xasprintf(char **strp, const char *fmt, ...)
1094+{
1095+	int ret;
1096+	va_list args;
1097+	va_start(args, fmt);
1098+	ret = vasprintf(&(*strp), fmt, args);
1099+	va_end(args);
1100+	if (ret < 0)
1101+		err(XALLOC_EXIT_CODE, "cannot allocate string");
1102+	return ret;
1103+}
1104+
1105+static inline int  __attribute__ ((__format__(printf, 2, 0)))
1106+xvasprintf(char **strp, const char *fmt, va_list ap)
1107+{
1108+	int ret = vasprintf(&(*strp), fmt, ap);
1109+	if (ret < 0)
1110+		err(XALLOC_EXIT_CODE, "cannot allocate string");
1111+	return ret;
1112+}
1113+
1114+
1115+static inline char * __attribute__((warn_unused_result)) xgethostname(void)
1116+{
1117+	char *name;
1118+	size_t sz = get_hostname_max() + 1;
1119+
1120+	name = xmalloc(sizeof(char) * sz);
1121+
1122+	if (gethostname(name, sz) != 0) {
1123+		free(name);
1124+		return NULL;
1125+	}
1126+	name[sz - 1] = '\0';
1127+	return name;
1128+}
1129+
1130+#endif
1131