1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3
4# Kselftest framework requirement - SKIP code is 4.
5ksft_skip=4
6
7set -e
8
9if [[ $(id -u) -ne 0 ]]; then
10  echo "This test must be run as root. Skipping..."
11  exit $ksft_skip
12fi
13
14fault_limit_file=limit_in_bytes
15reservation_limit_file=rsvd.limit_in_bytes
16fault_usage_file=usage_in_bytes
17reservation_usage_file=rsvd.usage_in_bytes
18
19if [[ "$1" == "-cgroup-v2" ]]; then
20  cgroup2=1
21  fault_limit_file=max
22  reservation_limit_file=rsvd.max
23  fault_usage_file=current
24  reservation_usage_file=rsvd.current
25fi
26
27if [[ $cgroup2 ]]; then
28  cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
29  if [[ -z "$cgroup_path" ]]; then
30    cgroup_path=/dev/cgroup/memory
31    mount -t cgroup2 none $cgroup_path
32    do_umount=1
33  fi
34  echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
35else
36  cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
37  if [[ -z "$cgroup_path" ]]; then
38    cgroup_path=/dev/cgroup/memory
39    mount -t cgroup memory,hugetlb $cgroup_path
40    do_umount=1
41  fi
42fi
43export cgroup_path
44
45function cleanup() {
46  if [[ $cgroup2 ]]; then
47    echo $$ >$cgroup_path/cgroup.procs
48  else
49    echo $$ >$cgroup_path/tasks
50  fi
51
52  if [[ -e /mnt/huge ]]; then
53    rm -rf /mnt/huge/*
54    umount /mnt/huge || echo error
55    rmdir /mnt/huge
56  fi
57  if [[ -e $cgroup_path/hugetlb_cgroup_test ]]; then
58    rmdir $cgroup_path/hugetlb_cgroup_test
59  fi
60  if [[ -e $cgroup_path/hugetlb_cgroup_test1 ]]; then
61    rmdir $cgroup_path/hugetlb_cgroup_test1
62  fi
63  if [[ -e $cgroup_path/hugetlb_cgroup_test2 ]]; then
64    rmdir $cgroup_path/hugetlb_cgroup_test2
65  fi
66  echo 0 >/proc/sys/vm/nr_hugepages
67  echo CLEANUP DONE
68}
69
70function expect_equal() {
71  local expected="$1"
72  local actual="$2"
73  local error="$3"
74
75  if [[ "$expected" != "$actual" ]]; then
76    echo "expected ($expected) != actual ($actual): $3"
77    cleanup
78    exit 1
79  fi
80}
81
82function get_machine_hugepage_size() {
83  hpz=$(grep -i hugepagesize /proc/meminfo)
84  kb=${hpz:14:-3}
85  mb=$(($kb / 1024))
86  echo $mb
87}
88
89MB=$(get_machine_hugepage_size)
90
91function setup_cgroup() {
92  local name="$1"
93  local cgroup_limit="$2"
94  local reservation_limit="$3"
95
96  mkdir $cgroup_path/$name
97
98  echo writing cgroup limit: "$cgroup_limit"
99  echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file
100
101  echo writing reseravation limit: "$reservation_limit"
102  echo "$reservation_limit" > \
103    $cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file
104
105  if [ -e "$cgroup_path/$name/cpuset.cpus" ]; then
106    echo 0 >$cgroup_path/$name/cpuset.cpus
107  fi
108  if [ -e "$cgroup_path/$name/cpuset.mems" ]; then
109    echo 0 >$cgroup_path/$name/cpuset.mems
110  fi
111}
112
113function wait_for_hugetlb_memory_to_get_depleted() {
114  local cgroup="$1"
115  local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
116  # Wait for hugetlbfs memory to get depleted.
117  while [ $(cat $path) != 0 ]; do
118    echo Waiting for hugetlb memory to get depleted.
119    cat $path
120    sleep 0.5
121  done
122}
123
124function wait_for_hugetlb_memory_to_get_reserved() {
125  local cgroup="$1"
126  local size="$2"
127
128  local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
129  # Wait for hugetlbfs memory to get written.
130  while [ $(cat $path) != $size ]; do
131    echo Waiting for hugetlb memory reservation to reach size $size.
132    cat $path
133    sleep 0.5
134  done
135}
136
137function wait_for_hugetlb_memory_to_get_written() {
138  local cgroup="$1"
139  local size="$2"
140
141  local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
142  # Wait for hugetlbfs memory to get written.
143  while [ $(cat $path) != $size ]; do
144    echo Waiting for hugetlb memory to reach size $size.
145    cat $path
146    sleep 0.5
147  done
148}
149
150function write_hugetlbfs_and_get_usage() {
151  local cgroup="$1"
152  local size="$2"
153  local populate="$3"
154  local write="$4"
155  local path="$5"
156  local method="$6"
157  local private="$7"
158  local expect_failure="$8"
159  local reserve="$9"
160
161  # Function return values.
162  reservation_failed=0
163  oom_killed=0
164  hugetlb_difference=0
165  reserved_difference=0
166
167  local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file
168  local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file
169
170  local hugetlb_before=$(cat $hugetlb_usage)
171  local reserved_before=$(cat $reserved_usage)
172
173  echo
174  echo Starting:
175  echo hugetlb_usage="$hugetlb_before"
176  echo reserved_usage="$reserved_before"
177  echo expect_failure is "$expect_failure"
178
179  output=$(mktemp)
180  set +e
181  if [[ "$method" == "1" ]] || [[ "$method" == 2 ]] ||
182    [[ "$private" == "-r" ]] && [[ "$expect_failure" != 1 ]]; then
183
184    bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
185      "$cgroup" "$path" "$method" "$private" "-l" "$reserve" 2>&1 | tee $output &
186
187    local write_result=$?
188    local write_pid=$!
189
190    until grep -q -i "DONE" $output; do
191      echo waiting for DONE signal.
192      if ! ps $write_pid > /dev/null
193      then
194        echo "FAIL: The write died"
195        cleanup
196        exit 1
197      fi
198      sleep 0.5
199    done
200
201    echo ================= write_hugetlb_memory.sh output is:
202    cat $output
203    echo ================= end output.
204
205    if [[ "$populate" == "-o" ]] || [[ "$write" == "-w" ]]; then
206      wait_for_hugetlb_memory_to_get_written "$cgroup" "$size"
207    elif [[ "$reserve" != "-n" ]]; then
208      wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
209    else
210      # This case doesn't produce visible effects, but we still have
211      # to wait for the async process to start and execute...
212      sleep 0.5
213    fi
214
215    echo write_result is $write_result
216  else
217    bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
218      "$cgroup" "$path" "$method" "$private" "$reserve"
219    local write_result=$?
220
221    if [[ "$reserve" != "-n" ]]; then
222      wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
223    fi
224  fi
225  set -e
226
227  if [[ "$write_result" == 1 ]]; then
228    reservation_failed=1
229  fi
230
231  # On linus/master, the above process gets SIGBUS'd on oomkill, with
232  # return code 135. On earlier kernels, it gets actual oomkill, with return
233  # code 137, so just check for both conditions in case we're testing
234  # against an earlier kernel.
235  if [[ "$write_result" == 135 ]] || [[ "$write_result" == 137 ]]; then
236    oom_killed=1
237  fi
238
239  local hugetlb_after=$(cat $hugetlb_usage)
240  local reserved_after=$(cat $reserved_usage)
241
242  echo After write:
243  echo hugetlb_usage="$hugetlb_after"
244  echo reserved_usage="$reserved_after"
245
246  hugetlb_difference=$(($hugetlb_after - $hugetlb_before))
247  reserved_difference=$(($reserved_after - $reserved_before))
248}
249
250function cleanup_hugetlb_memory() {
251  set +e
252  local cgroup="$1"
253  if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then
254    echo killing write_to_hugetlbfs
255    killall -2 write_to_hugetlbfs
256    wait_for_hugetlb_memory_to_get_depleted $cgroup
257  fi
258  set -e
259
260  if [[ -e /mnt/huge ]]; then
261    rm -rf /mnt/huge/*
262    umount /mnt/huge
263    rmdir /mnt/huge
264  fi
265}
266
267function run_test() {
268  local size=$(($1 * ${MB} * 1024 * 1024))
269  local populate="$2"
270  local write="$3"
271  local cgroup_limit=$(($4 * ${MB} * 1024 * 1024))
272  local reservation_limit=$(($5 * ${MB} * 1024 * 1024))
273  local nr_hugepages="$6"
274  local method="$7"
275  local private="$8"
276  local expect_failure="$9"
277  local reserve="${10}"
278
279  # Function return values.
280  hugetlb_difference=0
281  reserved_difference=0
282  reservation_failed=0
283  oom_killed=0
284
285  echo nr hugepages = "$nr_hugepages"
286  echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
287
288  setup_cgroup "hugetlb_cgroup_test" "$cgroup_limit" "$reservation_limit"
289
290  mkdir -p /mnt/huge
291  mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
292
293  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test" "$size" "$populate" \
294    "$write" "/mnt/huge/test" "$method" "$private" "$expect_failure" \
295    "$reserve"
296
297  cleanup_hugetlb_memory "hugetlb_cgroup_test"
298
299  local final_hugetlb=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$fault_usage_file)
300  local final_reservation=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$reservation_usage_file)
301
302  echo $hugetlb_difference
303  echo $reserved_difference
304  expect_equal "0" "$final_hugetlb" "final hugetlb is not zero"
305  expect_equal "0" "$final_reservation" "final reservation is not zero"
306}
307
308function run_multiple_cgroup_test() {
309  local size1="$1"
310  local populate1="$2"
311  local write1="$3"
312  local cgroup_limit1="$4"
313  local reservation_limit1="$5"
314
315  local size2="$6"
316  local populate2="$7"
317  local write2="$8"
318  local cgroup_limit2="$9"
319  local reservation_limit2="${10}"
320
321  local nr_hugepages="${11}"
322  local method="${12}"
323  local private="${13}"
324  local expect_failure="${14}"
325  local reserve="${15}"
326
327  # Function return values.
328  hugetlb_difference1=0
329  reserved_difference1=0
330  reservation_failed1=0
331  oom_killed1=0
332
333  hugetlb_difference2=0
334  reserved_difference2=0
335  reservation_failed2=0
336  oom_killed2=0
337
338  echo nr hugepages = "$nr_hugepages"
339  echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
340
341  setup_cgroup "hugetlb_cgroup_test1" "$cgroup_limit1" "$reservation_limit1"
342  setup_cgroup "hugetlb_cgroup_test2" "$cgroup_limit2" "$reservation_limit2"
343
344  mkdir -p /mnt/huge
345  mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
346
347  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test1" "$size1" \
348    "$populate1" "$write1" "/mnt/huge/test1" "$method" "$private" \
349    "$expect_failure" "$reserve"
350
351  hugetlb_difference1=$hugetlb_difference
352  reserved_difference1=$reserved_difference
353  reservation_failed1=$reservation_failed
354  oom_killed1=$oom_killed
355
356  local cgroup1_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$fault_usage_file
357  local cgroup1_reservation_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$reservation_usage_file
358  local cgroup2_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$fault_usage_file
359  local cgroup2_reservation_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$reservation_usage_file
360
361  local usage_before_second_write=$(cat $cgroup1_hugetlb_usage)
362  local reservation_usage_before_second_write=$(cat $cgroup1_reservation_usage)
363
364  write_hugetlbfs_and_get_usage "hugetlb_cgroup_test2" "$size2" \
365    "$populate2" "$write2" "/mnt/huge/test2" "$method" "$private" \
366    "$expect_failure" "$reserve"
367
368  hugetlb_difference2=$hugetlb_difference
369  reserved_difference2=$reserved_difference
370  reservation_failed2=$reservation_failed
371  oom_killed2=$oom_killed
372
373  expect_equal "$usage_before_second_write" \
374    "$(cat $cgroup1_hugetlb_usage)" "Usage changed."
375  expect_equal "$reservation_usage_before_second_write" \
376    "$(cat $cgroup1_reservation_usage)" "Reservation usage changed."
377
378  cleanup_hugetlb_memory
379
380  local final_hugetlb=$(cat $cgroup1_hugetlb_usage)
381  local final_reservation=$(cat $cgroup1_reservation_usage)
382
383  expect_equal "0" "$final_hugetlb" \
384    "hugetlbt_cgroup_test1 final hugetlb is not zero"
385  expect_equal "0" "$final_reservation" \
386    "hugetlbt_cgroup_test1 final reservation is not zero"
387
388  local final_hugetlb=$(cat $cgroup2_hugetlb_usage)
389  local final_reservation=$(cat $cgroup2_reservation_usage)
390
391  expect_equal "0" "$final_hugetlb" \
392    "hugetlb_cgroup_test2 final hugetlb is not zero"
393  expect_equal "0" "$final_reservation" \
394    "hugetlb_cgroup_test2 final reservation is not zero"
395}
396
397cleanup
398
399for populate in "" "-o"; do
400  for method in 0 1 2; do
401    for private in "" "-r"; do
402      for reserve in "" "-n"; do
403
404        # Skip mmap(MAP_HUGETLB | MAP_SHARED). Doesn't seem to be supported.
405        if [[ "$method" == 1 ]] && [[ "$private" == "" ]]; then
406          continue
407        fi
408
409        # Skip populated shmem tests. Doesn't seem to be supported.
410        if [[ "$method" == 2"" ]] && [[ "$populate" == "-o" ]]; then
411          continue
412        fi
413
414        if [[ "$method" == 2"" ]] && [[ "$reserve" == "-n" ]]; then
415          continue
416        fi
417
418        cleanup
419        echo
420        echo
421        echo
422        echo Test normal case.
423        echo private=$private, populate=$populate, method=$method, reserve=$reserve
424        run_test 5 "$populate" "" 10 10 10 "$method" "$private" "0" "$reserve"
425
426        echo Memory charged to hugtlb=$hugetlb_difference
427        echo Memory charged to reservation=$reserved_difference
428
429        if [[ "$populate" == "-o" ]]; then
430          expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
431            "Reserved memory charged to hugetlb cgroup."
432        else
433          expect_equal "0" "$hugetlb_difference" \
434            "Reserved memory charged to hugetlb cgroup."
435        fi
436
437        if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
438          expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
439            "Reserved memory not charged to reservation usage."
440        else
441          expect_equal "0" "$reserved_difference" \
442            "Reserved memory not charged to reservation usage."
443        fi
444
445        echo 'PASS'
446
447        cleanup
448        echo
449        echo
450        echo
451        echo Test normal case with write.
452        echo private=$private, populate=$populate, method=$method, reserve=$reserve
453        run_test 5 "$populate" '-w' 5 5 10 "$method" "$private" "0" "$reserve"
454
455        echo Memory charged to hugtlb=$hugetlb_difference
456        echo Memory charged to reservation=$reserved_difference
457
458        expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
459          "Reserved memory charged to hugetlb cgroup."
460
461        expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
462          "Reserved memory not charged to reservation usage."
463
464        echo 'PASS'
465
466        cleanup
467        continue
468        echo
469        echo
470        echo
471        echo Test more than reservation case.
472        echo private=$private, populate=$populate, method=$method, reserve=$reserve
473
474        if [ "$reserve" != "-n" ]; then
475          run_test "5" "$populate" '' "10" "2" "10" "$method" "$private" "1" \
476            "$reserve"
477
478          expect_equal "1" "$reservation_failed" "Reservation succeeded."
479        fi
480
481        echo 'PASS'
482
483        cleanup
484
485        echo
486        echo
487        echo
488        echo Test more than cgroup limit case.
489        echo private=$private, populate=$populate, method=$method, reserve=$reserve
490
491        # Not sure if shm memory can be cleaned up when the process gets sigbus'd.
492        if [[ "$method" != 2 ]]; then
493          run_test 5 "$populate" "-w" 2 10 10 "$method" "$private" "1" "$reserve"
494
495          expect_equal "1" "$oom_killed" "Not oom killed."
496        fi
497        echo 'PASS'
498
499        cleanup
500
501        echo
502        echo
503        echo
504        echo Test normal case, multiple cgroups.
505        echo private=$private, populate=$populate, method=$method, reserve=$reserve
506        run_multiple_cgroup_test "3" "$populate" "" "10" "10" "5" \
507          "$populate" "" "10" "10" "10" \
508          "$method" "$private" "0" "$reserve"
509
510        echo Memory charged to hugtlb1=$hugetlb_difference1
511        echo Memory charged to reservation1=$reserved_difference1
512        echo Memory charged to hugtlb2=$hugetlb_difference2
513        echo Memory charged to reservation2=$reserved_difference2
514
515        if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
516          expect_equal "3" "$reserved_difference1" \
517            "Incorrect reservations charged to cgroup 1."
518
519          expect_equal "5" "$reserved_difference2" \
520            "Incorrect reservation charged to cgroup 2."
521
522        else
523          expect_equal "0" "$reserved_difference1" \
524            "Incorrect reservations charged to cgroup 1."
525
526          expect_equal "0" "$reserved_difference2" \
527            "Incorrect reservation charged to cgroup 2."
528        fi
529
530        if [[ "$populate" == "-o" ]]; then
531          expect_equal "3" "$hugetlb_difference1" \
532            "Incorrect hugetlb charged to cgroup 1."
533
534          expect_equal "5" "$hugetlb_difference2" \
535            "Incorrect hugetlb charged to cgroup 2."
536
537        else
538          expect_equal "0" "$hugetlb_difference1" \
539            "Incorrect hugetlb charged to cgroup 1."
540
541          expect_equal "0" "$hugetlb_difference2" \
542            "Incorrect hugetlb charged to cgroup 2."
543        fi
544        echo 'PASS'
545
546        cleanup
547        echo
548        echo
549        echo
550        echo Test normal case with write, multiple cgroups.
551        echo private=$private, populate=$populate, method=$method, reserve=$reserve
552        run_multiple_cgroup_test "3" "$populate" "-w" "10" "10" "5" \
553          "$populate" "-w" "10" "10" "10" \
554          "$method" "$private" "0" "$reserve"
555
556        echo Memory charged to hugtlb1=$hugetlb_difference1
557        echo Memory charged to reservation1=$reserved_difference1
558        echo Memory charged to hugtlb2=$hugetlb_difference2
559        echo Memory charged to reservation2=$reserved_difference2
560
561        expect_equal "3" "$hugetlb_difference1" \
562          "Incorrect hugetlb charged to cgroup 1."
563
564        expect_equal "3" "$reserved_difference1" \
565          "Incorrect reservation charged to cgroup 1."
566
567        expect_equal "5" "$hugetlb_difference2" \
568          "Incorrect hugetlb charged to cgroup 2."
569
570        expect_equal "5" "$reserved_difference2" \
571          "Incorrected reservation charged to cgroup 2."
572        echo 'PASS'
573
574        cleanup
575
576      done # reserve
577    done   # private
578  done     # populate
579done       # method
580
581if [[ $do_umount ]]; then
582  umount $cgroup_path
583  rmdir $cgroup_path
584fi
585