Skip to content

Commit

Permalink
Update retry and reboot logic in epoxy template and 1to2 scripts (#154)
Browse files (browse the repository at this point in the history)
* Reboot after max retry
* Reboot after max delay is reached
* Reduce max retry
  • Loading branch information
stephen-soltesz committed Nov 5, 2019
1 parent 423937c commit 1498bcf
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 22 deletions.
25 changes: 10 additions & 15 deletions actions/stage2/stage1to2.ipxe
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,20 +11,17 @@ echo -- Downloading stage2 image from ${vmlinuz_url}

# Initialize retry counters.
set retry_count:int32 1
set max_retry_count 20
set max_retry_count 15

goto firstfetch

:loop
inc retry_count 1
echo -- Retries ${retry_count} ${max_retry_count}
echo Failed ${retry_count} times... Retrying after ${retry_count} seconds
# This doesn't work?
# iseq ${retry_count} ${max_retry_count} && goto fetch_timeout
echo Failed ${retry_count} of ${max_retry_count} times... Retrying after ${retry_count} seconds
sleep ${retry_count}
inc retry_count 1
iseq ${retry_count} ${max_retry_count} && goto fetch_timeout ||

:firstfetch
echo -- Retries ${retry_count} ${max_retry_count}
kernel --name vmlinuz ${vmlinuz_url} || goto loop

# Initialize retry counters.
Expand All @@ -34,13 +31,12 @@ set max_retry_count 20
goto firstfetch_initrd

:loop_initrd
inc retry_count 1
echo -- Retries ${retry_count} ${max_retry_count}
echo Failed ${retry_count} times... Retrying after ${retry_count} seconds
echo Failed ${retry_count} of ${max_retry_count} times... Retrying after ${retry_count} seconds
sleep ${retry_count}
inc retry_count 1
iseq ${retry_count} ${max_retry_count} && goto fetch_timeout ||

:firstfetch_initrd
echo -- Retries ${retry_count} ${max_retry_count}
initrd --name initrd ${initram_url} || goto loop_initrd

imgstat
Expand Down Expand Up @@ -81,7 +77,6 @@ set kargs ${kargs} epoxy.project=${project}
boot vmlinuz ${kargs} || shell

:fetch_timeout
echo -- Retries ${retry_count} ${max_retry_count}
echo Failed too many times..
# Reboot?
shell
echo Rebooting after ${retry_count} of ${max_retry_count} attempts.
sleep 10
reboot || shell
13 changes: 6 additions & 7 deletions configs/stage1_mlxrom/stage1-template.ipxe
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,8 +6,8 @@ set epoxyaddress epoxy-boot-api.{{project}}.measurementlab.net:4430
set menu_timeout_ms:int32 5000
set fetch_timeout_ms 10000

set retry_delay_s:int32 30
set max_retry_delay_s 480
set retry_delay_s:int32 15
set max_retry_delay_s 240

# IPv6 network configuration.
# ipv6_enabled will always be defined. When ipv6_enabled is "false", then other
Expand Down Expand Up @@ -50,7 +50,6 @@ set stage1_url https://${epoxyaddress}/v1/boot/${hostname}/stage1.ipxe
menu M-Lab iPXE boot menu: ${epoxyaddress}
item --gap -- Production options:
item stage1 -- Boot stage1 script
item localboot -- Boot local disk
item --gap
item --gap -- Diagnostic options:
item shell -- iPXE Shell
Expand Down Expand Up @@ -84,7 +83,7 @@ set stage1_url https://${epoxyaddress}/v1/boot/${hostname}/stage1.ipxe
echo Fetching stage1 script.
goto firstfetch

:retry_loop iseq ${retry_delay_s} ${max_retry_delay_s} && goto fetch_timeout_local_boot ||
:retry_loop iseq ${retry_delay_s} ${max_retry_delay_s} && goto fetch_timeout_reboot ||
inc retry_delay_s ${retry_delay_s}
echo Sleeping ${retry_delay_s} seconds...
sleep ${retry_delay_s}
Expand Down Expand Up @@ -121,7 +120,7 @@ set stage1_url https://${epoxyaddress}/v1/boot/${hostname}/stage1.ipxe


:localboot
:fetch_timeout_local_boot
echo Sleeping 10 seconds and booting from local hard drive.
:fetch_timeout_reboot
echo Sleeping 10 seconds and rebooting system to retry.
sleep 10
sanboot --no-describe --drive 0x80 || shell
reboot || shell

0 comments on commit 1498bcf

Please sign in to comment.