// nscSSHBootstrapLikelySucceeded reports whether a failed nsc ssh run looks
// like a session that ended only after the runner had already started work.
//
// It returns true only when all of the following hold:
//   - err is non-nil;
//   - the "remote command exited without exit status or exit signal" message
//     is present, compared case-insensitively, in err's text or in output;
//   - output contains, case-insensitively, each of "runner registered
//     successfully", "starting job" and "task ".
//
// The check is a substring heuristic: marker order is not verified.
func nscSSHBootstrapLikelySucceeded(err error, output string) bool {
	if err == nil {
		return false
	}

	const missingExitStatus = "remote command exited without exit status or exit signal"

	errText := strings.ToLower(err.Error())
	output = strings.ToLower(output)

	// NOTE(review): if nsc ssh is run as a child process, err may carry only
	// the process exit status while this message is printed to the captured
	// output — confirm against the caller. Accepting either location keeps the
	// heuristic working in both cases.
	if !strings.Contains(errText, missingExitStatus) && !strings.Contains(output, missingExitStatus) {
		return false
	}

	// Every marker must be present; a partial log stays a failure.
	for _, marker := range []string{
		"runner registered successfully",
		"starting job",
		"task ",
	} {
		if !strings.Contains(output, marker) {
			return false
		}
	}
	return true
}

// TestNSCSSHBootstrapLikelySucceeded checks that the missing-exit-status error
// combined with a complete runner log is accepted.
func TestNSCSSHBootstrapLikelySucceeded(t *testing.T) {
	t.Parallel()

	err := errors.New("wait: remote command exited without exit status or exit signal")
	output := `
level=info msg="Runner registered successfully."
time="2026-03-19T11:29:49Z" level=info msg="Starting job"
time="2026-03-19T11:29:50Z" level=info msg="task 124 repo is hackclub/burrow"
`

	if !nscSSHBootstrapLikelySucceeded(err, output) {
		t.Fatal("expected handoff success heuristic to match")
	}
}

// TestNSCSSHBootstrapLikelySucceededRejectsIncompleteOutput checks that a log
// showing only the registration line is still treated as a failure.
func TestNSCSSHBootstrapLikelySucceededRejectsIncompleteOutput(t *testing.T) {
	t.Parallel()

	err := errors.New("wait: remote command exited without exit status or exit signal")
	output := `level=info msg="Runner registered successfully."`

	if nscSSHBootstrapLikelySucceeded(err, output) {
		t.Fatal("expected incomplete runner output to remain a failure")
	}
}

// TestNSCSSHBootstrapLikelySucceededRejectsDifferentErrors checks that a
// complete runner log does not excuse an error without the handoff message.
func TestNSCSSHBootstrapLikelySucceededRejectsDifferentErrors(t *testing.T) {
	t.Parallel()

	err := errors.New("exit status 1")
	output := `
level=info msg="Runner registered successfully."
time="2026-03-19T11:29:49Z" level=info msg="Starting job"
time="2026-03-19T11:29:50Z" level=info msg="task 124 repo is hackclub/burrow"
`

	if nscSSHBootstrapLikelySucceeded(err, output) {
		t.Fatal("expected unrelated nsc ssh errors to remain failures")
	}
}

// TestNSCSSHBootstrapLikelySucceededAcceptsMarkerInOutput checks that the
// handoff message is honoured when it appears in the captured output while the
// error itself only reports an exit status.
func TestNSCSSHBootstrapLikelySucceededAcceptsMarkerInOutput(t *testing.T) {
	t.Parallel()

	err := errors.New("exit status 1")
	output := `
level=info msg="Runner registered successfully."
time="2026-03-19T11:29:49Z" level=info msg="Starting job"
time="2026-03-19T11:29:50Z" level=info msg="task 124 repo is hackclub/burrow"
Failed: wait: remote command exited without exit status or exit signal
`

	if !nscSSHBootstrapLikelySucceeded(err, output) {
		t.Fatal("expected handoff message in captured output to match")
	}
}