Add Forgejo namespace workflow stack
Some checks are pending
Build Rust / Cargo Test (push) Waiting to run
Build Site / Next.js Build (push) Waiting to run

This commit is contained in:
Conrad Kramer 2026-03-18 02:49:55 -07:00
parent 482fd5d085
commit 865b676c99
68 changed files with 9709 additions and 11 deletions

View file

@ -0,0 +1,460 @@
package nsc
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"os/exec"
"strings"
"time"
"github.com/google/uuid"
"golang.org/x/sync/semaphore"
)
// Options configures a Dispatcher. NewDispatcher rejects a missing BinaryPath
// or DefaultImage and fills in defaults for most of the remaining fields.
type Options struct {
// BinaryPath is the nsc executable invoked for run/describe/destroy. Required.
BinaryPath string
// DefaultImage is the runner image used when a request does not name one. Required.
DefaultImage string
// DefaultMachine is the machine type used when a request does not name one.
DefaultMachine string
// DefaultDuration is the runner lifetime used when a request leaves Duration
// at zero. NewDispatcher sets it to 30 minutes when it is zero.
DefaultDuration time.Duration
// WorkDir, when non-empty, is exported to the runner as FORGEJO_RUNNER_WORKDIR.
WorkDir string
// MaxParallel is the weight of the semaphore guarding LaunchRunner; values
// <= 0 are replaced with 4.
MaxParallel int64
// RunnerNamePrefix is prepended to generated runner names (default "nscloud-").
RunnerNamePrefix string
// Executor is exported to the runner as FORGEJO_RUNNER_EXEC (default "shell").
Executor string
// Network, when non-empty, is passed to `nsc run` via --network.
Network string
// ComputeBaseURL is the base URL handed to the Compute service client.
ComputeBaseURL string
// MacosBaseImageID selects the macOS base image (default "tahoe").
MacosBaseImageID string
// MacosMachineArch is the machine architecture requested for macOS
// instances (default "arm64").
MacosMachineArch string
// Logger receives dispatcher logs; when nil, log output is discarded.
Logger *slog.Logger
}
// LaunchRequest describes one runner to launch via Dispatcher.LaunchRunner.
type LaunchRequest struct {
// Token is the runner registration token. Required.
Token string
// InstanceURL is the Forgejo instance the runner registers with. Required.
InstanceURL string
// Labels are the runner labels; they are also inspected to route the
// request to the Windows or macOS launch paths.
Labels []string
// Duration is the runner lifetime; zero selects Options.DefaultDuration.
Duration time.Duration
// MachineType overrides Options.DefaultMachine when non-blank.
MachineType string
// Image overrides Options.DefaultImage when non-blank.
Image string
// ExtraEnv entries are merged over the environment the dispatcher builds,
// so they win on key collisions.
ExtraEnv map[string]string
}
// Dispatcher launches Forgejo runners on Namespace. Create it with
// NewDispatcher so that defaults, the semaphore and the logger are set.
type Dispatcher struct {
// opts holds the options after NewDispatcher applied its defaults.
opts Options
// sem limits concurrent LaunchRunner calls to opts.MaxParallel.
sem *semaphore.Weighted
// log is never nil after NewDispatcher; it may discard its output.
log *slog.Logger
}
// NewDispatcher validates opts, fills in defaults for unset optional fields and
// returns a ready-to-use Dispatcher.
//
// It returns an error when BinaryPath or DefaultImage is empty. A nil Logger is
// replaced with one that discards all output.
func NewDispatcher(opts Options) (*Dispatcher, error) {
	switch {
	case opts.BinaryPath == "":
		return nil, errors.New("nsc binary path is required")
	case opts.DefaultImage == "":
		return nil, errors.New("default Namespace runner image is required")
	}

	// orDefault assigns fallback to *field only when the field is still empty.
	orDefault := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}
	orDefault(&opts.RunnerNamePrefix, "nscloud-")
	orDefault(&opts.Executor, "shell")
	orDefault(&opts.MacosBaseImageID, "tahoe")
	orDefault(&opts.MacosMachineArch, "arm64")

	if opts.MaxParallel < 1 {
		opts.MaxParallel = 4
	}
	if opts.DefaultDuration == 0 {
		opts.DefaultDuration = 30 * time.Minute
	}

	dispatcher := &Dispatcher{
		opts: opts,
		sem:  semaphore.NewWeighted(opts.MaxParallel),
		log:  opts.Logger,
	}
	if dispatcher.log == nil {
		dispatcher.log = slog.New(slog.NewTextHandler(io.Discard, nil))
	}
	return dispatcher, nil
}
// LaunchRunner launches one Forgejo runner for req and returns the generated
// runner name.
//
// The call holds one slot of the dispatcher's semaphore for its whole
// duration. Requests carrying a Windows label are routed to the WinRM
// launcher; requests carrying a macOS label go to the Compute API launcher and
// fall back to the nsc CLI launcher if that fails. Everything else is started
// with `nsc run`. When an instance ID can be parsed from the command output,
// LaunchRunner waits for the instance to stop (at most the runner duration)
// and then destroys it.
//
// It returns an error when the token or instance URL is missing, when the
// semaphore cannot be acquired before ctx is done, or when the launch fails.
func (d *Dispatcher) LaunchRunner(ctx context.Context, req LaunchRequest) (string, error) {
	// destroyTimeout bounds the final `nsc destroy`. It is deliberately
	// independent from the stop-wait budget (see below).
	const destroyTimeout = time.Minute

	if req.Token == "" {
		return "", errors.New("registration token is required")
	}
	if req.InstanceURL == "" {
		return "", errors.New("forgejo instance url is required")
	}
	if err := d.sem.Acquire(ctx, 1); err != nil {
		return "", err
	}
	defer d.sem.Release(1)

	runnerName := d.generateName()
	duration := req.Duration
	if duration == 0 {
		duration = d.opts.DefaultDuration
	}
	machineType := choose(req.MachineType, d.opts.DefaultMachine)
	image := choose(req.Image, d.opts.DefaultImage)

	if hasWindowsLabel(req.Labels) {
		if err := d.launchWindowsRunnerViaWinRM(ctx, runnerName, req, duration, machineType); err != nil {
			return "", err
		}
		return runnerName, nil
	}

	if hasMacOSLabel(req.Labels) {
		// Compute macOS shapes differ from the Linux "run" defaults. If the request
		// didn't specify a machine type, ensure we pick a macOS-valid default.
		if machineType == "" || machineType == d.opts.DefaultMachine {
			machineType = "12x28"
		}
		// Prefer the Compute API path because it uses the service token (NSC_TOKEN_FILE)
		// and does not require an interactive `nsc login` session.
		if err := d.launchMacOSRunner(ctx, runnerName, req, duration, machineType); err != nil {
			d.log.Warn("macos compute launch failed; falling back to nsc create+ssh", "runner", runnerName, "err", err)
			if err := d.launchMacOSRunnerViaNSC(ctx, runnerName, req, duration, machineType); err != nil {
				return "", err
			}
		}
		return runnerName, nil
	}

	env := map[string]string{
		"FORGEJO_INSTANCE_URL":  req.InstanceURL,
		"FORGEJO_RUNNER_TOKEN":  req.Token,
		"FORGEJO_RUNNER_NAME":   runnerName,
		"FORGEJO_RUNNER_LABELS": strings.Join(req.Labels, ","),
		"FORGEJO_RUNNER_EXEC":   d.opts.Executor,
	}
	// Caller-supplied variables override the defaults above.
	for k, v := range req.ExtraEnv {
		env[k] = v
	}
	if _, ok := env["NSC_CACHE_PATH"]; !ok {
		env["NSC_CACHE_PATH"] = "/nix/store"
	}

	script := d.bootstrapScript()
	args := []string{
		"run",
		"--wait",
		"--output",
		"json",
		"--duration", duration.String(),
		"--image", image,
		"--name", runnerName,
		"--user", "root",
	}
	if machineType != "" {
		args = append(args, "--machine_type", machineType)
	}
	if d.opts.Network != "" {
		args = append(args, "--network", d.opts.Network)
	}
	for key, value := range env {
		if value == "" {
			continue
		}
		args = append(args, "-e", fmt.Sprintf("%s=%s", key, value))
	}
	if d.opts.WorkDir != "" {
		args = append(args, "-e", fmt.Sprintf("FORGEJO_RUNNER_WORKDIR=%s", d.opts.WorkDir))
	}
	args = append(args, "--", "/bin/sh", "-c", script)

	cmd := exec.CommandContext(ctx, d.opts.BinaryPath, args...)
	var buf bytes.Buffer
	cmd.Stdout = &buf
	cmd.Stderr = &buf

	start := time.Now()
	d.log.Info("launching Namespace runner",
		"runner", runnerName,
		"machine_type", machineType,
		"image", image,
	)
	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("nsc run failed: %w\n%s", err, buf.String())
	}
	if output := strings.TrimSpace(buf.String()); output != "" {
		d.log.Info("runner output", "runner", runnerName, "output", output)
	}
	d.log.Info("runner completed",
		"runner", runnerName,
		"duration", time.Since(start),
	)

	if instanceID := parseInstanceID(buf.String()); instanceID != "" {
		// The wait is detached from ctx on purpose: cleanup must still happen
		// when the caller's context is already cancelled.
		waitCtx, cancelWait := context.WithTimeout(context.Background(), duration)
		stopped := d.waitForInstanceStop(waitCtx, runnerName, instanceID, duration)
		cancelWait()
		if !stopped {
			d.log.Warn("runner did not stop before timeout", "runner", runnerName, "instance", instanceID)
		}
		// Use a fresh context for the destroy. Reusing waitCtx would hand
		// `nsc destroy` an already-expired context exactly when the wait timed
		// out, so the instance would never be force-destroyed.
		destroyCtx, cancelDestroy := context.WithTimeout(context.Background(), destroyTimeout)
		defer cancelDestroy()
		d.destroyInstance(destroyCtx, runnerName, instanceID)
	}
	return runnerName, nil
}
// generateName returns the configured runner name prefix followed by the first
// 12 characters of a freshly generated UUID with its dashes removed.
func (d *Dispatcher) generateName() string {
	compact := strings.ReplaceAll(uuid.NewString(), "-", "")
	suffix := compact[:12]
	return d.opts.RunnerNamePrefix + suffix
}
// parseInstanceID extracts the instance identifier from nsc command output.
//
// It first looks for a JSON object carrying a non-empty "cluster_id". If that
// yields nothing it falls back to the first whitespace-delimited token that
// follows the first "ID:" marker. It returns "" when neither form is present.
func parseInstanceID(output string) string {
	if blob := extractJSON(output); blob != "" {
		var doc struct {
			ClusterID string `json:"cluster_id"`
		}
		if json.Unmarshal([]byte(blob), &doc) == nil && doc.ClusterID != "" {
			return doc.ClusterID
		}
	}

	_, afterMarker, found := strings.Cut(output, "ID:")
	if !found {
		return ""
	}
	if tokens := strings.Fields(afterMarker); len(tokens) > 0 {
		return tokens[0]
	}
	return ""
}
// extractJSON returns the substring of output that spans from the first '['
// or '{' to the last ']' or '}' (inclusive), after trimming surrounding
// whitespace. It returns "" when no such span exists. The result is not
// validated as JSON.
func extractJSON(output string) string {
	text := strings.TrimSpace(output)
	first := strings.IndexAny(text, "[{")
	last := strings.LastIndexAny(text, "]}")
	// first < 0 also covers the empty-input case; last < first covers both a
	// missing closer (-1) and a closer that precedes the opener.
	if first < 0 || last < first {
		return ""
	}
	return text[first : last+1]
}
// describeResponse is one element of the JSON array that instanceStopped
// decodes from `nsc describe --output json`.
type describeResponse struct {
Resource string `json:"resource"`
// PerResource maps a key from the "per_resource" object to its state.
// NOTE(review): presumably keyed by resource name — confirm against nsc output.
PerResource map[string]describeTarget `json:"per_resource"`
}
// describeTarget is the state of a single entry of describeResponse.PerResource.
type describeTarget struct {
// Tombstone, when non-empty, makes instanceStopped report the instance as stopped.
Tombstone string `json:"tombstone"`
Container []describeContainer `json:"container"`
}
// describeContainer is the state of one container. instanceStopped treats a
// container as still running unless Status is "stopped" or TerminatedAt is set.
type describeContainer struct {
Status string `json:"status"`
TerminatedAt string `json:"terminated_at"`
}
// instanceStopped interprets the JSON emitted by `nsc describe` and reports
// whether the instance can be considered stopped.
//
// It returns false when output carries no decodable, non-empty JSON array.
// Otherwise it walks every per-resource target: the first target with a
// tombstone yields true, and the first container that is neither "stopped"
// nor has a termination time yields false. If the walk finishes without
// either, the result is true.
func instanceStopped(output string) bool {
	blob := extractJSON(output)
	if blob == "" {
		return false
	}

	var responses []describeResponse
	if json.Unmarshal([]byte(blob), &responses) != nil || len(responses) == 0 {
		return false
	}

	for i := range responses {
		for _, target := range responses[i].PerResource {
			if target.Tombstone != "" {
				return true
			}
			for _, c := range target.Container {
				stillRunning := c.Status != "stopped" && c.TerminatedAt == ""
				if stillRunning {
					return false
				}
			}
		}
	}
	return true
}
// waitForInstanceStop polls `nsc describe` every 10 seconds until the instance
// is reported stopped, the timeout elapses, or ctx is done. It reports whether
// the instance was observed as stopped.
//
// A non-positive timeout selects the dispatcher's default duration. A failed
// status check is logged and retried on the next tick instead of ending the
// wait: the caller destroys the instance as soon as this function returns, so
// giving up on a single transient describe failure would kill a runner that
// may still be executing its job. This mirrors waitForMacOSRunnerStop.
func (d *Dispatcher) waitForInstanceStop(ctx context.Context, runnerName, instanceID string, timeout time.Duration) bool {
	if timeout <= 0 {
		timeout = d.opts.DefaultDuration
	}
	deadline := time.Now().Add(timeout)
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()

	for {
		stopped, err := d.checkInstanceStopped(ctx, instanceID)
		switch {
		case err != nil:
			d.log.Warn("runner stop check failed", "runner", runnerName, "instance", instanceID, "err", err)
		case stopped:
			return true
		}
		if time.Now().After(deadline) {
			return false
		}
		select {
		case <-ctx.Done():
			return false
		case <-ticker.C:
		}
	}
}
// checkInstanceStopped runs `nsc describe --output json <instanceID>` and
// reports whether the instance is stopped.
//
// When the command fails but its combined output mentions "destroyed" or
// "not found" (case-insensitively), the instance is treated as stopped. Any
// other command failure is returned as an error that includes the output.
func (d *Dispatcher) checkInstanceStopped(ctx context.Context, instanceID string) (bool, error) {
	var combined bytes.Buffer
	describe := exec.CommandContext(ctx, d.opts.BinaryPath, "describe", "--output", "json", instanceID)
	describe.Stdout, describe.Stderr = &combined, &combined

	runErr := describe.Run()
	if runErr == nil {
		return instanceStopped(combined.String()), nil
	}

	lowered := strings.ToLower(combined.String())
	for _, hint := range []string{"destroyed", "not found"} {
		if strings.Contains(lowered, hint) {
			return true, nil
		}
	}
	return false, fmt.Errorf("nsc describe failed: %w\n%s", runErr, strings.TrimSpace(combined.String()))
}
// destroyInstance runs `nsc destroy --force <instanceID>` and logs the outcome.
// Failures are logged as warnings and not returned; cleanup is best-effort.
func (d *Dispatcher) destroyInstance(ctx context.Context, runnerName, instanceID string) {
	var combined bytes.Buffer
	destroy := exec.CommandContext(ctx, d.opts.BinaryPath, "destroy", "--force", instanceID)
	destroy.Stdout, destroy.Stderr = &combined, &combined

	runErr := destroy.Run()
	text := strings.TrimSpace(combined.String())
	if runErr != nil {
		d.log.Warn("runner destroy failed", "runner", runnerName, "instance", instanceID, "err", runErr, "output", text)
		return
	}

	// Only attach the "output" attribute when the command printed something.
	attrs := []any{"runner", runnerName, "instance", instanceID}
	if text != "" {
		attrs = append(attrs, "output", text)
	}
	d.log.Info("runner destroyed", attrs...)
}
// choose returns the first argument that is not blank (empty or whitespace
// only). The chosen value is returned as given, without trimming. It returns
// "" when every argument is blank or none are supplied.
func choose(values ...string) string {
	for i := range values {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
// bootstrapScript returns the shell script that LaunchRunner passes to
// `/bin/sh -c` inside the instance started by `nsc run`. The receiver is not
// read; the script is configured entirely through environment variables
// (FORGEJO_INSTANCE_URL, FORGEJO_RUNNER_TOKEN, FORGEJO_RUNNER_NAME,
// FORGEJO_RUNNER_LABELS, FORGEJO_RUNNER_EXEC, FORGEJO_RUNNER_WORKDIR and
// FORGEJO_RUNNER_MODE).
//
// NOTE(review): the runner.yaml heredocs use a quoted delimiter ('EOF'), so
// the shell writes `${FORGEJO_RUNNER_NAME}` literally rather than expanding
// it — confirm that is intended. The name is also passed to
// `forgejo-runner register --name`.
func (d *Dispatcher) bootstrapScript() string {
var builder strings.Builder
// Part 1: enter the work directory, install node, sudo/bash, curl and xz with
// apk when they are missing, and start runner.yaml up to the "labels:" key.
builder.WriteString(`set -euo pipefail
mkdir -p "${FORGEJO_RUNNER_WORKDIR:-/tmp/forgejo-runner}"
cd "${FORGEJO_RUNNER_WORKDIR:-/tmp/forgejo-runner}"
if ! command -v node >/dev/null 2>&1; then
apk add --no-cache nodejs npm >/dev/null
fi
if ! command -v sudo >/dev/null 2>&1; then
apk add --no-cache sudo bash >/dev/null
fi
if ! command -v curl >/dev/null 2>&1; then
apk add --no-cache curl >/dev/null
fi
if ! command -v xz >/dev/null 2>&1; then
apk add --no-cache xz >/dev/null
fi
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
node --version >/dev/null
cat > runner.yaml <<'EOF'
log:
level: info
runner:
file: .runner
capacity: 1
name: ${FORGEJO_RUNNER_NAME}
labels:
EOF
`)
// Part 2: resolve each comma-separated label. Labels that already contain a
// ':' are kept as-is; others get ":host" or ":<executor>" appended ("shell" is
// mapped to "host"). Each resolved label is appended to runner.yaml and to the
// comma-separated list used for registration.
builder.WriteString(`runner_exec="${FORGEJO_RUNNER_EXEC:-host}"
if [ "$runner_exec" = "shell" ]; then
runner_exec="host"
fi
resolved_labels=""
for label in ${FORGEJO_RUNNER_LABELS//,/ } ; do
if [ -z "${label}" ]; then
continue
fi
case "${label}" in
*:*) resolved="${label}" ;;
*)
if [ "$runner_exec" = "host" ]; then
resolved="${label}:host"
else
resolved="${label}:${runner_exec}"
fi
;;
esac
echo " - ${resolved}" >> runner.yaml
if [ -z "${resolved_labels}" ]; then
resolved_labels="${resolved}"
else
resolved_labels="${resolved_labels},${resolved}"
fi
done
`)
// Part 3: finish runner.yaml with the cache disabled, register the runner and
// run it in the mode named by FORGEJO_RUNNER_MODE ("one-job" by default, or
// "daemon"); any other mode exits with status 1.
builder.WriteString(`cat >> runner.yaml <<'EOF'
cache:
enabled: false
EOF
forgejo-runner register \
--no-interactive \
--instance "${FORGEJO_INSTANCE_URL}" \
--token "${FORGEJO_RUNNER_TOKEN}" \
--name "${FORGEJO_RUNNER_NAME}" \
--labels "${resolved_labels}" \
--config runner.yaml
runner_mode="${FORGEJO_RUNNER_MODE:-one-job}"
case "$runner_mode" in
one-job)
forgejo-runner one-job --config runner.yaml
;;
daemon)
forgejo-runner daemon --config runner.yaml
;;
*)
echo "Unknown FORGEJO_RUNNER_MODE: ${runner_mode}" >&2
exit 1
;;
esac
`)
return builder.String()
}

View file

@ -0,0 +1,708 @@
package nsc
import (
"bytes"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/http"
"os"
"strconv"
"strings"
"sync"
"time"
computev1betaconnect "buf.build/gen/go/namespace/cloud/connectrpc/go/proto/namespace/cloud/compute/v1beta/computev1betaconnect"
computev1beta "buf.build/gen/go/namespace/cloud/protocolbuffers/go/proto/namespace/cloud/compute/v1beta"
stdlib "buf.build/gen/go/namespace/cloud/protocolbuffers/go/proto/namespace/stdlib"
"connectrpc.com/connect"
"golang.org/x/crypto/ssh"
"google.golang.org/protobuf/types/known/timestamppb"
)
// hasMacOSLabel reports whether any label, after trimming surrounding
// whitespace, starts with "namespace-profile-macos-".
func hasMacOSLabel(labels []string) bool {
	const macosProfilePrefix = "namespace-profile-macos-"
	for i := 0; i < len(labels); i++ {
		// A blank label trims to "" and can never carry the prefix.
		if strings.HasPrefix(strings.TrimSpace(labels[i]), macosProfilePrefix) {
			return true
		}
	}
	return false
}
// lockedBuffer is a bytes.Buffer guarded by a mutex so that one goroutine can
// inspect it while others write to it. The zero value is ready to use.
type lockedBuffer struct {
	mu sync.Mutex
	b  bytes.Buffer
}

// Write appends p to the buffer while holding the lock.
func (l *lockedBuffer) Write(p []byte) (int, error) {
	l.mu.Lock()
	defer l.mu.Unlock()
	return l.b.Write(p)
}

// Len returns the number of buffered bytes.
func (l *lockedBuffer) Len() int {
	l.mu.Lock()
	size := l.b.Len()
	l.mu.Unlock()
	return size
}

// String returns a copy of the buffered bytes as a string.
func (l *lockedBuffer) String() string {
	l.mu.Lock()
	snapshot := l.b.String()
	l.mu.Unlock()
	return snapshot
}
// macosSupportDiskSelectors translates a configured macOS base image ID into
// the selector labels sent with the instance shape.
//
// A blank ID is treated as "tahoe". When the ID contains '=', it is parsed as a
// comma-separated list of name=value selectors (for example
// "macos.version=26.x,image.with=xcode-26"); malformed pairs are skipped, and
// if at least one pair is valid that list is returned. Otherwise the ID is
// matched case-insensitively against the known presets, with unknown IDs
// falling back to macOS 26.x.
func macosSupportDiskSelectors(baseImageID string) []*stdlib.Label {
	id := strings.TrimSpace(baseImageID)
	if id == "" {
		id = "tahoe"
	}

	if strings.Contains(id, "=") {
		var explicit []*stdlib.Label
		for _, raw := range strings.Split(id, ",") {
			key, val, found := strings.Cut(strings.TrimSpace(raw), "=")
			key, val = strings.TrimSpace(key), strings.TrimSpace(val)
			if found && key != "" && val != "" {
				explicit = append(explicit, &stdlib.Label{Name: key, Value: val})
			}
		}
		if len(explicit) != 0 {
			return explicit
		}
	}

	// Human-friendly presets used by burrow config.
	version, pinXcode := "26.x", false
	switch strings.ToLower(id) {
	case "sonoma", "macos-14", "macos14", "14":
		version = "14.x"
	case "sequoia", "macos-15", "macos15", "15":
		version = "15.x"
	case "tahoe", "macos-26", "macos26", "26":
		// Constrain to the Xcode 26 support disk explicitly, since Apple builds
		// depend on Xcode being present and Compute currently errors if it can't
		// resolve a support disk selection.
		pinXcode = true
	}

	selectors := []*stdlib.Label{{Name: "macos.version", Value: version}}
	if pinXcode {
		selectors = append(selectors, &stdlib.Label{Name: "image.with", Value: "xcode-26"})
	}
	return selectors
}
// macosComputeBaseImageID maps a configured macOS base image ID to its
// canonical form ("sonoma", "sequoia" or "tahoe").
//
// A blank ID yields "tahoe". An ID containing '=' is a raw selector list from
// which no canonical ID can be inferred, so "" is returned. Known aliases are
// matched case-insensitively; any other ID is returned trimmed but otherwise
// unchanged.
func macosComputeBaseImageID(baseImageID string) string {
	trimmed := strings.TrimSpace(baseImageID)
	switch {
	case trimmed == "":
		return "tahoe"
	case strings.Contains(trimmed, "="):
		return ""
	}

	switch lowered := strings.ToLower(trimmed); lowered {
	case "sonoma", "sequoia", "tahoe":
		return lowered
	case "macos-14", "macos14", "14":
		return "sonoma"
	case "macos-15", "macos15", "15":
		return "sequoia"
	case "macos-26", "macos26", "26":
		return "tahoe"
	}
	return trimmed
}
// nscBearerTokenFile is the JSON layout of the token file:
// {"bearer_token":"..."}.
type nscBearerTokenFile struct {
	BearerToken string `json:"bearer_token"`
}

// readNSCBearerToken loads the Namespace bearer token from the file named by
// the NSC_TOKEN_FILE environment variable.
//
// The file may hold either a JSON object with a "bearer_token" field or the
// raw token itself; surrounding whitespace is stripped in both cases.
//
// It returns an error when NSC_TOKEN_FILE is unset, the file cannot be read,
// the file is blank, or the file is a JSON object without a usable
// "bearer_token". The last case is rejected on purpose: falling back to the
// raw contents there would send the JSON text itself as the bearer token.
func readNSCBearerToken() (string, error) {
	path := os.Getenv("NSC_TOKEN_FILE")
	if path == "" {
		return "", errors.New("NSC_TOKEN_FILE is required for macos runners")
	}
	raw, err := os.ReadFile(path)
	if err != nil {
		return "", fmt.Errorf("read NSC_TOKEN_FILE: %w", err)
	}
	trimmed := strings.TrimSpace(string(raw))
	if trimmed == "" {
		return "", errors.New("NSC_TOKEN_FILE is empty")
	}

	// Support the on-host format used by burrow: {"bearer_token":"..."}.
	if strings.HasPrefix(trimmed, "{") {
		var parsed nscBearerTokenFile
		if err := json.Unmarshal([]byte(trimmed), &parsed); err == nil {
			token := strings.TrimSpace(parsed.BearerToken)
			if token == "" {
				return "", errors.New("NSC_TOKEN_FILE has no bearer_token")
			}
			return token, nil
		}
	}

	// Fallback: allow a raw bearer token.
	return trimmed, nil
}
// parseMachineTypeCPUxMemGB parses a machine type of the form "CPUxMemoryGB"
// (for example "12x28") into a vCPU count and a memory size in megabytes
// (gigabytes * 1024).
//
// It returns an error when the string does not consist of exactly two
// base-10 integers separated by a single 'x', when either number is not
// positive, or when the memory size in megabytes would not fit in an int32.
func parseMachineTypeCPUxMemGB(machineType string) (vcpu int32, memoryMB int32, err error) {
	// Largest gigabyte count whose megabyte equivalent still fits in int32.
	const maxMemGB = (1<<31 - 1) / 1024

	parts := strings.Split(machineType, "x")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("invalid machine_type %q: expected CPUxMemoryGB (e.g. 12x28)", machineType)
	}
	cpu64, err := strconv.ParseInt(parts[0], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid machine_type %q: cpu: %w", machineType, err)
	}
	memGB64, err := strconv.ParseInt(parts[1], 10, 32)
	if err != nil {
		return 0, 0, fmt.Errorf("invalid machine_type %q: memory: %w", machineType, err)
	}
	if cpu64 <= 0 || memGB64 <= 0 {
		return 0, 0, fmt.Errorf("invalid machine_type %q: cpu and memory must be positive", machineType)
	}
	// Without this guard the int32 conversion below would silently wrap
	// (e.g. 4194304 GB would become 0 MB).
	if memGB64 > maxMemGB {
		return 0, 0, fmt.Errorf("invalid machine_type %q: memory %dGB is too large", machineType, memGB64)
	}
	return int32(cpu64), int32(memGB64 * 1024), nil
}
// launchMacOSRunner creates a macOS instance through the Namespace Compute API
// that runs the macOS bootstrap script as a job, waits for the instance to
// stop (bounded by ttl) and then destroys it.
//
// machineType must have the form "CPUxMemoryGB". The bearer token is read from
// the file named by NSC_TOKEN_FILE. The error of the stop wait, if any, is
// returned after the destroy has been attempted.
func (d *Dispatcher) launchMacOSRunner(ctx context.Context, runnerName string, req LaunchRequest, ttl time.Duration, machineType string) error {
	if machineType == "" {
		return errors.New("machine_type is required for macos runners")
	}
	cpus, memMB, err := parseMachineTypeCPUxMemGB(machineType)
	if err != nil {
		return err
	}
	token, err := readNSCBearerToken()
	if err != nil {
		return err
	}

	compute := computev1betaconnect.NewComputeServiceClient(
		&http.Client{Timeout: 60 * time.Second},
		d.opts.ComputeBaseURL,
	)

	runnerWorkdir := d.opts.WorkDir
	if strings.TrimSpace(runnerWorkdir) == "" {
		runnerWorkdir = "/tmp/forgejo-runner"
	}

	appEnv := map[string]string{
		"FORGEJO_INSTANCE_URL":   req.InstanceURL,
		"FORGEJO_RUNNER_TOKEN":   req.Token,
		"FORGEJO_RUNNER_NAME":    runnerName,
		"FORGEJO_RUNNER_LABELS":  strings.Join(req.Labels, ","),
		"FORGEJO_RUNNER_EXEC":    d.opts.Executor,
		"FORGEJO_RUNNER_WORKDIR": runnerWorkdir,
	}
	// Caller-supplied variables take precedence over the defaults above.
	for key, value := range req.ExtraEnv {
		appEnv[key] = value
	}
	// Best-effort caching: workflows call Scripts/nscloud-cache.sh, which is a
	// no-op unless NSC_CACHE_PATH is set. This may still be skipped if spacectl
	// lacks credentials, but setting the path is harmless and keeps behavior
	// consistent across macOS / Linux runners.
	if _, preset := appEnv["NSC_CACHE_PATH"]; !preset {
		appEnv["NSC_CACHE_PATH"] = "/Users/runner/.cache/nscloud"
	}

	expiry := timestamppb.New(time.Now().Add(ttl))

	shape := &computev1beta.InstanceShape{
		VirtualCpu:      cpus,
		MemoryMegabytes: memMB,
		MachineArch:     d.opts.MacosMachineArch,
		Os:              "macos",
		// Namespace macOS compute requires selectors to pick the base image
		// ("support disk"), otherwise instance creation fails.
		Selectors: macosSupportDiskSelectors(d.opts.MacosBaseImageID),
	}
	runnerApp := &computev1beta.ApplicationRequest{
		Name:         "forgejo-runner",
		Command:      "/bin/bash",
		Args:         []string{"-lc", macosBootstrapScript()},
		Environment:  appEnv,
		WorkloadType: computev1beta.ApplicationRequest_JOB,
	}
	instanceSpec := &computev1beta.CreateInstanceRequest{
		Shape:             shape,
		DocumentedPurpose: fmt.Sprintf("burrow forgejo runner %s", runnerName),
		Deadline:          expiry,
		Labels: []*stdlib.Label{
			{Name: "nsc.source", Value: "forgejo-nsc"},
			{Name: "burrow.service", Value: "forgejo-runner"},
			{Name: "burrow.runner", Value: runnerName},
		},
		Applications: []*computev1beta.ApplicationRequest{runnerApp},
	}
	// Only pin the experimental base image ID when a canonical one is known.
	if canonicalID := macosComputeBaseImageID(d.opts.MacosBaseImageID); canonicalID != "" {
		instanceSpec.Experimental = &computev1beta.CreateInstanceRequest_ExperimentalFeatures{
			MacosBaseImageId: canonicalID,
		}
	}

	d.log.Info("launching Namespace macos runner",
		"runner", runnerName,
		"compute_base_url", d.opts.ComputeBaseURL,
		"macos_base_image_id", d.opts.MacosBaseImageID,
		"shape", fmt.Sprintf("%dx%d", cpus, memMB/1024),
		"arch", d.opts.MacosMachineArch,
	)

	createCall := connect.NewRequest(instanceSpec)
	createCall.Header().Set("Authorization", "Bearer "+token)
	created, err := compute.CreateInstance(ctx, createCall)
	if err != nil {
		return fmt.Errorf("compute create instance failed: %w", err)
	}
	if created.Msg == nil || created.Msg.Metadata == nil {
		return errors.New("compute create instance returned no metadata")
	}

	id := created.Msg.Metadata.InstanceId
	stopErr := d.waitForMacOSRunnerStop(ctx, compute, token, runnerName, id, ttl)
	// Destroy with a background context so cleanup runs even if ctx is done.
	d.destroyComputeInstance(context.Background(), compute, token, runnerName, id)
	return stopErr
}
// runMacOSComputeSSHScript runs script on a Compute instance over SSH.
//
// It obtains the SSH endpoint, username and private key from the Compute API
// (authenticated with the token from NSC_TOKEN_FILE), connects to port 22 and
// feeds script to `/bin/bash` on stdin. If that exec attempt fails in a way
// that suggests the server only offers an interactive shell, it retries by
// opening a PTY shell, streaming the script base64-encoded into a temporary
// file and executing that file.
//
// The TCP dial honors ctx and a 30 second timeout, and the SSH handshake is
// bounded by a 30 second deadline. On failure the returned error includes at
// most the last 16 KiB of remote output.
func (d *Dispatcher) runMacOSComputeSSHScript(ctx context.Context, runnerName, instanceID, script string) error {
	// connectTimeout bounds both the TCP dial and the SSH handshake.
	const connectTimeout = 30 * time.Second

	bearer, err := readNSCBearerToken()
	if err != nil {
		return err
	}
	httpClient := &http.Client{Timeout: 60 * time.Second}
	client := computev1betaconnect.NewComputeServiceClient(httpClient, d.opts.ComputeBaseURL)
	getReq := connect.NewRequest(&computev1beta.GetSSHConfigRequest{
		InstanceId: instanceID,
		// TargetContainer is optional. Keep it empty to run commands in the default instance environment.
	})
	getReq.Header().Set("Authorization", "Bearer "+bearer)
	resp, err := client.GetSSHConfig(ctx, getReq)
	if err != nil {
		return fmt.Errorf("compute get ssh config failed: %w", err)
	}
	if resp.Msg == nil {
		return errors.New("compute get ssh config returned empty response")
	}
	if resp.Msg.Endpoint == "" {
		return errors.New("compute get ssh config returned empty endpoint")
	}
	if len(resp.Msg.SshPrivateKey) == 0 {
		return errors.New("compute get ssh config returned empty ssh private key")
	}
	if strings.TrimSpace(resp.Msg.Username) == "" {
		return errors.New("compute get ssh config returned empty username")
	}
	signer, err := ssh.ParsePrivateKey(resp.Msg.SshPrivateKey)
	if err != nil {
		return fmt.Errorf("parse ssh private key: %w", err)
	}

	addr := fmt.Sprintf("%s:22", resp.Msg.Endpoint)
	// ssh.NewClientConn does not apply ClientConfig.Timeout (only ssh.Dial
	// uses it, for the TCP connect), so bound the dial here and make it
	// cancellable through ctx.
	dialer := &net.Dialer{Timeout: connectTimeout}
	conn, err := dialer.DialContext(ctx, "tcp", addr)
	if err != nil {
		return fmt.Errorf("dial ssh endpoint: %w", err)
	}
	defer conn.Close()

	sshCfg := &ssh.ClientConfig{
		User:            resp.Msg.Username,
		Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(), // Endpoint is short-lived and key is delivered out-of-band.
		Timeout:         connectTimeout,
	}
	// Bound the handshake; the deadline is cleared again once it succeeded so
	// that the (long-running) session is not affected. Failing to set a
	// deadline only loses the bound, so those errors are ignored.
	_ = conn.SetDeadline(time.Now().Add(connectTimeout))
	c, chans, reqs, err := ssh.NewClientConn(conn, addr, sshCfg)
	if err != nil {
		return fmt.Errorf("ssh client conn: %w", err)
	}
	_ = conn.SetDeadline(time.Time{})
	clientSSH := ssh.NewClient(c, chans, reqs)
	defer clientSSH.Close()

	session, err := clientSSH.NewSession()
	if err != nil {
		return fmt.Errorf("ssh new session: %w", err)
	}
	defer session.Close()
	var buf bytes.Buffer
	session.Stdout = &buf
	session.Stderr = &buf
	session.Stdin = strings.NewReader(script)

	// Feed the bootstrap script via stdin so we don't need to quote/escape it.
	//
	// Note: Some SSH servers do not reliably parse exec strings with arguments.
	// Running bare `/bin/bash` still reads from stdin and avoids argument parsing.
	if err := session.Run("/bin/bash"); err != nil {
		outRaw := buf.String()
		out := strings.TrimSpace(outRaw)

		// Some SSH servers reject exec requests and only allow interactive shells,
		// and others will "succeed" but still interpret stdin under the default
		// login shell (showing the zsh banner / prompts).
		//
		// In those cases, retry via Shell() with a PTY.
		exitStatus := 0
		var exitErr *ssh.ExitError
		isExitErr := errors.As(err, &exitErr)
		if isExitErr {
			exitStatus = exitErr.ExitStatus()
		}
		looksInteractive := strings.Contains(outRaw, "The default interactive shell is now zsh") ||
			strings.Contains(outRaw, " runner$ ") ||
			strings.Contains(outRaw, "bash-3.2$")
		shouldFallback := !isExitErr || looksInteractive
		if shouldFallback {
			d.log.Warn("compute ssh exec bootstrap failed; retrying via interactive shell",
				"runner", runnerName,
				"instance", instanceID,
				"exit_status", exitStatus,
			)
			session2, err2 := clientSSH.NewSession()
			if err2 != nil {
				return fmt.Errorf("ssh new session (fallback): %w", err2)
			}
			defer session2.Close()

			// bytes.Buffer isn't safe for concurrent writes + reads; the SSH session
			// writes from background goroutines. Wrap it so we can poll for a prompt
			// before sending commands.
			lb := &lockedBuffer{}
			session2.Stdout = lb
			session2.Stderr = lb
			in, err2 := session2.StdinPipe()
			if err2 != nil {
				return fmt.Errorf("ssh stdin pipe (fallback): %w", err2)
			}
			// Request a PTY to match interactive semantics even when the caller
			// doesn't have a local terminal. A refused PTY is tolerated: the
			// shell is still attempted without one.
			_ = session2.RequestPty("xterm", 24, 80, nil)
			if err2 := session2.Shell(); err2 != nil {
				return fmt.Errorf("ssh shell (fallback): %w", err2)
			}

			// Wait briefly for the prompt/banner so the first command isn't dropped.
			// We also emit a sentinel `echo` to verify the TTY is live.
			deadline := time.Now().Add(3 * time.Second)
			for time.Now().Before(deadline) {
				if lb.Len() > 0 {
					break
				}
				time.Sleep(50 * time.Millisecond)
			}

			// Stream the script then exit. Prefer LF line endings; macOS shells and
			// PTYs can treat CRLF as literal CR characters (breaking heredoc
			// delimiters and quoting).
			writeTTY := func(s string) {
				if s == "" {
					return
				}
				s = strings.ReplaceAll(s, "\r\n", "\n")
				// Write errors surface through session2.Wait below.
				_, _ = io.WriteString(in, s)
			}
			scriptTTY := strings.ReplaceAll(script, "\r\n", "\n")

			// Cut down noise in logs and reduce the chance of ZSH line-editing
			// behavior corrupting long inputs.
			writeTTY("stty -echo 2>/dev/null || true\n")
			writeTTY("echo BURROW_BOOTSTRAP_TTY_OK\n")

			// Avoid heredocs for the script itself (PTY newline handling is fragile).
			// Instead, stream base64 in short chunks to a file, then decode and run it.
			enc := base64.StdEncoding.EncodeToString([]byte(scriptTTY))
			idSafe := strings.ReplaceAll(instanceID, "-", "_")
			b64Path := "/tmp/burrow-bootstrap-" + idSafe + ".b64"
			shPath := "/tmp/burrow-bootstrap-" + idSafe + ".sh"
			writeTTY("rm -f " + b64Path + " " + shPath + "\n")
			writeTTY(": > " + b64Path + "\n")
			const chunkSize = 80
			for i := 0; i < len(enc); i += chunkSize {
				j := i + chunkSize
				if j > len(enc) {
					j = len(enc)
				}
				chunk := enc[i:j]
				// Base64 chunks contain only [A-Za-z0-9+/=], which are safe to pass
				// unquoted. Avoid quotes entirely so a truncated line can't leave
				// the remote shell in a multi-line continuation state.
				writeTTY("printf %s " + chunk + " >> " + b64Path + "\n")
				time.Sleep(5 * time.Millisecond)
			}
			// macOS uses `base64 -D` (BSD), some environments use `-d` (GNU).
			writeTTY("base64 -D " + b64Path + " > " + shPath + " 2>/dev/null || base64 -d " + b64Path + " > " + shPath + "\n")
			writeTTY("/bin/bash " + shPath + "\n")
			writeTTY("exit\n")
			_ = in.Close()

			if err2 := session2.Wait(); err2 != nil {
				out2 := strings.TrimSpace(lb.String())
				if len(out2) > 16*1024 {
					out2 = out2[len(out2)-16*1024:]
				}
				return fmt.Errorf("compute ssh runner bootstrap failed (shell fallback): %w\n%s", err2, out2)
			}
			d.log.Info("macos runner bootstrap completed via compute ssh shell", "runner", runnerName, "instance", instanceID)
			return nil
		}

		if len(out) > 16*1024 {
			out = out[len(out)-16*1024:]
		}
		return fmt.Errorf("compute ssh runner bootstrap failed: %w\n%s", err, out)
	}
	d.log.Info("macos runner bootstrap completed via compute ssh", "runner", runnerName, "instance", instanceID)
	return nil
}
// waitForMacOSRunnerStop polls the Compute API every 15 seconds until the
// instance has stopped, the ttl elapses, or ctx is done.
//
// It returns nil when the instance stopped cleanly, the error reported by the
// status check when the instance stopped in an error state, an "exceeded ttl"
// error when the deadline passes first, and ctx.Err() when ctx is done. A
// non-positive ttl selects the dispatcher's default duration. Status checks
// that fail without reporting a stop are logged and retried.
func (d *Dispatcher) waitForMacOSRunnerStop(ctx context.Context, client computev1betaconnect.ComputeServiceClient, bearer, runnerName, instanceID string, ttl time.Duration) error {
	if ttl <= 0 {
		ttl = d.opts.DefaultDuration
	}
	deadline := time.Now().Add(ttl)
	ticker := time.NewTicker(15 * time.Second)
	defer ticker.Stop()

	for {
		stopped, err := d.checkComputeInstanceStopped(ctx, client, bearer, instanceID)
		switch {
		case stopped:
			// checkComputeInstanceStopped returns (true, err) for an instance
			// in the error state. That is terminal, so report it right away
			// instead of treating it as a transient failure and polling until
			// the ttl runs out.
			return err
		case err != nil:
			d.log.Warn("macos runner stop check failed", "runner", runnerName, "instance", instanceID, "err", err)
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("macos runner exceeded ttl (%s) without stopping", ttl)
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
		}
	}
}
// checkComputeInstanceStopped asks the Compute API to describe the instance
// and reports whether it has stopped.
//
// A NotFound response and the DESTROYED status both count as stopped. The
// ERROR status returns true together with an error that lists the shutdown
// reasons. For any other status the instance counts as stopped only when its
// metadata carries a destruction timestamp. Other API failures, and responses
// without metadata, are returned as (false, err).
func (d *Dispatcher) checkComputeInstanceStopped(ctx context.Context, client computev1betaconnect.ComputeServiceClient, bearer, instanceID string) (bool, error) {
	call := connect.NewRequest(&computev1beta.DescribeInstanceRequest{InstanceId: instanceID})
	call.Header().Set("Authorization", "Bearer "+bearer)

	resp, err := client.DescribeInstance(ctx, call)
	if err != nil {
		// NotFound means the instance is already gone.
		alreadyGone := connect.CodeOf(err) == connect.CodeNotFound
		if alreadyGone {
			return true, nil
		}
		return false, err
	}
	if resp.Msg == nil || resp.Msg.Metadata == nil {
		return false, errors.New("describe instance returned no metadata")
	}

	meta := resp.Msg.Metadata
	if meta.Status == computev1beta.InstanceMetadata_DESTROYED {
		return true, nil
	}
	if meta.Status != computev1beta.InstanceMetadata_ERROR {
		return meta.DestroyedAt != nil, nil
	}

	// Best-effort include shutdown reasons; do not include unbounded output.
	var reasons strings.Builder
	for _, reason := range resp.Msg.ShutdownReasons {
		if reason == nil {
			continue
		}
		if reasons.Len() > 0 {
			reasons.WriteString("; ")
		}
		reasons.WriteString(reason.String())
		if reasons.Len() > 1024 {
			break
		}
	}
	detail := strings.TrimSpace(reasons.String())
	if detail == "" {
		detail = "unknown shutdown reason"
	}
	return true, fmt.Errorf("instance entered error state: %s", detail)
}
// destroyComputeInstance asks the Compute API to destroy the instance, giving
// the call at most 30 seconds, and logs the outcome. A NotFound response is
// logged as an already-destroyed instance. Errors are logged, not returned.
// A nil ctx is replaced with context.Background().
func (d *Dispatcher) destroyComputeInstance(ctx context.Context, client computev1betaconnect.ComputeServiceClient, bearer, runnerName, instanceID string) {
	if ctx == nil {
		ctx = context.Background()
	}
	boundedCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	call := connect.NewRequest(&computev1beta.DestroyInstanceRequest{InstanceId: instanceID})
	call.Header().Set("Authorization", "Bearer "+bearer)

	_, err := client.DestroyInstance(boundedCtx, call)
	switch {
	case err == nil:
		d.log.Info("macos runner destroyed", "runner", runnerName, "instance", instanceID)
	case connect.CodeOf(err) == connect.CodeNotFound:
		d.log.Info("macos runner destroyed", "runner", runnerName, "instance", instanceID, "status", "not_found")
	default:
		d.log.Warn("macos runner destroy failed", "runner", runnerName, "instance", instanceID, "err", err)
	}
}
// macosBootstrapScript returns the bash script that launchMacOSRunner runs via
// `/bin/bash -lc` on a macOS instance. The script is configured through
// environment variables (FORGEJO_INSTANCE_URL, FORGEJO_RUNNER_TOKEN,
// FORGEJO_RUNNER_NAME, FORGEJO_RUNNER_LABELS, FORGEJO_RUNNER_EXEC and
// FORGEJO_RUNNER_WORKDIR) and performs these steps in order:
//
//  1. Enter the work directory and require curl.
//  2. Install nix with the Determinate Systems installer when nix is missing,
//     then source the nix daemon profile and write a nix.conf that enables
//     nix-command and flakes.
//  3. When forgejo-runner is not on PATH, download the runner source at the
//     pinned version and build it with Go, downloading the Go toolchain named
//     in the runner's go.mod (or a fallback version) when go is missing.
//  4. Write runner.yaml, giving every label without a ':' the ":host" suffix.
//  5. Register the runner and run exactly one job.
//
// NOTE(review): the runner.yaml heredocs use a quoted delimiter ('EOF'), so
// `${FORGEJO_RUNNER_NAME}` is written literally rather than expanded — confirm
// that is intended. The name is also passed to `forgejo-runner register --name`.
// NOTE(review): runner_exec is computed but not read when resolving labels in
// this script; every unqualified label gets ":host".
func macosBootstrapScript() string {
// Keep this script self-contained: it runs on a fresh macOS VM base image.
var b strings.Builder
b.WriteString(`set -euo pipefail
workdir="${FORGEJO_RUNNER_WORKDIR:-/tmp/forgejo-runner}"
mkdir -p "${workdir}"
cd "${workdir}"
export PATH="/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin:/usr/sbin:/sbin:${PATH}"
if ! command -v curl >/dev/null 2>&1; then
echo "curl is required" >&2
exit 1
fi
if ! command -v nix >/dev/null 2>&1; then
echo "Installing nix (Determinate Systems installer)..."
installer="/tmp/nix-installer.$$"
curl -fsSL -o "${installer}" https://install.determinate.systems/nix
chmod +x "${installer}"
if command -v sudo >/dev/null 2>&1; then
if sudo -n true 2>/dev/null; then
sudo -n sh "${installer}" install --no-confirm
else
sudo sh "${installer}" install --no-confirm
fi
else
sh "${installer}" install --no-confirm
fi
rm -f "${installer}"
fi
if [[ -f /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh ]]; then
# shellcheck disable=SC1091
. /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh
fi
export PATH="/nix/var/nix/profiles/default/bin:/nix/var/nix/profiles/default/sbin:${PATH}"
# Flake builds need nix-command + flakes enabled. Workflows may layer additional
# config, but ensure a sane default exists.
mkdir -p "${XDG_CONFIG_HOME:-$HOME/.config}/nix"
cat > "${XDG_CONFIG_HOME:-$HOME/.config}/nix/nix.conf" <<'EOF'
experimental-features = nix-command flakes
sandbox = true
fallback = true
substituters = https://cache.nixos.org
trusted-public-keys = cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=
EOF
mkdir -p bin
export PATH="${PWD}/bin:${PATH}"
runner_version="v12.6.4"
runner_src_tgz="forgejo-runner-${runner_version}.tar.gz"
runner_src_url="https://code.forgejo.org/forgejo/runner/archive/${runner_version}.tar.gz"
runner_src_dir="forgejo-runner-src"
if ! command -v forgejo-runner >/dev/null 2>&1; then
rm -rf "${runner_src_dir}"
mkdir -p "${runner_src_dir}"
curl -fsSL "${runner_src_url}" -o "${runner_src_tgz}"
tar -xzf "${runner_src_tgz}" -C "${runner_src_dir}" --strip-components=1
toolchain="$(grep -E '^toolchain ' "${runner_src_dir}/go.mod" | awk '{print $2}' | head -n 1 || true)"
if [ -z "${toolchain}" ]; then
toolchain="go1.25.7"
fi
if ! command -v go >/dev/null 2>&1; then
go_tgz="${toolchain}.darwin-arm64.tar.gz"
go_url="https://go.dev/dl/${go_tgz}"
curl -fsSL "${go_url}" -o "${go_tgz}"
tar -xzf "${go_tgz}"
export GOROOT="${PWD}/go"
export PATH="${GOROOT}/bin:${PATH}"
fi
export GOPATH="${PWD}/.gopath"
export GOMODCACHE="${PWD}/.gomodcache"
export GOCACHE="${PWD}/.gocache"
mkdir -p "${GOPATH}" "${GOMODCACHE}" "${GOCACHE}"
(cd "${runner_src_dir}" && go build -o "${workdir}/bin/forgejo-runner" .)
chmod +x "${workdir}/bin/forgejo-runner"
fi
cat > runner.yaml <<'EOF'
log:
level: info
runner:
file: .runner
capacity: 1
name: ${FORGEJO_RUNNER_NAME}
labels:
EOF
runner_exec="${FORGEJO_RUNNER_EXEC:-host}"
if [ "$runner_exec" = "shell" ]; then
runner_exec="host"
fi
resolved_labels=""
for label in ${FORGEJO_RUNNER_LABELS//,/ } ; do
if [ -z "${label}" ]; then
continue
fi
case "${label}" in
*:*) resolved="${label}" ;;
*)
resolved="${label}:host"
;;
esac
echo " - ${resolved}" >> runner.yaml
if [ -z "${resolved_labels}" ]; then
resolved_labels="${resolved}"
else
resolved_labels="${resolved_labels},${resolved}"
fi
done
cat >> runner.yaml <<'EOF'
cache:
enabled: false
EOF
forgejo-runner register \
--no-interactive \
--instance "${FORGEJO_INSTANCE_URL}" \
--token "${FORGEJO_RUNNER_TOKEN}" \
--name "${FORGEJO_RUNNER_NAME}" \
--labels "${resolved_labels}" \
--config runner.yaml
forgejo-runner one-job --config runner.yaml
`)
return b.String()
}

View file

@ -0,0 +1,373 @@
package nsc
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// normalizeMacOSNSCMachineType snaps a requested machine shape onto the
// discrete CPU and memory sizes that Namespace accepts for macOS instances.
//
// The input is parsed with parseMachineTypeCPUxMemGB; parse failures are
// returned unchanged. The result is formatted as "<cpu>x<memGB>". changed
// reports whether either dimension had to be adjusted.
func normalizeMacOSNSCMachineType(machineType string) (normalized string, changed bool, err error) {
	cpus, memMB, parseErr := parseMachineTypeCPUxMemGB(machineType)
	if parseErr != nil {
		return "", false, parseErr
	}
	gb := memMB / 1024
	if cpus <= 0 || gb <= 0 {
		return "", false, fmt.Errorf("invalid machine_type %q after parse: vcpu=%d memGB=%d", machineType, cpus, gb)
	}

	// snap picks the first ladder entry that is >= want. Requests larger than
	// every entry are clamped down to the largest one.
	snap := func(want int32, ladder []int32) (int32, bool) {
		for i := 0; i < len(ladder); i++ {
			if ladder[i] >= want {
				return ladder[i], ladder[i] != want
			}
		}
		return ladder[len(ladder)-1], true
	}

	// NSC CLI (and the underlying InstanceService) enforce discrete cpu/mem
	// sets for macOS. Observed allowed sets from Namespace API error output
	// for macos/arm64:
	//   cpu: [4 6 8 12]
	//   mem: [7 14 28 56] (GB)
	cpuOut, cpuAdjusted := snap(cpus, []int32{4, 6, 8, 12})
	memOut, memAdjusted := snap(gb, []int32{7, 14, 28, 56})

	return fmt.Sprintf("%dx%d", cpuOut, memOut), cpuAdjusted || memAdjusted, nil
}
// launchMacOSRunnerViaNSC creates a macOS Namespace instance with `nsc create`,
// runs the runner bootstrap script on it, and destroys the instance afterwards.
//
// machineType must be non-empty and NSC_TOKEN_FILE must be set in the
// environment. The requested shape is normalized to an allowed macOS shape and
// then tried first, followed by progressively smaller fallback shapes when an
// attempt times out or its output mentions capacity. Any other create failure
// aborts immediately. The returned error carries the combined nsc output.
func (d *Dispatcher) launchMacOSRunnerViaNSC(ctx context.Context, runnerName string, req LaunchRequest, ttl time.Duration, machineType string) error {
	if machineType == "" {
		return errors.New("machine_type is required for macos runners")
	}
	if strings.TrimSpace(os.Getenv("NSC_TOKEN_FILE")) == "" {
		// The Burrow forge host feeds NSC_TOKEN_FILE from the intake-backed runtime token.
		return errors.New("NSC_TOKEN_FILE is required for macos runners")
	}

	selectors := macosSelectorsArg(d.opts.MacosBaseImageID)
	if selectors == "" {
		return errors.New("macos selectors resolved empty")
	}

	normalizedMachineType := machineType
	if n, changed, err := normalizeMacOSNSCMachineType(machineType); err != nil {
		return err
	} else if changed {
		normalizedMachineType = n
	}

	// If capacity is constrained for the requested (large) shape, try a small
	// set of progressively smaller shapes before failing the dispatch request.
	// This keeps macOS builds flowing even when large runners are scarce.
	candidates := []string{normalizedMachineType, "8x28", "6x14", "4x7"}
	seen := map[string]struct{}{}
	var uniq []string
	for _, c := range candidates {
		c = strings.TrimSpace(c)
		if c == "" {
			continue
		}
		if _, ok := seen[c]; ok {
			continue
		}
		seen[c] = struct{}{}
		uniq = append(uniq, c)
	}
	candidates = uniq

	type attemptCfg struct {
		waitTimeout   time.Duration
		createTimeout time.Duration
	}
	attempts := []attemptCfg{
		{waitTimeout: 6 * time.Minute, createTimeout: 8 * time.Minute},
		{waitTimeout: 4 * time.Minute, createTimeout: 6 * time.Minute},
		{waitTimeout: 3 * time.Minute, createTimeout: 5 * time.Minute},
	}

	// createInstance runs one `nsc create` attempt for shape mt and returns
	// the new instance ID together with the command's combined output.
	createInstance := func(mt string, a attemptCfg) (instanceID string, out string, err error) {
		tmpDir, err := os.MkdirTemp("", "forgejo-nsc-macos-*")
		if err != nil {
			return "", "", fmt.Errorf("mktemp: %w", err)
		}
		defer os.RemoveAll(tmpDir)

		metaPath := filepath.Join(tmpDir, "create.json")
		cidPath := filepath.Join(tmpDir, "create.cid")

		arch := strings.TrimSpace(d.opts.MacosMachineArch)
		if arch == "" {
			arch = "arm64"
		}
		// Namespace CLI requires the "os/arch:" prefix to create a macOS instance.
		// Without it, `nsc create` defaults to Linux even if selectors include macos.*.
		machineType := fmt.Sprintf("macos/%s:%s", arch, mt)

		args := []string{
			"create",
			"--duration", ttl.String(),
			"--machine_type", machineType,
			"--selectors", selectors,
			"--bare",
			"--cidfile", cidPath,
			"--log_actions",
			"--purpose", fmt.Sprintf("burrow forgejo runner %s", runnerName),
			// Prefer plain output for debuggability (progress, capacity errors, etc).
			"--output", "plain",
			"--output_json_to", metaPath,
			// macOS instances can take a while to become ready.
			"--wait_timeout", a.waitTimeout.String(),
		}
		args = prependNSCRegionArgs(args, d.opts.ComputeBaseURL)

		createCtx, cancel := context.WithTimeout(ctx, a.createTimeout)
		defer cancel()

		cmd := exec.CommandContext(createCtx, d.opts.BinaryPath, args...)
		var buf bytes.Buffer
		cmd.Stdout = &buf
		cmd.Stderr = &buf
		if err := cmd.Run(); err != nil {
			// Best-effort cleanup: if the instance ID was written before the command failed
			// (or before we timed it out), attempt to destroy it to avoid idling machines.
			if instanceID := strings.TrimSpace(mustReadFile(cidPath)); instanceID != "" {
				d.destroyNSCInstance(context.Background(), runnerName, instanceID)
			}
			if errors.Is(createCtx.Err(), context.DeadlineExceeded) {
				return "", buf.String(), fmt.Errorf("nsc create timed out after %s", a.createTimeout)
			}
			return "", buf.String(), fmt.Errorf("nsc create failed: %w", err)
		}

		instanceID, err = readNSCCreateInstanceID(metaPath)
		if err != nil {
			return "", buf.String(), fmt.Errorf("nsc create output parse failed: %w", err)
		}
		if instanceID == "" {
			return "", buf.String(), fmt.Errorf("nsc create returned empty instance id")
		}
		return instanceID, buf.String(), nil
	}

	var (
		instanceID string
		lastOut    string
		lastErr    error
	)
	for i, mt := range candidates {
		// There can be more candidate shapes than attempt configs (four
		// shapes, three configs). Clamp the index BEFORE indexing so the
		// extra candidates reuse the last config instead of panicking.
		idx := i
		if idx >= len(attempts) {
			idx = len(attempts) - 1
		}
		a := attempts[idx]

		d.log.Info("launching Namespace macos runner via nsc",
			"runner", runnerName,
			"attempt", i+1,
			"machine_type", mt,
			"requested_machine_type", machineType,
			"selectors", selectors,
		)

		id, out, err := createInstance(mt, a)
		lastOut = out
		lastErr = err
		if err != nil {
			// Timeouts are treated as retryable (capacity constrained).
			if strings.Contains(err.Error(), "timed out") || strings.Contains(strings.ToLower(out), "capacity") {
				continue
			}
			return fmt.Errorf("%w\n%s", err, out)
		}
		instanceID = id
		break
	}
	if instanceID == "" {
		if lastErr != nil {
			return fmt.Errorf("%w\n%s", lastErr, lastOut)
		}
		return fmt.Errorf("nsc create failed without producing an instance id\n%s", lastOut)
	}

	// Always attempt cleanup even if the runner fails.
	defer d.destroyNSCInstance(context.Background(), runnerName, instanceID)

	script := macosBootstrapWrapperScript(runnerName, req, d.opts.Executor, d.opts.WorkDir)
	// Use the Compute SSH config endpoint (direct TCP) instead of `nsc ssh`, which
	// relies on a websocket-based SSH proxy that is not supported by the
	// revokable tenant token we run the dispatcher with.
	if err := d.runMacOSComputeSSHScript(ctx, runnerName, instanceID, script); err != nil {
		return err
	}
	return nil
}
// mustReadFile returns the contents of path as a string, or the empty string
// when the file cannot be read. Despite the "must" prefix it never panics.
func mustReadFile(path string) string {
	if data, err := os.ReadFile(path); err == nil {
		return string(data)
	}
	return ""
}
// macosSelectorsArg converts a configured macOS base image identifier into the
// value passed to `nsc create --selectors`.
//
// An empty identifier is treated as "tahoe". An identifier containing "=" is
// assumed to already be a selector list and is returned trimmed but otherwise
// untouched (e.g. "macos.version=26.x,image.with=xcode-26"). Known release
// aliases are matched case-insensitively; anything else falls back to
// "macos.version=26.x".
func macosSelectorsArg(baseImageID string) string {
	image := strings.TrimSpace(baseImageID)
	switch {
	case image == "":
		image = "tahoe"
	case strings.Contains(image, "="):
		return image
	}

	releases := []struct {
		selectors string
		aliases   []string
	}{
		{"macos.version=14.x", []string{"sonoma", "macos-14", "macos14", "14"}},
		{"macos.version=15.x", []string{"sequoia", "macos-15", "macos15", "15"}},
		{"macos.version=26.x,image.with=xcode-26", []string{"tahoe", "macos-26", "macos26", "26"}},
	}
	key := strings.ToLower(image)
	for _, release := range releases {
		for _, alias := range release.aliases {
			if alias == key {
				return release.selectors
			}
		}
	}
	return "macos.version=26.x"
}
// nscCreateMetadata is the subset of the JSON document written by
// `nsc create --output_json_to` that is needed to identify the new instance.
type nscCreateMetadata struct {
	InstanceID string `json:"instance_id"`
	ClusterID  string `json:"cluster_id"`
	ID         string `json:"id"`
}

// readNSCCreateInstanceID loads the create metadata file at path and returns
// the first non-empty identifier, checked in the order instance_id,
// cluster_id, id. It returns ("", nil) when the document parses but carries
// none of them; read and decode failures are returned as errors.
func readNSCCreateInstanceID(path string) (string, error) {
	data, readErr := os.ReadFile(path)
	if readErr != nil {
		return "", fmt.Errorf("read %s: %w", path, readErr)
	}

	meta := nscCreateMetadata{}
	if decodeErr := json.Unmarshal(data, &meta); decodeErr != nil {
		return "", decodeErr
	}

	for _, candidate := range [...]string{meta.InstanceID, meta.ClusterID, meta.ID} {
		if candidate != "" {
			return candidate, nil
		}
	}
	return "", nil
}
// destroyNSCInstance runs `nsc destroy --force <instanceID>` with a two-minute
// timeout. It is best-effort: failures are logged as warnings (including the
// command's combined output) and never returned. A nil ctx is treated as
// context.Background().
func (d *Dispatcher) destroyNSCInstance(ctx context.Context, runnerName, instanceID string) {
	parent := ctx
	if parent == nil {
		parent = context.Background()
	}
	destroyCtx, cancel := context.WithTimeout(parent, 2*time.Minute)
	defer cancel()

	argv := prependNSCRegionArgs([]string{"destroy", "--force", instanceID}, d.opts.ComputeBaseURL)

	// CombinedOutput captures stdout and stderr into one buffer.
	output, err := exec.CommandContext(destroyCtx, d.opts.BinaryPath, argv...).CombinedOutput()
	if err == nil {
		d.log.Info("nsc instance destroyed", "runner", runnerName, "instance", instanceID)
		return
	}
	d.log.Warn("nsc destroy failed", "runner", runnerName, "instance", instanceID, "err", err, "output", strings.TrimSpace(string(output)))
}
// macosBootstrapWrapperScript builds the shell script executed on a macOS
// instance: a `set -euo pipefail` line, one `export NAME='value'` line per
// environment variable, a blank line, and then the body returned by
// macosBootstrapScript.
//
// Entries in req.ExtraEnv override the built-in FORGEJO_* variables of the
// same name. An empty workdir defaults to /tmp/forgejo-runner. The export
// lines are emitted in map iteration order, which Go leaves unspecified.
func macosBootstrapWrapperScript(runnerName string, req LaunchRequest, executor, workdir string) string {
	if strings.TrimSpace(workdir) == "" {
		workdir = "/tmp/forgejo-runner"
	}

	// Pass all values via stdin script so secrets do not appear in the nsc ssh argv.
	vars := make(map[string]string, 6+len(req.ExtraEnv))
	vars["FORGEJO_INSTANCE_URL"] = req.InstanceURL
	vars["FORGEJO_RUNNER_TOKEN"] = req.Token
	vars["FORGEJO_RUNNER_NAME"] = runnerName
	vars["FORGEJO_RUNNER_LABELS"] = strings.Join(req.Labels, ",")
	vars["FORGEJO_RUNNER_EXEC"] = executor
	vars["FORGEJO_RUNNER_WORKDIR"] = workdir
	for name, value := range req.ExtraEnv {
		vars[name] = value
	}

	var script strings.Builder
	script.WriteString("set -euo pipefail\n")
	for name, value := range vars {
		// Skip blank names; they cannot form a valid export statement.
		if strings.TrimSpace(name) == "" {
			continue
		}
		// Values are single-quoted so the shell does not expand them.
		script.WriteString("export " + name + "=" + shellSingleQuote(value) + "\n")
	}
	script.WriteString("\n")
	script.WriteString(macosBootstrapScript())
	return script.String()
}
// shellSingleQuote wraps value in single quotes so a POSIX shell treats it as
// one literal word.
//
// A single quote cannot appear inside a single-quoted string, so each embedded
// quote is written as '"'"': close the current string, emit a double-quoted
// single quote, and reopen the string. The replacement must not contain
// backslashes: backslashes outside quotes would escape the double quotes and
// leave the word unterminated.
func shellSingleQuote(value string) string {
	return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'"
}
// prependNSCRegionArgs returns a new argument list consisting of
// "--region <region>" followed by args.
//
// The region is taken from the NSC_REGION environment variable when set,
// otherwise derived from computeBaseURL, and finally defaults to "ord4".
func prependNSCRegionArgs(args []string, computeBaseURL string) []string {
	region := strings.TrimSpace(os.Getenv("NSC_REGION"))
	if region == "" {
		if region = regionFromComputeBaseURL(computeBaseURL); region == "" {
			// Default to the burrow region used for other Namespace integrations.
			region = "ord4"
		}
	}

	full := make([]string, 0, len(args)+2)
	full = append(full, "--region", region)
	return append(full, args...)
}
// regionFromComputeBaseURL extracts the region label from a compute endpoint
// URL, e.g. "https://ord4.compute.namespaceapis.com" yields "ord4".
//
// It returns "" when raw is blank, does not parse as a URL, has no hostname
// (for example when the scheme is missing), or when the hostname does not
// contain a ".compute." component.
func regionFromComputeBaseURL(raw string) string {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return ""
	}
	parsed, err := url.Parse(trimmed)
	if err != nil {
		return ""
	}

	host := parsed.Hostname()
	// Every "*.compute.namespaceapis.com" host also contains ".compute.", so
	// a single containment check covers both accepted forms.
	if !strings.Contains(host, ".compute.") {
		return ""
	}

	// ord4.compute.namespaceapis.com -> ord4
	region, _, _ := strings.Cut(host, ".")
	return region
}

View file

@ -0,0 +1,59 @@
package nsc
import (
"regexp"
"strings"
)
// windowsDefaultMachineType is the machine type normalizeWindowsMachineType
// returns for the "large" Windows profile and whenever nothing more specific
// can be derived from the request.
const windowsDefaultMachineType = "windows/amd64:8x16"
// cpuMemShapePattern matches a bare "<digits>x<digits>" shape such as "4x8",
// i.e. a machine type without an "os/arch:" prefix.
var cpuMemShapePattern = regexp.MustCompile(`^\d+x\d+$`)
// hasWindowsLabel reports whether any label selects a Namespace Windows
// profile. Surrounding whitespace is ignored and only the part before the
// first ":" (the label name, without an executor suffix such as ":host") is
// compared against the "namespace-profile-windows-" prefix.
func hasWindowsLabel(labels []string) bool {
	const profilePrefix = "namespace-profile-windows-"
	for i := range labels {
		// Cut returns the whole string as name when there is no ":".
		name, _, _ := strings.Cut(strings.TrimSpace(labels[i]), ":")
		if strings.HasPrefix(name, profilePrefix) {
			return true
		}
	}
	return false
}
// normalizeWindowsMachineType resolves the machine type used for a Windows
// instance.
//
// Resolution order:
//  1. a value already prefixed with "windows/" is returned as-is (trimmed);
//  2. a bare "<cpu>x<mem>" shape gets the "windows/amd64:" prefix;
//  3. otherwise the first label naming a small/medium/large Windows profile
//     selects the matching shape;
//  4. if nothing matches, windowsDefaultMachineType is returned.
func normalizeWindowsMachineType(machineType string, labels []string) string {
	shape := strings.TrimSpace(machineType)
	switch {
	case strings.HasPrefix(shape, "windows/"):
		return shape
	case cpuMemShapePattern.MatchString(shape):
		return "windows/amd64:" + shape
	}

	// Label-derived defaults: keep a simple shape ladder for explicit profile sizes.
	ladder := []struct {
		prefix  string
		machine string
	}{
		{"namespace-profile-windows-small", "windows/amd64:2x4"},
		{"namespace-profile-windows-medium", "windows/amd64:4x8"},
		{"namespace-profile-windows-large", windowsDefaultMachineType},
	}
	for _, raw := range labels {
		// Drop any ":executor" suffix before matching the profile name.
		name, _, _ := strings.Cut(strings.TrimSpace(raw), ":")
		for _, rung := range ladder {
			if strings.HasPrefix(name, rung.prefix) {
				return rung.machine
			}
		}
	}
	return windowsDefaultMachineType
}
// powershellSingleQuote renders value as a PowerShell single-quoted string
// literal. Inside such a literal the only escape is a doubled quote, so each
// ' in value becomes ''.
func powershellSingleQuote(value string) string {
	pieces := strings.Split(value, "'")
	return "'" + strings.Join(pieces, "''") + "'"
}

View file

@ -0,0 +1,98 @@
package nsc
import "testing"
// TestHasWindowsLabel checks that only "namespace-profile-windows-*" labels,
// with or without an executor suffix, are recognised as Windows labels.
func TestHasWindowsLabel(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name   string
		labels []string
		want   bool
	}
	scenarios := []scenario{
		{name: "namespace windows label", labels: []string{"namespace-profile-windows-large"}, want: true},
		{name: "namespace windows label with host suffix", labels: []string{"namespace-profile-windows-large:host"}, want: true},
		{name: "non namespace windows-like label", labels: []string{"burrow-winrunner:host"}, want: false},
		{name: "macos label", labels: []string{"namespace-profile-macos-large"}, want: false},
	}

	for i := range scenarios {
		// Copy per iteration so each parallel subtest sees its own scenario.
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()
			if got := hasWindowsLabel(sc.labels); got != sc.want {
				t.Fatalf("hasWindowsLabel(%v) = %v, want %v", sc.labels, got, sc.want)
			}
		})
	}
}
// TestNormalizeWindowsMachineType checks the exact machine type produced for
// explicit values, bare shapes, and label-derived defaults.
func TestNormalizeWindowsMachineType(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name    string
		machine string
		labels  []string
		want    string
	}
	scenarios := []scenario{
		{
			name:    "explicit windows machine type keeps value",
			machine: "windows/amd64:8x16",
			labels:  []string{"namespace-profile-windows-large"},
			want:    "windows/amd64:8x16",
		},
		{
			name:    "shape only is normalized",
			machine: "4x8",
			labels:  []string{"namespace-profile-windows-large"},
			want:    "windows/amd64:4x8",
		},
		{
			name:    "large label default",
			machine: "",
			labels:  []string{"namespace-profile-windows-large"},
			want:    "windows/amd64:8x16",
		},
		{
			name:    "medium label default",
			machine: "",
			labels:  []string{"namespace-profile-windows-medium"},
			want:    "windows/amd64:4x8",
		},
		{
			name:    "fallback default",
			machine: "",
			labels:  []string{"namespace-profile-windows-custom"},
			want:    "windows/amd64:8x16",
		},
	}

	for i := range scenarios {
		// Copy per iteration so each parallel subtest sees its own scenario.
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()
			if got := normalizeWindowsMachineType(sc.machine, sc.labels); got != sc.want {
				t.Fatalf("normalizeWindowsMachineType(%q, %v) = %q, want %q", sc.machine, sc.labels, got, sc.want)
			}
		})
	}
}

View file

@ -0,0 +1,499 @@
package nsc
import (
"bufio"
"bytes"
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// windowsProxyOutput mirrors the JSON document decoded from the output of
// `nsc instance proxy ... -o json`: a proxy endpoint plus the RDP credentials
// of the instance.
type windowsProxyOutput struct {
// Endpoint is the value of the "endpoint" key.
Endpoint string `json:"endpoint"`
// RDP holds the "rdp" object; only its credentials are decoded.
RDP struct {
Credentials struct {
Username string `json:"username"`
Password string `json:"password"`
} `json:"credentials"`
} `json:"rdp"`
}
// launchWindowsRunnerViaWinRM renders the Windows runner bootstrap script for
// req and executes it on a freshly created Windows instance via
// launchWindowsScriptViaWinRM.
func (d *Dispatcher) launchWindowsRunnerViaWinRM(ctx context.Context, runnerName string, req LaunchRequest, ttl time.Duration, machineType string) error {
	bootstrap := windowsBootstrapScript(runnerName, req, d.opts.Executor, d.opts.WorkDir)
	return d.launchWindowsScriptViaWinRM(ctx, runnerName, ttl, machineType, req.Labels, bootstrap)
}
// launchWindowsScriptViaWinRM creates a Windows instance, runs script on it
// over WinRM, and destroys the instance when done.
//
// Steps, in order: create the instance (a non-positive ttl falls back to the
// dispatcher's default duration), resolve credentials, probe that the
// instance exposes a winrm service, open a port-forward to WinRM, and execute
// the PowerShell script. The first failing step's error is returned. The
// instance is destroyed on every path after a successful create.
func (d *Dispatcher) launchWindowsScriptViaWinRM(ctx context.Context, runnerName string, ttl time.Duration, machineType string, labels []string, script string) error {
	lifetime := ttl
	if lifetime <= 0 {
		lifetime = d.opts.DefaultDuration
	}

	shape := normalizeWindowsMachineType(machineType, labels)
	instanceID, createOutput, err := d.createWindowsInstance(ctx, runnerName, lifetime, shape)
	if err != nil {
		return fmt.Errorf("windows create failed: %w\n%s", err, createOutput)
	}
	// Cleanup uses a fresh context so it still runs when ctx is cancelled.
	defer d.destroyNSCInstance(context.Background(), runnerName, instanceID)

	user, pass, err := d.resolveWindowsCredentials(ctx, instanceID)
	if err != nil {
		return err
	}

	if err = d.probeWindowsWinRMService(ctx, instanceID); err != nil {
		return err
	}

	endpoint, stopForward, err := d.startWindowsWinRMPortForward(ctx, instanceID)
	if err != nil {
		return err
	}
	defer stopForward()

	return d.runWindowsWinRMPowerShell(ctx, endpoint, user, pass, script)
}
// createWindowsInstance runs `nsc create` for a Windows machine type and
// returns the new instance ID along with the command's combined
// stdout/stderr.
//
// The command is given eight minutes in total. If it fails after an instance
// ID was already written to the cidfile, that instance is destroyed
// best-effort. output is populated on both success and failure once the
// command has run.
func (d *Dispatcher) createWindowsInstance(ctx context.Context, runnerName string, ttl time.Duration, machineType string) (instanceID string, output string, err error) {
	const createTimeout = 8 * time.Minute

	scratch, err := os.MkdirTemp("", "forgejo-nsc-windows-*")
	if err != nil {
		return "", "", fmt.Errorf("mktemp: %w", err)
	}
	defer os.RemoveAll(scratch)

	var (
		metaPath = filepath.Join(scratch, "create.json")
		cidPath  = filepath.Join(scratch, "create.cid")
	)

	argv := prependNSCRegionArgs([]string{
		"create",
		"--duration", ttl.String(),
		"--machine_type", machineType,
		"--cidfile", cidPath,
		"--purpose", fmt.Sprintf("burrow forgejo runner %s", runnerName),
		"--output", "plain",
		"--output_json_to", metaPath,
		"--wait_timeout", "6m",
	}, d.opts.ComputeBaseURL)

	createCtx, cancel := context.WithTimeout(ctx, createTimeout)
	defer cancel()

	// CombinedOutput captures stdout and stderr into one buffer.
	combined, runErr := exec.CommandContext(createCtx, d.opts.BinaryPath, argv...).CombinedOutput()
	output = string(combined)
	if runErr != nil {
		// The instance may exist even though the command failed; avoid leaking it.
		if leaked := strings.TrimSpace(mustReadFile(cidPath)); leaked != "" {
			d.destroyNSCInstance(context.Background(), runnerName, leaked)
		}
		if errors.Is(createCtx.Err(), context.DeadlineExceeded) {
			return "", output, fmt.Errorf("nsc create timed out after %s", createTimeout)
		}
		return "", output, fmt.Errorf("nsc create failed: %w", runErr)
	}

	id, parseErr := readNSCCreateInstanceID(metaPath)
	switch {
	case parseErr != nil:
		return "", output, fmt.Errorf("nsc create output parse failed: %w", parseErr)
	case id == "":
		return "", output, errors.New("nsc create returned empty instance id")
	}
	return id, output, nil
}
// resolveWindowsCredentials obtains the RDP username and password of a Windows
// instance by starting `nsc instance proxy <id> -s rdp -o json`, redirecting
// its stdout to a temporary file, and polling that file once per second for up
// to 45 seconds until a JSON document with both credentials appears.
//
// The proxy process is killed once polling ends, and the temporary directory
// is removed on return. An error is returned when the process exits with an
// error before credentials were seen, or when no credentials were found by the
// deadline (the error then includes the captured stdout and stderr).
func (d *Dispatcher) resolveWindowsCredentials(ctx context.Context, instanceID string) (username string, password string, err error) {
tmpDir, err := os.MkdirTemp("", "forgejo-nsc-winproxy-*")
if err != nil {
return "", "", fmt.Errorf("mktemp: %w", err)
}
defer os.RemoveAll(tmpDir)
outPath := filepath.Join(tmpDir, "proxy.json")
outFile, err := os.Create(outPath)
if err != nil {
return "", "", fmt.Errorf("create proxy output file: %w", err)
}
defer outFile.Close()
var stderr bytes.Buffer
args := []string{"instance", "proxy", instanceID, "-s", "rdp", "-o", "json"}
args = prependNSCRegionArgs(args, d.opts.ComputeBaseURL)
// Hard upper bound for the proxy process; the polling loop below gives up earlier.
proxyCtx, cancel := context.WithTimeout(ctx, 90*time.Second)
defer cancel()
cmd := exec.CommandContext(proxyCtx, d.opts.BinaryPath, args...)
cmd.Stdout = outFile
cmd.Stderr = &stderr
if err := cmd.Start(); err != nil {
return "", "", fmt.Errorf("start nsc instance proxy: %w", err)
}
// Reap the process in the background. waitErr is written before waitDone is
// closed, so it may only be read after receiving from waitDone.
waitDone := make(chan struct{})
var waitErr error
go func() {
waitErr = cmd.Wait()
close(waitDone)
}()
var payload windowsProxyOutput
deadline := time.Now().Add(45 * time.Second)
for time.Now().Before(deadline) {
// Read errors are ignored on purpose: the file is simply re-read on the next tick.
raw, _ := os.ReadFile(outPath)
jsonBlob := extractJSON(string(raw))
if jsonBlob != "" {
if err := json.Unmarshal([]byte(jsonBlob), &payload); err == nil {
username = strings.TrimSpace(payload.RDP.Credentials.Username)
password = strings.TrimSpace(payload.RDP.Credentials.Password)
if username != "" && password != "" {
break
}
}
}
// Non-blocking check for early process exit.
// NOTE(review): when the process exits cleanly without credentials, the loop
// keeps polling until the deadline rather than stopping — confirm intended.
select {
case <-waitDone:
if waitErr != nil {
return "", "", fmt.Errorf("nsc instance proxy exited before credentials were available: %w\n%s", waitErr, stderr.String())
}
default:
}
time.Sleep(1 * time.Second)
}
// The proxy is only needed for its credential output; stop it and wait for
// the reaper goroutine before reading the final state.
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
<-waitDone
if username == "" || password == "" {
raw, _ := os.ReadFile(outPath)
return "", "", fmt.Errorf("failed to resolve windows credentials from nsc instance proxy output\nstdout=%s\nstderr=%s", strings.TrimSpace(string(raw)), strings.TrimSpace(stderr.String()))
}
return username, password, nil
}
// probeWindowsWinRMService checks that the instance exposes a "winrm" proxy
// service by running `nsc instance proxy <id> -s winrm -o json --once` with a
// 15-second timeout.
//
// It returns nil as soon as the output contains a JSON endpoint, regardless of
// the command's exit status. Otherwise the error distinguishes, in order: the
// service being absent (rdp-only instance), the probe timing out, the command
// failing, and the command succeeding without printing an endpoint.
func (d *Dispatcher) probeWindowsWinRMService(ctx context.Context, instanceID string) error {
	argv := prependNSCRegionArgs(
		[]string{"instance", "proxy", instanceID, "-s", "winrm", "-o", "json", "--once"},
		d.opts.ComputeBaseURL,
	)

	probeCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
	defer cancel()

	// CombinedOutput captures stdout and stderr into one buffer.
	combined, runErr := exec.CommandContext(probeCtx, d.opts.BinaryPath, argv...).CombinedOutput()
	raw := strings.TrimSpace(string(combined))

	if endpoint, ok := parseProxyEndpoint(raw); ok && endpoint != "" {
		return nil
	}

	switch {
	case indicatesMissingProxyService(raw, "winrm"):
		return fmt.Errorf("namespace windows non-interactive channel unavailable: instance does not expose winrm service (rdp-only)\n%s", raw)
	case errors.Is(probeCtx.Err(), context.DeadlineExceeded):
		return fmt.Errorf("timed out probing Namespace winrm service before bootstrap\n%s", raw)
	case runErr != nil:
		return fmt.Errorf("nsc winrm service probe failed: %w\n%s", runErr, raw)
	default:
		return fmt.Errorf("nsc winrm service probe did not yield endpoint output\n%s", raw)
	}
}
// parseProxyEndpoint pulls the "endpoint" value out of nsc proxy output. The
// JSON document is first isolated from any surrounding text with extractJSON.
// ok is false when no JSON is found, it fails to decode, or the endpoint is
// missing or blank; the returned endpoint is whitespace-trimmed.
func parseProxyEndpoint(raw string) (string, bool) {
	blob := extractJSON(raw)
	if blob == "" {
		return "", false
	}

	decoded := struct {
		Endpoint string `json:"endpoint"`
	}{}
	if json.Unmarshal([]byte(blob), &decoded) != nil {
		return "", false
	}

	if endpoint := strings.TrimSpace(decoded.Endpoint); endpoint != "" {
		return endpoint, true
	}
	return "", false
}
// indicatesMissingProxyService reports whether raw contains the message
// `does not have service "<service>"` for the given service name. The name is
// whitespace-trimmed first, and a blank name never matches.
func indicatesMissingProxyService(raw string, service string) bool {
	name := strings.TrimSpace(service)
	return name != "" && strings.Contains(raw, fmt.Sprintf("does not have service %q", name))
}
// startWindowsWinRMPortForward starts `nsc instance port-forward <id>
// --target_port 5985` and waits for it to print a line beginning with
// "Listening on ". The remainder of that line is returned as endpoint.
//
// On success, stop cancels the command's context, kills the process and waits
// for it to be reaped; the caller must invoke it. On every failure path
// (output read error, early exit, 45-second timeout, or ctx cancellation) the
// process is torn down here and stop is nil.
func (d *Dispatcher) startWindowsWinRMPortForward(ctx context.Context, instanceID string) (endpoint string, stop func(), err error) {
args := []string{"instance", "port-forward", instanceID, "--target_port", "5985"}
args = prependNSCRegionArgs(args, d.opts.ComputeBaseURL)
forwardCtx, cancel := context.WithCancel(ctx)
cmd := exec.CommandContext(forwardCtx, d.opts.BinaryPath, args...)
stdout, err := cmd.StdoutPipe()
if err != nil {
cancel()
return "", nil, fmt.Errorf("port-forward stdout pipe: %w", err)
}
var stderr bytes.Buffer
cmd.Stderr = &stderr
if err := cmd.Start(); err != nil {
cancel()
return "", nil, fmt.Errorf("start nsc port-forward: %w", err)
}
// Reap the process in the background. waitErr is written before waitDone is
// closed, so it may only be read after receiving from waitDone.
// NOTE(review): os/exec documents that Wait must not be called before all
// reads from StdoutPipe have completed; here Wait runs concurrently with the
// scanner below — confirm this cannot drop the "Listening on" line when the
// process exits quickly.
waitDone := make(chan struct{})
var waitErr error
go func() {
waitErr = cmd.Wait()
close(waitDone)
}()
// Both channels are buffered so the scanner goroutine can finish even when
// the select below has already taken another branch.
endpointCh := make(chan string, 1)
scanErrCh := make(chan error, 1)
go func() {
scanner := bufio.NewScanner(stdout)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(line, "Listening on ") {
endpointCh <- strings.TrimSpace(strings.TrimPrefix(line, "Listening on "))
// NOTE(review): stdout is no longer read after this point — confirm the
// port-forward does not keep writing enough to block on a full pipe.
return
}
}
if err := scanner.Err(); err != nil {
scanErrCh <- err
}
}()
// Wait for whichever happens first: endpoint announced, read failure,
// process exit, startup timeout, or caller cancellation.
select {
case endpoint = <-endpointCh:
stop = func() {
cancel()
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
<-waitDone
}
return endpoint, stop, nil
case err := <-scanErrCh:
cancel()
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
<-waitDone
return "", nil, fmt.Errorf("failed reading port-forward output: %w", err)
case <-waitDone:
cancel()
if waitErr != nil {
return "", nil, fmt.Errorf("nsc port-forward exited early: %w\n%s", waitErr, stderr.String())
}
return "", nil, fmt.Errorf("nsc port-forward exited without endpoint\n%s", stderr.String())
case <-time.After(45 * time.Second):
cancel()
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
<-waitDone
return "", nil, fmt.Errorf("timed out waiting for WinRM port-forward endpoint\n%s", stderr.String())
case <-ctx.Done():
cancel()
if cmd.Process != nil {
_ = cmd.Process.Kill()
}
<-waitDone
return "", nil, ctx.Err()
}
}
// runWindowsWinRMPowerShell executes script on the Windows instance reachable
// at endpoint, using a Python helper built on the pywinrm package.
//
// It requires python3 on PATH. A virtualenv is created under
// <workdir>/.winrm-venv when its bin/python is missing (workdir defaults to
// /tmp/forgejo-runner), and pywinrm is pip-installed into it when the "winrm"
// module cannot be found. The endpoint, credentials and base64-encoded script
// are handed to the helper through environment variables. The helper retries
// the WinRM session for up to 300 seconds, exits with the remote status code,
// or exits 111 when it never connected. Any non-zero exit is returned as an
// error that includes the helper's combined output; on success the output is
// discarded.
func (d *Dispatcher) runWindowsWinRMPowerShell(ctx context.Context, endpoint, username, password, script string) error {
pythonPath, err := exec.LookPath("python3")
if err != nil {
return fmt.Errorf("python3 is required for windows WinRM bootstrap: %w", err)
}
workdir := strings.TrimSpace(d.opts.WorkDir)
if workdir == "" {
workdir = "/tmp/forgejo-runner"
}
if err := os.MkdirAll(workdir, 0o755); err != nil {
return fmt.Errorf("create workdir %s: %w", workdir, err)
}
venvPath := filepath.Join(workdir, ".winrm-venv")
venvPython := filepath.Join(venvPath, "bin", "python")
// Any Stat error (not only "does not exist") triggers venv creation.
if _, err := os.Stat(venvPython); err != nil {
cmd := exec.CommandContext(ctx, pythonPath, "-m", "venv", venvPath)
var out bytes.Buffer
cmd.Stdout = &out
cmd.Stderr = &out
if err := cmd.Run(); err != nil {
return fmt.Errorf("create python venv for winrm failed: %w\n%s", err, out.String())
}
}
// Python snippet: install pywinrm into the venv only when "winrm" is not importable.
ensurePyWinRM := `
import importlib.util, subprocess, sys
if importlib.util.find_spec("winrm") is None:
subprocess.check_call([sys.executable, "-m", "pip", "install", "--quiet", "pywinrm"])
`
ensureCmd := exec.CommandContext(ctx, venvPython, "-c", ensurePyWinRM)
var ensureOut bytes.Buffer
ensureCmd.Stdout = &ensureOut
ensureCmd.Stderr = &ensureOut
if err := ensureCmd.Run(); err != nil {
return fmt.Errorf("install pywinrm failed: %w\n%s", err, ensureOut.String())
}
// Python driver: open an NTLM WinRM session over http://<endpoint>/wsman,
// run the decoded script via run_ps, relay its output, and exit with its status.
// NOTE(review): inside this raw string the timeout message ends with "\\n",
// which Python reads as a literal backslash followed by "n", not a newline —
// looks unintended; confirm.
runScript := `
import base64, os, sys, time, traceback, winrm
endpoint = os.environ["WINRM_ENDPOINT"]
user = os.environ["WINRM_USER"]
password = os.environ["WINRM_PASS"]
script = base64.b64decode(os.environ["WINRM_SCRIPT_B64"]).decode("utf-8")
deadline = time.time() + 300.0
last_err = None
while time.time() < deadline:
try:
session = winrm.Session(f"http://{endpoint}/wsman", auth=(user, password), transport="ntlm")
result = session.run_ps(script)
sys.stdout.write(result.std_out.decode("utf-8", errors="replace"))
sys.stderr.write(result.std_err.decode("utf-8", errors="replace"))
print(f"winrm_exit={result.status_code}")
sys.exit(result.status_code)
except Exception as err:
last_err = err
time.sleep(5.0)
sys.stderr.write("timed out waiting for WinRM connectivity after 300s\\n")
if last_err is not None:
traceback.print_exception(last_err, file=sys.stderr)
sys.exit(111)
`
runCmd := exec.CommandContext(ctx, venvPython, "-c", runScript)
// Secrets and the script travel via the environment rather than argv; the
// script is base64-encoded so it survives as a single environment value.
runCmd.Env = append(os.Environ(),
"WINRM_ENDPOINT="+endpoint,
"WINRM_USER="+username,
"WINRM_PASS="+password,
"WINRM_SCRIPT_B64="+base64.StdEncoding.EncodeToString([]byte(script)),
)
var runOut bytes.Buffer
runCmd.Stdout = &runOut
runCmd.Stderr = &runOut
if err := runCmd.Run(); err != nil {
return fmt.Errorf("windows winrm bootstrap command failed: %w\n%s", err, runOut.String())
}
return nil
}
// windowsBootstrapScript renders the PowerShell script that installs and runs
// a Forgejo runner on a Windows instance.
//
// The generated script first assigns the runner name, registration token,
// instance URL, comma-joined labels, executor and work root as single-quoted
// PowerShell literals. Its fixed body then downloads the forgejo-runner
// Windows zip (version pinned in the script), extracts it, resolves labels
// (labels without a ":" get ":<executor>" appended), writes runner.yaml,
// registers the runner and runs `one-job`, throwing when either command exits
// non-zero.
//
// A blank workdir defaults to C:\burrow\forgejo-runner, and an executor of ""
// or "shell" is mapped to "host". The work root is workdir plus a directory
// named after runnerName with backslash, colon, slash and space replaced by "-".
func windowsBootstrapScript(runnerName string, req LaunchRequest, executor, workdir string) string {
if strings.TrimSpace(workdir) == "" {
workdir = `C:\burrow\forgejo-runner`
}
runnerExec := strings.TrimSpace(executor)
if runnerExec == "" || runnerExec == "shell" {
runnerExec = "host"
}
// Make the runner name safe to use as a single Windows path component.
safeName := strings.NewReplacer(`\`, "-", ":", "-", "/", "-", " ", "-").Replace(runnerName)
workRoot := strings.TrimRight(workdir, `\`) + `\` + safeName
var b strings.Builder
b.WriteString("$ErrorActionPreference = 'Stop'\n")
b.WriteString("$ProgressPreference = 'SilentlyContinue'\n")
b.WriteString("[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12\n")
// Caller-supplied values are embedded as single-quoted literals so PowerShell
// does not expand them.
b.WriteString("$runnerName = " + powershellSingleQuote(runnerName) + "\n")
b.WriteString("$runnerToken = " + powershellSingleQuote(req.Token) + "\n")
b.WriteString("$instanceURL = " + powershellSingleQuote(req.InstanceURL) + "\n")
b.WriteString("$labelsCsv = " + powershellSingleQuote(strings.Join(req.Labels, ",")) + "\n")
b.WriteString("$runnerExec = " + powershellSingleQuote(runnerExec) + "\n")
b.WriteString("$workRoot = " + powershellSingleQuote(workRoot) + "\n")
// Fixed script body; it only reads the variables assigned above.
b.WriteString(`
New-Item -Path $workRoot -ItemType Directory -Force | Out-Null
Set-Location $workRoot
$runnerVersion = "12.6.4"
$zipUrl = "https://code.forgejo.org/forgejo/runner/releases/download/v${runnerVersion}/forgejo-runner-${runnerVersion}-windows-amd64.zip"
$zipPath = Join-Path $workRoot "forgejo-runner.zip"
$extractDir = Join-Path $workRoot "forgejo-runner"
if (Test-Path $extractDir) {
Remove-Item -Path $extractDir -Recurse -Force
}
Invoke-WebRequest -Uri $zipUrl -OutFile $zipPath
Expand-Archive -Path $zipPath -DestinationPath $extractDir -Force
$runnerExe = Join-Path $extractDir "forgejo-runner.exe"
if (-not (Test-Path $runnerExe)) {
throw "Missing forgejo-runner.exe after extract: $runnerExe"
}
$labels = @()
foreach ($label in ($labelsCsv -split ",")) {
$trimmed = $label.Trim()
if ([string]::IsNullOrWhiteSpace($trimmed)) { continue }
if ($trimmed.Contains(":")) {
$labels += $trimmed
} else {
$labels += ("{0}:{1}" -f $trimmed, $runnerExec)
}
}
if ($labels.Count -eq 0) {
throw "No runner labels resolved for windows bootstrap"
}
$labelLines = ($labels | ForEach-Object { " - $_" }) -join [Environment]::NewLine
$configPath = Join-Path $workRoot "runner.yaml"
$runnerYaml = @"
log:
level: info
runner:
file: .runner
capacity: 1
name: $runnerName
labels:
$labelLines
cache:
enabled: false
"@
Set-Content -Path $configPath -Value $runnerYaml -Encoding UTF8
$labelsArg = ($labels -join ",")
& $runnerExe register --no-interactive --instance $instanceURL --token $runnerToken --name $runnerName --labels $labelsArg --config $configPath
if ($LASTEXITCODE -ne 0) {
throw ("forgejo-runner register failed: {0}" -f $LASTEXITCODE)
}
& $runnerExe one-job --config $configPath
if ($LASTEXITCODE -ne 0) {
throw ("forgejo-runner one-job failed: {0}" -f $LASTEXITCODE)
}
`)
return b.String()
}

View file

@ -0,0 +1,59 @@
package nsc
import (
"context"
"io"
"log/slog"
"os"
"os/exec"
"strings"
"testing"
"time"
)
func TestWindowsWinRMScriptRoundTrip(t *testing.T) {
if os.Getenv("NSC_WINDOWS_E2E") != "1" {
t.Skip("set NSC_WINDOWS_E2E=1 to run Namespace Windows integration test")
}
nscBinary, err := exec.LookPath("nsc")
if err != nil {
t.Skipf("nsc not found in PATH: %v", err)
}
authCheck := exec.Command(nscBinary, "auth", "check-login")
if out, err := authCheck.CombinedOutput(); err != nil {
t.Skipf("nsc auth check-login failed: %v (%s)", err, strings.TrimSpace(string(out)))
}
machineType := strings.TrimSpace(os.Getenv("NSC_WINDOWS_E2E_MACHINE_TYPE"))
if machineType == "" {
machineType = "windows/amd64:4x8"
}
dispatcher, err := NewDispatcher(Options{
BinaryPath: nscBinary,
DefaultImage: "code.forgejo.org/forgejo/runner:11",
DefaultMachine: machineType,
DefaultDuration: 20 * time.Minute,
MaxParallel: 1,
WorkDir: t.TempDir(),
ComputeBaseURL: strings.TrimSpace(os.Getenv("NSC_COMPUTE_BASE_URL")),
Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
})
if err != nil {
t.Fatalf("NewDispatcher() error: %v", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Minute)
defer cancel()
script := "Write-Output ('winrm-ok:' + $env:COMPUTERNAME)"
labels := []string{"namespace-profile-windows-medium"}
if err := dispatcher.launchWindowsScriptViaWinRM(ctx, "nsc-winrm-itest", 20*time.Minute, machineType, labels, script); err != nil {
if strings.Contains(err.Error(), "does not expose winrm service (rdp-only)") {
t.Skipf("namespace windows control channel is rdp-only: %v", err)
}
t.Fatalf("launchWindowsScriptViaWinRM() error: %v", err)
}
}

View file

@ -0,0 +1,65 @@
package nsc
import "testing"
// TestParseProxyEndpoint covers plain JSON, JSON preceded by other output,
// JSON without an endpoint, and non-JSON output.
func TestParseProxyEndpoint(t *testing.T) {
	t.Parallel()

	type scenario struct {
		name   string
		raw    string
		want   string
		wantOK bool
	}
	scenarios := []scenario{
		{
			name:   "plain json payload",
			raw:    `{"endpoint":"127.0.0.1:61234"}`,
			want:   "127.0.0.1:61234",
			wantOK: true,
		},
		{
			name: "json wrapped with extra output",
			raw: `Connected.
{"endpoint":"127.0.0.1:61235","rdp":{"credentials":{"username":"runneradmin","password":"runneradmin"}}}`,
			want:   "127.0.0.1:61235",
			wantOK: true,
		},
		{
			name:   "missing endpoint field",
			raw:    `{"rdp":{"credentials":{"username":"runneradmin"}}}`,
			wantOK: false,
		},
		{
			name:   "non-json output",
			raw:    `Failed: instance does not have service "winrm"`,
			wantOK: false,
		},
	}

	for i := range scenarios {
		// Copy per iteration so each parallel subtest sees its own scenario.
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			t.Parallel()
			endpoint, found := parseProxyEndpoint(sc.raw)
			if found != sc.wantOK {
				t.Fatalf("parseProxyEndpoint(%q) ok=%v, want %v", sc.raw, found, sc.wantOK)
			}
			if endpoint != sc.want {
				t.Fatalf("parseProxyEndpoint(%q) endpoint=%q, want %q", sc.raw, endpoint, sc.want)
			}
		})
	}
}
// TestIndicatesMissingProxyService checks that the missing-service message is
// matched only for the service it actually names.
func TestIndicatesMissingProxyService(t *testing.T) {
	t.Parallel()

	const output = `Failed: instance does not have service "winrm"`

	if matched := indicatesMissingProxyService(output, "winrm"); !matched {
		t.Fatalf("indicatesMissingProxyService should return true for missing winrm message")
	}
	if matched := indicatesMissingProxyService(output, "ssh"); matched {
		t.Fatalf("indicatesMissingProxyService should be false when service name does not match")
	}
}