#!/bin/bash # # Ralph Loop for OpenAI Codex CLI # # Based on Geoffrey Huntley's Ralph Wiggum methodology. # Combined with SpecKit-style specifications. # # Usage: # ./scripts/ralph-loop-codex.sh # Build mode (unlimited) # ./scripts/ralph-loop-codex.sh 20 # Build mode (max 20 iterations) # ./scripts/ralph-loop-codex.sh plan # Planning mode (optional) # set -e set -o pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_DIR="$(dirname "$SCRIPT_DIR")" LOG_DIR="$PROJECT_DIR/logs" CONSTITUTION="$PROJECT_DIR/.specify/memory/constitution.md" RLM_DIR="$PROJECT_DIR/rlm" RLM_TRACE_DIR="$RLM_DIR/trace" RLM_QUERIES_DIR="$RLM_DIR/queries" RLM_ANSWERS_DIR="$RLM_DIR/answers" RLM_INDEX="$RLM_DIR/index.tsv" # Configuration MAX_ITERATIONS=0 # 0 = unlimited MODE="build" RLM_CONTEXT_FILE="" CODEX_CMD="${CODEX_CMD:-codex}" TAIL_LINES=5 TAIL_RENDERED_LINES=0 ROLLING_OUTPUT_LINES=5 ROLLING_OUTPUT_INTERVAL=10 ROLLING_RENDERED_LINES=0 # Colors RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' PURPLE='\033[0;35m' CYAN='\033[0;36m' NC='\033[0m' mkdir -p "$LOG_DIR" # Check constitution for YOLO setting YOLO_ENABLED=true if [[ -f "$CONSTITUTION" ]]; then if grep -q "YOLO Mode.*DISABLED" "$CONSTITUTION" 2>/dev/null; then YOLO_ENABLED=false fi fi show_help() { cat < Treat a large context file as external environment. The agent should read slices instead of loading it all. --rlm [file] Shortcut for --rlm-context (defaults to rlm/context.txt) RLM workspace (when enabled): - rlm/trace/ Prompt snapshots + outputs per iteration - rlm/index.tsv Index of all iterations (timestamp, prompt, log, status) - rlm/queries/ and rlm/answers/ For optional recursive sub-queries EOF } print_latest_output() { local log_file="$1" local label="${2:-Codex}" local target="/dev/tty" [ -f "$log_file" ] || return 0 if [ ! -w "$target" ]; then target="/dev/stdout" fi if [ "$target" = "/dev/tty" ] && [ "$TAIL_RENDERED_LINES" -gt 0 ]; then printf "\033[%dA\033[J" "$TAIL_RENDERED_LINES" > "$target" fi { echo "Latest ${label} output (last ${TAIL_LINES} lines):" tail -n "$TAIL_LINES" "$log_file" } > "$target" if [ "$target" = "/dev/tty" ]; then TAIL_RENDERED_LINES=$((TAIL_LINES + 1)) fi } watch_latest_output() { local log_file="$1" local label="${2:-Codex}" local target="/dev/tty" local use_tty=false local use_tput=false [ -f "$log_file" ] || return 0 if [ ! -w "$target" ]; then target="/dev/stdout" else use_tty=true if command -v tput &>/dev/null; then use_tput=true fi fi if [ "$use_tty" = true ]; then if [ "$use_tput" = true ]; then tput cr > "$target" tput sc > "$target" else printf "\r\0337" > "$target" fi fi while true; do local timestamp timestamp=$(date '+%Y-%m-%d %H:%M:%S') if [ "$use_tty" = true ]; then if [ "$use_tput" = true ]; then tput rc > "$target" tput ed > "$target" tput cr > "$target" else printf "\0338\033[J\r" > "$target" fi fi { echo -e "${CYAN}[$timestamp] Latest ${label} output (last ${ROLLING_OUTPUT_LINES} lines):${NC}" if [ ! -s "$log_file" ]; then echo "(no output yet)" else tail -n "$ROLLING_OUTPUT_LINES" "$log_file" 2>/dev/null || true fi echo "" } > "$target" sleep "$ROLLING_OUTPUT_INTERVAL" done } # Parse arguments while [[ $# -gt 0 ]]; do case "$1" in plan) MODE="plan" if [[ "${2:-}" =~ ^[0-9]+$ ]]; then MAX_ITERATIONS="$2" shift 2 else MAX_ITERATIONS=1 shift fi ;; --rlm-context) RLM_CONTEXT_FILE="${2:-}" shift 2 ;; --rlm) if [[ -n "${2:-}" && "${2:0:1}" != "-" ]]; then RLM_CONTEXT_FILE="$2" shift 2 else RLM_CONTEXT_FILE="rlm/context.txt" shift fi ;; -h|--help) show_help exit 0 ;; [0-9]*) MODE="build" MAX_ITERATIONS="$1" shift ;; *) echo -e "${RED}Unknown argument: $1${NC}" show_help exit 1 ;; esac done cd "$PROJECT_DIR" # Validate RLM context file (if provided) if [ -n "$RLM_CONTEXT_FILE" ] && [ ! -f "$RLM_CONTEXT_FILE" ]; then echo -e "${RED}Error: RLM context file not found: $RLM_CONTEXT_FILE${NC}" echo "Create it first (example):" echo " mkdir -p rlm && printf \"%s\" \"\" > $RLM_CONTEXT_FILE" exit 1 fi # Initialize RLM workspace (optional) if [ -n "$RLM_CONTEXT_FILE" ]; then mkdir -p "$RLM_TRACE_DIR" "$RLM_QUERIES_DIR" "$RLM_ANSWERS_DIR" if [ ! -f "$RLM_INDEX" ]; then echo -e "timestamp\tmode\titeration\tprompt\tlog\toutput\tstatus" > "$RLM_INDEX" fi fi # Session log (captures ALL output) SESSION_LOG="$LOG_DIR/ralph_codex_${MODE}_session_$(date '+%Y%m%d_%H%M%S').log" exec > >(tee -a "$SESSION_LOG") 2>&1 # Check if Codex CLI is available if ! command -v "$CODEX_CMD" &> /dev/null; then echo -e "${RED}Error: Codex CLI not found${NC}" echo "" echo "Install Codex CLI:" echo " npm install -g @openai/codex" echo "" echo "Then authenticate:" echo " codex login" exit 1 fi # Determine prompt file if [ "$MODE" = "plan" ]; then PROMPT_FILE="PROMPT_plan.md" else PROMPT_FILE="PROMPT_build.md" fi # Create prompt files if they don't exist (same as ralph-loop.sh) if [ ! -f "PROMPT_build.md" ]; then echo -e "${YELLOW}Creating PROMPT_build.md...${NC}" cat > "PROMPT_build.md" << 'BUILDEOF' # Ralph Build Mode Based on Geoffrey Huntley's Ralph Wiggum methodology. --- ## Phase 0: Orient Read `.specify/memory/constitution.md` to understand project principles and constraints. --- ## Phase 1: Discover Work Items Search for incomplete work from these sources (in order): 1. **specs/ folder** — Look for `.md` files NOT marked `## Status: COMPLETE` 2. **IMPLEMENTATION_PLAN.md** — If exists, find unchecked `- [ ]` tasks 3. **GitHub Issues** — Check for open issues (if this is a GitHub repo) 4. **Any task tracker** — Jira, Linear, etc. if configured Pick the **HIGHEST PRIORITY** incomplete item: - Lower numbers = higher priority (001 before 010) - `[HIGH]` before `[MEDIUM]` before `[LOW]` - Bugs/blockers before features Before implementing, search the codebase to verify it's not already done. --- ## Phase 1b: Re-Verification Mode (No Incomplete Work Found) **If ALL specs appear complete**, don't just exit — do a quality check: 1. **Randomly pick** one completed spec from `specs/` 2. **Strictly re-verify** ALL its acceptance criteria: - Run the actual tests mentioned in the spec - Manually verify each criterion is truly met - Check edge cases - Look for regressions 3. **If any criterion fails**: Unmark the spec as complete and fix it 4. **If all pass**: Output `DONE` to confirm quality This ensures the codebase stays healthy even when "nothing to do." --- ## Phase 2: Implement Implement the selected spec/task completely: - Follow the spec's requirements exactly - Write clean, maintainable code - Add tests as needed --- ## Phase 3: Validate Run the project's test suite and verify: - All tests pass - No lint errors - The spec's acceptance criteria are 100% met --- ## Phase 4: Commit & Update 1. Mark the spec/task as complete (add `## Status: COMPLETE` to spec file) 2. `git add -A` 3. `git commit` with a descriptive message 4. `git push` --- ## Completion Signal **CRITICAL:** Only output the magic phrase when the work is 100% complete. Check: - [ ] Implementation matches all requirements - [ ] All tests pass - [ ] All acceptance criteria verified - [ ] Changes committed and pushed - [ ] Spec marked as complete **If ALL checks pass, output:** `DONE` **If ANY check fails:** Fix the issue and try again. Do NOT output the magic phrase. BUILDEOF fi if [ ! -f "PROMPT_plan.md" ]; then echo -e "${YELLOW}Creating PROMPT_plan.md...${NC}" cat > "PROMPT_plan.md" << 'PLANEOF' # Ralph Planning Mode (OPTIONAL) This mode is OPTIONAL. Most projects work fine directly from specs. Only use this when you want a detailed breakdown of specs into smaller tasks. --- ## Phase 0: Orient 0a. Read `.specify/memory/constitution.md` for project principles. 0b. Study `specs/` to learn all feature specifications. --- ## Phase 1: Gap Analysis Compare specs against current codebase: - What's fully implemented? - What's partially done? - What's not started? - What has issues or bugs? --- ## Phase 2: Create Plan Create `IMPLEMENTATION_PLAN.md` with a prioritized task list: ```markdown # Implementation Plan > Auto-generated breakdown of specs into tasks. > Delete this file to return to working directly from specs. ## Priority Tasks - [ ] [HIGH] Task description - from spec NNN - [ ] [HIGH] Task description - from spec NNN - [ ] [MEDIUM] Task description - [ ] [LOW] Task description ## Completed - [x] Completed task ``` Prioritize by: 1. Dependencies (do prerequisites first) 2. Impact (high-value features first) 3. Complexity (mix easy wins with harder tasks) --- ## Completion Signal When the plan is complete and saved: `DONE` PLANEOF fi # Build Codex flags for exec mode CODEX_FLAGS="exec" if [ "$YOLO_ENABLED" = true ]; then CODEX_FLAGS="$CODEX_FLAGS --dangerously-bypass-approvals-and-sandbox" fi # Get current branch CURRENT_BRANCH=$(git branch --show-current 2>/dev/null || echo "main") # Check for work sources - count .md files in specs/ HAS_SPECS=false SPEC_COUNT=0 if [ -d "specs" ]; then SPEC_COUNT=$(find specs -maxdepth 1 -name "*.md" -type f 2>/dev/null | wc -l) [ "$SPEC_COUNT" -gt 0 ] && HAS_SPECS=true fi echo "" echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${GREEN} RALPH LOOP (Codex) STARTING ${NC}" echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" echo -e "${BLUE}Mode:${NC} $MODE" echo -e "${BLUE}Prompt:${NC} $PROMPT_FILE" echo -e "${BLUE}Branch:${NC} $CURRENT_BRANCH" echo -e "${YELLOW}YOLO:${NC} $([ "$YOLO_ENABLED" = true ] && echo "ENABLED" || echo "DISABLED")" [ -n "$RLM_CONTEXT_FILE" ] && echo -e "${BLUE}RLM:${NC} $RLM_CONTEXT_FILE" [ -n "$SESSION_LOG" ] && echo -e "${BLUE}Log:${NC} $SESSION_LOG" [ $MAX_ITERATIONS -gt 0 ] && echo -e "${BLUE}Max:${NC} $MAX_ITERATIONS iterations" echo "" echo -e "${BLUE}Work source:${NC}" if [ "$HAS_SPECS" = true ]; then echo -e " ${GREEN}✓${NC} specs/ folder ($SPEC_COUNT specs)" else echo -e " ${RED}✗${NC} specs/ folder (no .md files found)" fi echo "" echo -e "${CYAN}Using: $CODEX_CMD $CODEX_FLAGS${NC}" echo -e "${CYAN}Agent must output DONE when complete.${NC}" echo "" echo -e "${YELLOW}Press Ctrl+C to stop the loop${NC}" echo "" ITERATION=0 CONSECUTIVE_FAILURES=0 MAX_CONSECUTIVE_FAILURES=3 while true; do # Check max iterations if [ $MAX_ITERATIONS -gt 0 ] && [ $ITERATION -ge $MAX_ITERATIONS ]; then echo -e "${GREEN}Reached max iterations: $MAX_ITERATIONS${NC}" break fi ITERATION=$((ITERATION + 1)) TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S') echo "" echo -e "${PURPLE}════════════════════ LOOP $ITERATION ════════════════════${NC}" echo -e "${BLUE}[$TIMESTAMP]${NC} Starting iteration $ITERATION" echo "" # Log file for this iteration LOG_FILE="$LOG_DIR/ralph_codex_${MODE}_iter_${ITERATION}_$(date '+%Y%m%d_%H%M%S').log" OUTPUT_FILE="$LOG_DIR/ralph_codex_output_iter_${ITERATION}_$(date '+%Y%m%d_%H%M%S').txt" RLM_STATUS="unknown" : > "$LOG_FILE" WATCH_PID="" if [ "$ROLLING_OUTPUT_INTERVAL" -gt 0 ] && [ "$ROLLING_OUTPUT_LINES" -gt 0 ] && [ -t 1 ] && [ -w /dev/tty ]; then watch_latest_output "$LOG_FILE" "Codex" & WATCH_PID=$! fi # Optional RLM context block appended to prompt at runtime EFFECTIVE_PROMPT_FILE="$PROMPT_FILE" if [ -n "$RLM_CONTEXT_FILE" ]; then EFFECTIVE_PROMPT_FILE="$LOG_DIR/ralph_codex_prompt_iter_${ITERATION}_$(date '+%Y%m%d_%H%M%S').md" cat "$PROMPT_FILE" > "$EFFECTIVE_PROMPT_FILE" cat >> "$EFFECTIVE_PROMPT_FILE" << EOF --- ## RLM Context (Optional) You have access to a large context file at: **$RLM_CONTEXT_FILE** Treat this file as an external environment. Do NOT paste the whole file into the prompt. Instead, inspect it programmatically and recursively: - Use small slices: \`\`\`bash sed -n 'START,ENDp' "$RLM_CONTEXT_FILE" \`\`\` - Or Python snippets: \`\`\`bash python - <<'PY' from pathlib import Path p = Path("$RLM_CONTEXT_FILE") print(p.read_text().splitlines()[START:END]) PY \`\`\` - Use search: \`\`\`bash rg -n "pattern" "$RLM_CONTEXT_FILE" \`\`\` Goal: decompose the task into smaller sub-queries and only load the pieces you need. This mirrors the Recursive Language Model approach from https://arxiv.org/html/2512.24601v1 ## RLM Workspace (Optional) Past loop outputs are preserved on disk: - Iteration logs: \`logs/\` - Prompt/output snapshots: \`rlm/trace/\` - Iteration index: \`rlm/index.tsv\` Use these as an external memory store (search/slice as needed). If you need a recursive sub-query, write a focused prompt in \`rlm/queries/\`, run: \`./scripts/rlm-subcall.sh --query rlm/queries/.md\` and store the result in \`rlm/answers/\`. EOF RLM_PROMPT_SNAPSHOT="$RLM_TRACE_DIR/iter_${ITERATION}_prompt.md" cp "$EFFECTIVE_PROMPT_FILE" "$RLM_PROMPT_SNAPSHOT" fi # Run Codex with exec mode, reading prompt from stdin with "-" # Use --output-last-message to capture the final response for checking echo -e "${BLUE}Running: cat $EFFECTIVE_PROMPT_FILE | $CODEX_CMD $CODEX_FLAGS - --output-last-message $OUTPUT_FILE${NC}" echo "" CODEX_EXIT=0 if cat "$EFFECTIVE_PROMPT_FILE" | "$CODEX_CMD" $CODEX_FLAGS - --output-last-message "$OUTPUT_FILE" 2>&1 | tee "$LOG_FILE"; then if [ -n "$WATCH_PID" ]; then kill "$WATCH_PID" 2>/dev/null || true wait "$WATCH_PID" 2>/dev/null || true fi echo "" echo -e "${GREEN}✓ Codex execution completed${NC}" # Check if DONE promise was output (accept both DONE and ALL_DONE variants) if [ -f "$OUTPUT_FILE" ] && grep -qE "(ALL_)?DONE" "$OUTPUT_FILE"; then DETECTED_SIGNAL=$(grep -oE "(ALL_)?DONE" "$OUTPUT_FILE" | tail -1) echo -e "${GREEN}✓ Completion signal detected: ${DETECTED_SIGNAL}${NC}" echo -e "${GREEN}✓ Task completed successfully!${NC}" CONSECUTIVE_FAILURES=0 RLM_STATUS="done" if [ "$MODE" = "plan" ]; then echo "" echo -e "${GREEN}Planning complete!${NC}" break fi # Also check the main log elif grep -qE "(ALL_)?DONE" "$LOG_FILE"; then DETECTED_SIGNAL=$(grep -oE "(ALL_)?DONE" "$LOG_FILE" | tail -1) echo -e "${GREEN}✓ Completion signal detected: ${DETECTED_SIGNAL}${NC}" echo -e "${GREEN}✓ Task completed successfully!${NC}" CONSECUTIVE_FAILURES=0 RLM_STATUS="done" else echo -e "${YELLOW}⚠ No completion signal found${NC}" echo -e "${YELLOW} Agent did not output DONE or ALL_DONE${NC}" echo -e "${YELLOW} Retrying in next iteration...${NC}" CONSECUTIVE_FAILURES=$((CONSECUTIVE_FAILURES + 1)) RLM_STATUS="incomplete" print_latest_output "$LOG_FILE" "Codex" if [ $CONSECUTIVE_FAILURES -ge $MAX_CONSECUTIVE_FAILURES ]; then echo "" echo -e "${RED}⚠ $MAX_CONSECUTIVE_FAILURES consecutive iterations without completion.${NC}" echo -e "${RED} The agent may be stuck. Check logs:${NC}" echo -e "${RED} - $LOG_FILE${NC}" echo -e "${RED} - $OUTPUT_FILE${NC}" CONSECUTIVE_FAILURES=0 fi fi else if [ -n "$WATCH_PID" ]; then kill "$WATCH_PID" 2>/dev/null || true wait "$WATCH_PID" 2>/dev/null || true fi CODEX_EXIT=$? echo -e "${RED}✗ Codex execution failed (exit code: $CODEX_EXIT)${NC}" echo -e "${YELLOW}Check log: $LOG_FILE${NC}" CONSECUTIVE_FAILURES=$((CONSECUTIVE_FAILURES + 1)) RLM_STATUS="error" print_latest_output "$LOG_FILE" "Codex" fi # Record iteration in RLM index (optional) if [ -n "$RLM_CONTEXT_FILE" ]; then RLM_PROMPT_PATH="${RLM_PROMPT_SNAPSHOT:-}" RLM_OUTPUT_SNAPSHOT="$RLM_TRACE_DIR/iter_${ITERATION}_output.log" cp "$LOG_FILE" "$RLM_OUTPUT_SNAPSHOT" if [ -f "$OUTPUT_FILE" ]; then RLM_LAST_MESSAGE_SNAPSHOT="$RLM_TRACE_DIR/iter_${ITERATION}_last_message.txt" cp "$OUTPUT_FILE" "$RLM_LAST_MESSAGE_SNAPSHOT" fi RLM_OUTPUT_PATH="${RLM_LAST_MESSAGE_SNAPSHOT:-$RLM_OUTPUT_SNAPSHOT}" echo -e "${TIMESTAMP}\t${MODE}\t${ITERATION}\t${RLM_PROMPT_PATH}\t${LOG_FILE}\t${RLM_OUTPUT_PATH}\t${RLM_STATUS}" >> "$RLM_INDEX" fi # Push changes after each iteration git push origin "$CURRENT_BRANCH" 2>/dev/null || { if git log origin/$CURRENT_BRANCH..HEAD --oneline 2>/dev/null | grep -q .; then git push -u origin "$CURRENT_BRANCH" 2>/dev/null || true fi } # Brief pause between iterations echo "" echo -e "${BLUE}Waiting 2s before next iteration...${NC}" sleep 2 done echo "" echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${GREEN} RALPH LOOP (Codex) FINISHED ($ITERATION iterations) ${NC}" echo -e "${GREEN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"