# autodev/sandbox.py

"""
AutoDev - Sandbox
Enforces working directory confinement with whitelist-based command validation.
"""

import os
import re

# Command whitelist. A command segment is accepted when it starts with one of
# these strings, or when its first word equals the entry with surrounding
# whitespace removed. Entries ending in a space therefore only match when the
# command name is followed by arguments (or, via the first-word rule, when the
# name stands alone).
ALLOWED_COMMANDS = [
    # --- Build tools ---
    # C / C++ / Rust
    "make", "cmake", "gcc", "g++", "clang", "clang++", "rustc", "cargo",
    # Go
    "go ", "go build", "go run", "go test", "go mod",
    # JVM
    "javac", "java ", "jar ", "mvn ", "gradle",
    # .NET
    "dotnet", "msbuild",
    # Python
    "python", "python3", "pip ", "pip3 ",
    # JavaScript
    "node ", "npm ", "npx ", "yarn ", "pnpm ",
    # Ruby
    "ruby ", "gem ", "bundle ",
    # Perl / Lua
    "perl ", "lua ", "luac",
    # Assembler / linker
    "nasm", "as ", "ld ",

    # --- Common utilities ---
    # Inspecting files
    "ls", "cat ", "head ", "tail ", "wc ", "sort ", "uniq ",
    # Searching and text processing
    "find ", "grep ", "awk ", "sed ", "diff ", "patch ",
    # Creating, moving and removing files
    "mkdir ", "cp ", "mv ", "rm ", "touch ", "chmod ",
    # Navigation
    "cd ", "pwd",
    # Archives
    "tar ", "zip ", "unzip ", "gzip ", "gunzip ",
    # Downloads
    "curl ", "wget ",
    # Shell builtins and small helpers
    "echo ", "printf ", "test ", "true", "false",
    "which ", "env ", "basename ", "dirname ",
    # Library / package configuration
    "pkg-config", "ldconfig",

    # --- Version checks ---
    "gcc --version", "g++ --version", "python3 --version",
    "rustc --version", "cargo --version", "go version",
    "java -version", "javac -version", "node --version",
]

# Absolutely forbidden patterns — override whitelist.
# Each pattern is applied with ``search`` to the whole, lowercased command
# string, so the patterns themselves are written in lowercase.
FORBIDDEN_PATTERNS = [
    # Privilege escalation
    re.compile(r"\bsudo\b"),
    re.compile(r"\bsu\s"),
    # Removing the filesystem root: any run of option flags (-rf, -fr, -r -f,
    # --) followed by a target that is exactly "/" or "/*". The target may be
    # followed by whitespace, a command separator, or the end of the string,
    # so "rm -rf / && ..." is caught as well as a trailing "rm -rf /".
    re.compile(r"\brm\s+(?:-\S+\s+)*/+\*?(?:\s|$|[;&|])"),
    # Raw device / filesystem manipulation
    re.compile(r"\bmkfs\b"),
    re.compile(r"\bdd\s+if="),
    re.compile(r">\s*/dev/"),
    # Power state and service management
    re.compile(r"\bshutdown\b"),
    re.compile(r"\breboot\b"),
    re.compile(r"\binit\s+[0-6]"),
    re.compile(r"\bsystemctl\b"),
    # Permission / ownership changes on absolute paths
    re.compile(r"\bchmod\s+777\s+/"),
    re.compile(r"\bchown\b.*\s+/"),
    # Mounts
    re.compile(r"\bmount\b"),
    re.compile(r"\bumount\b"),
    # Firewall
    re.compile(r"\biptables\b"),
    re.compile(r"\bnft\b"),
    # Accounts and scheduled jobs
    re.compile(r"\bpasswd\b"),
    re.compile(r"\buseradd\b"),
    re.compile(r"\buserdel\b"),
    re.compile(r"\bvisudo\b"),
    re.compile(r"\bcrontab\b"),
]


class SandboxViolation(Exception):
    """Raised when a path or command is rejected by the sandbox checks."""


class Sandbox:
    """Confine file operations and shell commands to one working directory.

    The working directory is resolved once (symlinks included) when the
    sandbox is created. Paths given to the ``safe_*`` helpers are resolved
    against it and rejected with ``SandboxViolation`` when they land outside.
    Command strings are screened by ``validate_command`` against
    ``FORBIDDEN_PATTERNS``, the ``ALLOWED_COMMANDS`` whitelist, and a check on
    unquoted absolute paths.
    """

    # Locations outside the sandbox that commands may still reference. These
    # are matched as plain string prefixes, so variants such as /lib64 are
    # accepted as well.
    _SYSTEM_PATH_PREFIXES = ("/usr", "/lib", "/etc/alternatives", "/bin", "/opt")

    def __init__(self, workdir: str):
        """Remember the fully resolved form of *workdir* as the sandbox root."""
        self.workdir = os.path.realpath(workdir)

    @staticmethod
    def _is_within(path: str, root: str) -> bool:
        """Return True when *path* is *root* itself or lies underneath it.

        The comparison respects path-component boundaries: ``/a/bc`` is not
        considered to be inside ``/a/b`` even though it shares the prefix.
        """
        return path == root or path.startswith(root.rstrip(os.sep) + os.sep)

    def validate_path(self, path: str) -> str:
        """Resolve *path* against the sandbox root and ensure it stays inside.

        Args:
            path: Relative or absolute path; relative paths are joined onto
                the sandbox root. Symlinks are resolved before checking.

        Returns:
            The resolved absolute path.

        Raises:
            SandboxViolation: If the resolved path is outside the sandbox.
        """
        resolved = os.path.realpath(os.path.join(self.workdir, path))
        if not self._is_within(resolved, self.workdir):
            raise SandboxViolation(f"Path escapes sandbox: {path} -> {resolved}")
        return resolved

    def validate_command(self, cmd: str) -> None:
        """Check a shell command string against the sandbox rules.

        Three checks run in order: forbidden patterns on the whole command,
        the whitelist on every segment separated by ``&&``, ``||``, ``|`` or
        ``;``, and finally the absolute-path check on unquoted text.

        Args:
            cmd: The shell command line to validate.

        Raises:
            SandboxViolation: If any of the checks rejects the command.
        """
        cmd_stripped = cmd.strip()

        # Forbidden patterns take precedence over the whitelist. They are
        # written in lowercase, hence the lowercased copy of the command.
        cmd_lower = cmd_stripped.lower()
        for pattern in FORBIDDEN_PATTERNS:
            if pattern.search(cmd_lower):
                raise SandboxViolation(f"Forbidden command pattern: {pattern.pattern}")

        for part in self._split_shell_commands(cmd_stripped):
            self._check_whitelisted(part)

        self._check_absolute_paths(cmd_stripped)

    @staticmethod
    def _check_whitelisted(part: str) -> None:
        """Raise SandboxViolation unless one command segment is whitelisted."""
        part = part.strip()
        if not part:
            return

        # A segment that begins with FOO=bar style assignments is judged by
        # what remains once the assignment-looking tokens are dropped.
        if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part):
            tokens = part.split()
            part = " ".join(
                t for t in tokens if "=" not in t or not re.match(r'^[A-Za-z_]', t)
            )
            if not part:
                return

        first_word = part.split()[0]
        # Relative scripts and explicit bash/sh invocations are always allowed.
        # NOTE(review): whitelist entries are matched as raw string prefixes,
        # so an entry such as "ls" also admits "lsblk"; tightening this would
        # reject versioned tool names like "gcc-12" and is left unchanged.
        allowed = part.startswith(("./", "bash ", "sh ")) or any(
            part.startswith(prefix) or first_word == prefix.strip()
            for prefix in ALLOWED_COMMANDS
        )
        if not allowed:
            raise SandboxViolation(
                f"Command not in whitelist: '{first_word}'. "
                f"Only build tools and safe utilities are allowed."
            )

    @staticmethod
    def _strip_quoted(cmd: str) -> str:
        """Return *cmd* with every quoted section replaced by a single space."""
        kept = []
        in_single = False
        in_double = False
        for ch in cmd:
            if ch == "'" and not in_double:
                in_single = not in_single
                continue
            if ch == '"' and not in_single:
                in_double = not in_double
                continue
            if not in_single and not in_double:
                kept.append(ch)
            elif kept and kept[-1] != " ":
                # Keep a separator so text on either side of the quoted
                # section does not fuse into one token.
                kept.append(" ")
        return "".join(kept)

    def _check_absolute_paths(self, cmd: str) -> None:
        """Reject unquoted absolute paths that point outside the sandbox."""
        import posixpath  # tokens are POSIX-style paths on every platform

        unquoted = self._strip_quoted(cmd)
        # Split on shell operators as well as whitespace, so a path glued to
        # one (">/etc/passwd", "cd /x;make") is still examined on its own.
        for token in re.split(r"[\s;|&<>]+", unquoted):
            if not token.startswith("/"):
                continue
            if token in ("//", "/dev/null") or len(token) <= 2:
                continue
            # Collapse "." and ".." first; otherwise "<workdir>/../../etc"
            # would pass simply because it starts with the sandbox root.
            normalized = posixpath.normpath(token)
            if self._is_within(normalized, self.workdir):
                continue
            if normalized.startswith(self._SYSTEM_PATH_PREFIXES):
                continue
            raise SandboxViolation(f"Reference to path outside sandbox: {token}")

    def safe_write(self, path: str, content: str) -> str:
        """Write *content* to *path* inside the sandbox and return the full path.

        Missing parent directories are created. If a directory already exists
        at the target it is removed recursively and replaced by the file,
        except for the sandbox root itself, which is never replaced.

        Raises:
            SandboxViolation: If *path* escapes the sandbox or resolves to the
                sandbox root.
        """
        full = self.validate_path(path)
        # "." or "" resolves to the root; replacing it would delete the whole
        # working directory.
        if full == self.workdir:
            raise SandboxViolation(f"Refusing to replace sandbox root with a file: {path}")
        # NOTE: AutoDev state files (worklog.json, plan.json, dependency.txt,
        # .autodev_state.json) are not guarded here; the caller is assumed to
        # be AutoDev itself.
        os.makedirs(os.path.dirname(full), exist_ok=True)
        if os.path.isdir(full):
            import shutil
            shutil.rmtree(full)
        with open(full, "w") as f:
            f.write(content)
        return full

    def safe_read(self, path: str) -> str:
        """Return the text content of *path*, which must lie inside the sandbox.

        Raises:
            SandboxViolation: If *path* escapes the sandbox.
        """
        full = self.validate_path(path)
        with open(full, "r") as f:
            return f.read()

    def safe_mkdir(self, path: str) -> str:
        """Create directory *path* (and parents) inside the sandbox.

        Returns:
            The resolved absolute path of the directory.

        Raises:
            SandboxViolation: If *path* escapes the sandbox.
        """
        full = self.validate_path(path)
        os.makedirs(full, exist_ok=True)
        return full

    @staticmethod
    def _split_shell_commands(cmd: str) -> list[str]:
        """Split a shell command on &&, ||, |, ; but respect quoted strings.

        Quote characters are kept in the returned segments. Only single and
        double quotes are tracked; backslash escapes, a lone ``&``, newlines
        and command substitution are not treated specially. Segments are
        stripped and empty ones are dropped.
        """
        parts = []
        current = []
        in_single = False
        in_double = False
        i = 0
        while i < len(cmd):
            c = cmd[i]
            if c == "'" and not in_double:
                in_single = not in_single
            elif c == '"' and not in_single:
                in_double = not in_double
            elif not in_single and not in_double:
                # Two-character operators are tested first so that "||" is
                # not read as two separate pipes.
                if cmd[i:i + 2] in ("&&", "||"):
                    parts.append("".join(current).strip())
                    current = []
                    i += 2
                    continue
                if c in (";", "|"):
                    parts.append("".join(current).strip())
                    current = []
                    i += 1
                    continue
            current.append(c)
            i += 1
        tail = "".join(current).strip()
        if tail:
            parts.append(tail)
        return [p for p in parts if p]