""" AutoDev - Sandbox Enforces working directory confinement with whitelist-based command validation. """ import os import re # Whitelisted command prefixes — only these are allowed to execute ALLOWED_COMMANDS = [ # Build tools "make", "cmake", "gcc", "g++", "clang", "clang++", "rustc", "cargo", "go ", "go build", "go run", "go test", "go mod", "javac", "java ", "jar ", "mvn ", "gradle", "dotnet", "msbuild", "python", "python3", "pip ", "pip3 ", "node ", "npm ", "npx ", "yarn ", "pnpm ", "ruby ", "gem ", "bundle ", "perl ", "lua ", "luac", "nasm", "as ", "ld ", # Common utilities (safe) "ls", "cat ", "head ", "tail ", "wc ", "sort ", "uniq ", "find ", "grep ", "awk ", "sed ", "diff ", "patch ", "mkdir ", "cp ", "mv ", "rm ", "touch ", "chmod ", "cd ", "pwd", "tar ", "zip ", "unzip ", "gzip ", "gunzip ", "curl ", "wget ", "echo ", "printf ", "test ", "true", "false", "which ", "env ", "basename ", "dirname ", "pkg-config", "ldconfig", # Version checks "gcc --version", "g++ --version", "python3 --version", "rustc --version", "cargo --version", "go version", "java -version", "javac -version", "node --version", ] # Absolutely forbidden patterns — override whitelist FORBIDDEN_PATTERNS = [ re.compile(r"\bsudo\b"), re.compile(r"\bsu\s"), re.compile(r"\brm\s+-rf\s+/\s*$"), re.compile(r"\bmkfs\b"), re.compile(r"\bdd\s+if="), re.compile(r">\s*/dev/"), re.compile(r"\bshutdown\b"), re.compile(r"\breboot\b"), re.compile(r"\binit\s+[0-6]"), re.compile(r"\bsystemctl\b"), re.compile(r"\bchmod\s+777\s+/"), re.compile(r"\bchown\b.*\s+/"), re.compile(r"\bmount\b"), re.compile(r"\bumount\b"), re.compile(r"\biptables\b"), re.compile(r"\bnft\b"), re.compile(r"\bpasswd\b"), re.compile(r"\buseradd\b"), re.compile(r"\buserdel\b"), re.compile(r"\bvisudo\b"), re.compile(r"\bcrontab\b"), ] class SandboxViolation(Exception): pass class Sandbox: def __init__(self, workdir: str): self.workdir = os.path.realpath(workdir) def validate_path(self, path: str) -> str: resolved = os.path.realpath(os.path.join(self.workdir, path)) if not resolved.startswith(self.workdir): raise SandboxViolation(f"Path escapes sandbox: {path} -> {resolved}") return resolved def validate_command(self, cmd: str): cmd_stripped = cmd.strip() cmd_lower = cmd_stripped.lower() # Check forbidden patterns first for pattern in FORBIDDEN_PATTERNS: if pattern.search(cmd_lower): raise SandboxViolation(f"Forbidden command pattern: {pattern.pattern}") # Check if command starts with an allowed prefix # Handle shell constructs: pipes, &&, ; # But respect quoted strings — don't split inside them parts = self._split_shell_commands(cmd_stripped) for part in parts: part = part.strip() if not part: continue # Skip env var assignments like FOO=bar if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part): # Extract the command after assignments tokens = part.split() part = " ".join(t for t in tokens if "=" not in t or not re.match(r'^[A-Za-z_]', t)) if not part: continue allowed = False for prefix in ALLOWED_COMMANDS: if part.startswith(prefix) or part.split()[0] == prefix.strip(): allowed = True break # Also allow ./scripts and relative paths if part.startswith("./") or part.startswith("bash ") or part.startswith("sh "): allowed = True if not allowed: raise SandboxViolation( f"Command not in whitelist: '{part.split()[0]}'. " f"Only build tools and safe utilities are allowed." ) # Check for path escapes in write-like commands # Only check unquoted tokens that look like real absolute paths in_single = False in_double = False for ch in cmd_stripped: if ch == "'" and not in_double: in_single = not in_single elif ch == '"' and not in_single: in_double = not in_double # If the command has balanced quotes, extract only unquoted parts unquoted_parts = [] current = [] in_single = False in_double = False for ch in cmd_stripped: if ch == "'" and not in_double: in_single = not in_single continue elif ch == '"' and not in_single: in_double = not in_double continue if not in_single and not in_double: current.append(ch) else: if current and current[-1] != " ": current.append(" ") unquoted = "".join(current) for token in unquoted.split(): if token.startswith("/") and not token.startswith(self.workdir): if token in ("//", "/dev/null") or len(token) <= 2: continue read_only_prefixes = ["/usr", "/lib", "/etc/alternatives", "/bin", "/opt"] if any(token.startswith(p) for p in read_only_prefixes): continue raise SandboxViolation(f"Reference to path outside sandbox: {token}") def safe_write(self, path: str, content: str) -> str: full = self.validate_path(path) # Protect AutoDev's own state files from being overwritten protected = {"worklog.json", "plan.json", "dependency.txt", ".autodev_state.json"} if os.path.basename(full) in protected and os.path.exists(full): # Only AutoDev internals should write these pass # Allow — the caller is AutoDev itself os.makedirs(os.path.dirname(full), exist_ok=True) # Don't write a file if a directory exists at that path if os.path.isdir(full): import shutil shutil.rmtree(full) with open(full, "w") as f: f.write(content) return full def safe_read(self, path: str) -> str: full = self.validate_path(path) with open(full, "r") as f: return f.read() def safe_mkdir(self, path: str) -> str: full = self.validate_path(path) os.makedirs(full, exist_ok=True) return full @staticmethod def _split_shell_commands(cmd: str) -> list[str]: """Split a shell command on &&, ||, |, ; but respect quoted strings.""" parts = [] current = [] in_single = False in_double = False i = 0 while i < len(cmd): c = cmd[i] if c == "'" and not in_double: in_single = not in_single current.append(c) elif c == '"' and not in_single: in_double = not in_double current.append(c) elif not in_single and not in_double: # Check for &&, ||, |, ; two = cmd[i:i+2] if two in ("&&", "||"): parts.append("".join(current).strip()) current = [] i += 2 continue elif c in (";", "|"): parts.append("".join(current).strip()) current = [] else: current.append(c) else: current.append(c) i += 1 tail = "".join(current).strip() if tail: parts.append(tail) return [p for p in parts if p]