Files

215 lines
7.7 KiB
Python

"""
AutoDev - Sandbox
Enforces working directory confinement with whitelist-based command validation.
"""
import os
import re
# Command prefixes accepted by Sandbox.validate_command. A sub-command passes
# when it starts with one of these strings, or when its first word equals the
# entry with surrounding whitespace stripped.
ALLOWED_COMMANDS = [
    # C / C++ / Rust toolchains
    "make", "cmake",
    "gcc", "g++", "clang", "clang++",
    "rustc", "cargo",
    # Go
    "go ", "go build", "go run", "go test", "go mod",
    # JVM
    "javac", "java ", "jar ", "mvn ", "gradle",
    # .NET
    "dotnet", "msbuild",
    # Python
    "python", "python3", "pip ", "pip3 ",
    # JavaScript
    "node ", "npm ", "npx ", "yarn ", "pnpm ",
    # Ruby
    "ruby ", "gem ", "bundle ",
    # Perl / Lua
    "perl ", "lua ", "luac",
    # Assemblers and linker
    "nasm", "as ", "ld ",
    # Text and file inspection
    "ls", "cat ", "head ", "tail ", "wc ", "sort ", "uniq ",
    "find ", "grep ", "awk ", "sed ", "diff ", "patch ",
    # File manipulation
    "mkdir ", "cp ", "mv ", "rm ", "touch ", "chmod ",
    "cd ", "pwd",
    # Archives
    "tar ", "zip ", "unzip ", "gzip ", "gunzip ",
    # Downloads
    "curl ", "wget ",
    # Shell builtins and small helpers
    "echo ", "printf ", "test ", "true", "false",
    "which ", "env ", "basename ", "dirname ",
    "pkg-config", "ldconfig",
    # Explicit version probes
    "gcc --version", "g++ --version", "python3 --version",
    "rustc --version", "cargo --version", "go version",
    "java -version", "javac -version", "node --version",
]
# Deny-list checked before the whitelist: Sandbox.validate_command rejects the
# whole command line as soon as any of these regexes matches its lower-cased
# text.
FORBIDDEN_PATTERNS = [
    re.compile(expression)
    for expression in (
        # Privilege escalation
        r"\bsudo\b",
        r"\bsu\s",
        # Destructive disk / device operations
        r"\brm\s+-rf\s+/\s*$",
        r"\bmkfs\b",
        r"\bdd\s+if=",
        r">\s*/dev/",
        # Power state and service management
        r"\bshutdown\b",
        r"\breboot\b",
        r"\binit\s+[0-6]",
        r"\bsystemctl\b",
        # Ownership / permission changes on absolute paths
        r"\bchmod\s+777\s+/",
        r"\bchown\b.*\s+/",
        # Filesystem mounts
        r"\bmount\b",
        r"\bumount\b",
        # Firewall
        r"\biptables\b",
        r"\bnft\b",
        # Account and scheduler administration
        r"\bpasswd\b",
        r"\buseradd\b",
        r"\buserdel\b",
        r"\bvisudo\b",
        r"\bcrontab\b",
    )
]
class SandboxViolation(Exception):
    """Raised when a path or shell command is rejected by the sandbox checks."""
class Sandbox:
    """Confine file access and shell commands to a single working directory.

    File helpers resolve every path with ``os.path.realpath`` and refuse
    anything that ends up outside ``workdir``. ``validate_command`` screens a
    shell command line against the module-level ``FORBIDDEN_PATTERNS`` and
    ``ALLOWED_COMMANDS`` before the caller executes it.

    The command checks are textual heuristics, not a shell parser: backslash
    escapes, ``$(...)`` and backticks are not interpreted, and anything that
    starts with ``bash ``, ``sh `` or ``./`` is accepted as-is.
    """

    # Locations outside the workdir that a command may still mention.
    # Matched as plain string prefixes on purpose so that variants such as
    # /lib64 or /lib32 keep passing, as they did before.
    _READ_ONLY_PREFIXES = ("/usr", "/lib", "/etc/alternatives", "/bin", "/opt")

    def __init__(self, workdir: str):
        """Remember the symlink-resolved absolute form of *workdir*."""
        self.workdir = os.path.realpath(workdir)

    @staticmethod
    def _is_within(path: str, root: str) -> bool:
        """Return True if *path* is *root* itself or lies beneath it.

        The comparison is made on a path-component boundary so that a sibling
        like ``/tmp/work-evil`` is not mistaken for a child of ``/tmp/work``.
        """
        if path == root:
            return True
        prefix = root if root.endswith(os.sep) else root + os.sep
        return path.startswith(prefix)

    def validate_path(self, path: str) -> str:
        """Resolve *path* relative to the workdir and return the absolute path.

        Raises:
            SandboxViolation: if the resolved path is not inside the workdir.
        """
        resolved = os.path.realpath(os.path.join(self.workdir, path))
        if not self._is_within(resolved, self.workdir):
            raise SandboxViolation(f"Path escapes sandbox: {path} -> {resolved}")
        return resolved

    def validate_command(self, cmd: str) -> None:
        """Check a shell command line against the deny-list and the whitelist.

        The command is rejected if any forbidden pattern matches, if any
        sub-command (split on ``&&``, ``||``, ``|``, ``;``, ``&``, newline)
        does not start with a whitelisted prefix, or if an unquoted token is
        an absolute path outside both the workdir and the tolerated system
        locations.

        Raises:
            SandboxViolation: on the first failed check.
        """
        cmd_stripped = cmd.strip()
        cmd_lower = cmd_stripped.lower()

        # The deny-list takes precedence over the whitelist.
        for pattern in FORBIDDEN_PATTERNS:
            if pattern.search(cmd_lower):
                raise SandboxViolation(f"Forbidden command pattern: {pattern.pattern}")

        for part in self._split_shell_commands(cmd_stripped):
            part = part.strip()
            if not part:
                continue
            # Drop env-var assignments like FOO=bar so the real command name
            # is what gets compared against the whitelist.
            if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part):
                tokens = part.split()
                part = " ".join(
                    t for t in tokens
                    if "=" not in t or not re.match(r'^[A-Za-z_]', t)
                )
                if not part:
                    continue
            first_word = part.split()[0]
            allowed = (
                # Local scripts and explicit shell invocations are accepted.
                part.startswith(("./", "bash ", "sh "))
                or any(
                    part.startswith(prefix) or first_word == prefix.strip()
                    for prefix in ALLOWED_COMMANDS
                )
            )
            if not allowed:
                raise SandboxViolation(
                    f"Command not in whitelist: '{part.split()[0]}'. "
                    f"Only build tools and safe utilities are allowed."
                )

        # Only unquoted tokens are inspected for absolute paths, so string
        # arguments such as "see /etc/hosts" do not trigger a violation.
        for token in self._unquoted_text(cmd_stripped).split():
            if not token.startswith("/"):
                continue
            if token in ("//", "/dev/null") or len(token) <= 2:
                continue
            # Collapse "." and ".." lexically so that /usr/../etc/shadow or
            # <workdir>/../x cannot hide behind an allowed prefix.
            normalized = os.path.normpath(token)
            if self._is_within(normalized, self.workdir):
                continue
            if normalized.startswith(self._READ_ONLY_PREFIXES):
                continue
            raise SandboxViolation(f"Reference to path outside sandbox: {token}")

    @staticmethod
    def _unquoted_text(cmd: str) -> str:
        """Return *cmd* with quoted spans removed.

        Quote characters are dropped and each quoted span collapses to at
        most one space, so tokens on either side stay separated.
        """
        kept = []
        in_single = False
        in_double = False
        for ch in cmd:
            if ch == "'" and not in_double:
                in_single = not in_single
                continue
            if ch == '"' and not in_single:
                in_double = not in_double
                continue
            if not in_single and not in_double:
                kept.append(ch)
            elif kept and kept[-1] != " ":
                kept.append(" ")
        return "".join(kept)

    def safe_write(self, path: str, content: str) -> str:
        """Write *content* to *path* inside the sandbox and return the full path.

        Missing parent directories are created. If a directory already exists
        at the target path it is removed recursively and replaced by the file.
        AutoDev state files (worklog.json, plan.json, dependency.txt,
        .autodev_state.json) are intentionally writable here because the
        caller is AutoDev itself.

        Raises:
            SandboxViolation: if *path* escapes the sandbox, or resolves to
                the sandbox root (which would otherwise be deleted).
        """
        full = self.validate_path(path)
        # Without this guard, "" or "." would rmtree the whole workdir below.
        if full == self.workdir:
            raise SandboxViolation(f"Refusing to overwrite sandbox root: {path}")
        os.makedirs(os.path.dirname(full), exist_ok=True)
        # A directory occupying the target path is replaced by the file.
        if os.path.isdir(full):
            import shutil
            shutil.rmtree(full)
        with open(full, "w") as f:
            f.write(content)
        return full

    def safe_read(self, path: str) -> str:
        """Return the text content of *path* after validating it.

        Raises:
            SandboxViolation: if *path* escapes the sandbox.
        """
        full = self.validate_path(path)
        with open(full, "r") as f:
            return f.read()

    def safe_mkdir(self, path: str) -> str:
        """Create directory *path* (and parents) inside the sandbox.

        Returns the full path; an already existing directory is not an error.

        Raises:
            SandboxViolation: if *path* escapes the sandbox.
        """
        full = self.validate_path(path)
        os.makedirs(full, exist_ok=True)
        return full

    @staticmethod
    def _split_shell_commands(cmd: str) -> list[str]:
        """Split a command line into its sub-commands, respecting quotes.

        Separators are ``&&``, ``||``, ``|``, ``;``, a lone ``&`` and newline.
        An ``&`` that belongs to a redirection (``2>&1``, ``>&``, ``<&``,
        ``&>``) is kept as part of the command. Separators inside single or
        double quotes are left alone. Empty pieces are dropped.
        """
        parts = []
        current = []
        in_single = False
        in_double = False

        def flush():
            parts.append("".join(current).strip())
            current.clear()

        i = 0
        length = len(cmd)
        while i < length:
            c = cmd[i]
            if c == "'" and not in_double:
                in_single = not in_single
                current.append(c)
            elif c == '"' and not in_single:
                in_double = not in_double
                current.append(c)
            elif in_single or in_double:
                current.append(c)
            elif cmd[i:i + 2] in ("&&", "||"):
                flush()
                i += 2
                continue
            elif c in (";", "|", "\n"):
                flush()
            elif c == "&":
                prev_ch = cmd[i - 1] if i > 0 else ""
                next_ch = cmd[i + 1:i + 2]
                if prev_ch in ("<", ">") or next_ch == ">":
                    # Part of a redirection, not a command separator.
                    current.append(c)
                else:
                    # Background operator: whatever follows is a new command.
                    flush()
            else:
                current.append(c)
            i += 1

        tail = "".join(current).strip()
        if tail:
            parts.append(tail)
        return [p for p in parts if p]