"""
AutoDev - Sandbox

Enforces working directory confinement with whitelist-based command validation.
"""
|
|
|
|
import os
|
|
import re
|
|
|
|
# Whitelisted command prefixes — only these are allowed to execute.
# The entries are grouped by purpose below and concatenated, in order, into
# the single ALLOWED_COMMANDS list that the rest of the module consumes.

# Build tools
_BUILD_TOOL_PREFIXES = (
    "make", "cmake", "gcc", "g++", "clang", "clang++", "rustc", "cargo",
    "go ", "go build", "go run", "go test", "go mod",
    "javac", "java ", "jar ", "mvn ", "gradle",
    "dotnet", "msbuild",
    "python", "python3", "pip ", "pip3 ",
    "node ", "npm ", "npx ", "yarn ", "pnpm ",
    "ruby ", "gem ", "bundle ",
    "perl ", "lua ", "luac",
    "nasm", "as ", "ld ",
)

# Common utilities (safe)
_UTILITY_PREFIXES = (
    "ls", "cat ", "head ", "tail ", "wc ", "sort ", "uniq ",
    "find ", "grep ", "awk ", "sed ", "diff ", "patch ",
    "mkdir ", "cp ", "mv ", "rm ", "touch ", "chmod ",
    "cd ", "pwd",
    "tar ", "zip ", "unzip ", "gzip ", "gunzip ",
    "curl ", "wget ",
    "echo ", "printf ", "test ", "true", "false",
    "which ", "env ", "basename ", "dirname ",
    "pkg-config", "ldconfig",
)

# Version checks
_VERSION_CHECK_PREFIXES = (
    "gcc --version", "g++ --version", "python3 --version",
    "rustc --version", "cargo --version", "go version",
    "java -version", "javac -version", "node --version",
)

ALLOWED_COMMANDS = [
    *_BUILD_TOOL_PREFIXES,
    *_UTILITY_PREFIXES,
    *_VERSION_CHECK_PREFIXES,
]
|
|
|
|
# Absolutely forbidden patterns — override whitelist.
# Each regex is searched against the lower-cased command string; any hit
# rejects the command regardless of the whitelist.
FORBIDDEN_PATTERNS = [
    re.compile(source)
    for source in (
        r"\bsudo\b",
        r"\bsu\s",
        # Root wipe. Matches "/" or "/*" as the whole target, whether it ends
        # the command or is followed by whitespace or a shell operator, so
        # "rm -rf / && echo ok" and "rm -rf /; ls" are caught as well as a
        # bare "rm -rf /". Deeper paths such as "/tmp/x" are not matched here.
        r"\brm\s+-(?:rf|fr)\s+/\*?(?=\s|[;&|]|$)",
        r"\bmkfs\b",
        r"\bdd\s+if=",
        # Redirecting output into a device node. /dev/null is excluded because
        # discarding output is harmless.
        r">\s*/dev/(?!null\b)",
        r"\bshutdown\b",
        r"\breboot\b",
        r"\binit\s+[0-6]",
        r"\bsystemctl\b",
        r"\bchmod\s+777\s+/",
        r"\bchown\b.*\s+/",
        r"\bmount\b",
        r"\bumount\b",
        r"\biptables\b",
        r"\bnft\b",
        r"\bpasswd\b",
        r"\buseradd\b",
        r"\buserdel\b",
        r"\bvisudo\b",
        r"\bcrontab\b",
    )
]
|
|
|
|
|
|
class SandboxViolation(Exception):
    """Raised when a path or command is rejected by the sandbox."""
|
|
|
|
|
|
class Sandbox:
    """Confine file access and shell commands to one working directory.

    Paths are resolved (symlinks included) and must stay inside ``workdir``.
    Commands are checked textually: first against ``FORBIDDEN_PATTERNS``,
    then each pipeline/sequence segment against ``ALLOWED_COMMANDS``, and
    finally every unquoted absolute path must be inside the sandbox or on a
    short read-only allow-list.

    NOTE(review): command validation is a text heuristic, not a shell parser.
    ``bash `` / ``sh `` prefixes are whitelisted and quoted text is skipped by
    the path check, so the content of ``bash -c '...'`` is only seen by the
    forbidden-pattern scan. Confirm this is acceptable for the threat model.
    """

    def __init__(self, workdir: str):
        """Store the canonical (symlink-resolved) sandbox root."""
        self.workdir = os.path.realpath(workdir)

    def _is_inside(self, absolute: str) -> bool:
        """Return True if *absolute* is the sandbox root or lies beneath it.

        Compares whole path components, so ``/tmp/sb-evil`` is not treated
        as being inside ``/tmp/sb`` the way a plain string-prefix test would.
        """
        try:
            return os.path.commonpath([self.workdir, absolute]) == self.workdir
        except ValueError:
            # Raised e.g. for paths on different drives; never inside.
            return False

    def validate_path(self, path: str) -> str:
        """Resolve *path* relative to the sandbox root and return it.

        Raises:
            SandboxViolation: if the resolved path is outside the sandbox.
        """
        resolved = os.path.realpath(os.path.join(self.workdir, path))
        if not self._is_inside(resolved):
            raise SandboxViolation(f"Path escapes sandbox: {path} -> {resolved}")
        return resolved

    def validate_command(self, cmd: str) -> None:
        """Check a shell command line against the sandbox policy.

        Returns None when the command is acceptable.

        Raises:
            SandboxViolation: on a forbidden pattern, a segment whose command
                is not whitelisted, or an unquoted absolute path that points
                outside the sandbox.
        """
        stripped = cmd.strip()
        lowered = stripped.lower()

        # Forbidden patterns take priority over the whitelist.
        for pattern in FORBIDDEN_PATTERNS:
            if pattern.search(lowered):
                raise SandboxViolation(f"Forbidden command pattern: {pattern.pattern}")

        # Every segment of a pipeline / command list must be whitelisted.
        for segment in self._split_shell_commands(stripped):
            self._check_whitelisted(segment)

        self._check_absolute_paths(stripped)

    def _check_whitelisted(self, part: str) -> None:
        """Raise SandboxViolation unless *part* starts with an allowed command."""
        part = part.strip()
        if not part:
            return

        # Skip env var assignments like FOO=bar and look at what follows.
        if re.match(r'^[A-Za-z_][A-Za-z0-9_]*=', part):
            part = " ".join(
                tok for tok in part.split()
                if "=" not in tok or not re.match(r'^[A-Za-z_]', tok)
            )
            if not part:
                return  # the segment consisted of assignments only

        first_word = part.split()[0]
        # Scripts via ./, bash or sh are accepted in addition to the whitelist.
        allowed = part.startswith(("./", "bash ", "sh ")) or any(
            part.startswith(prefix) or first_word == prefix.strip()
            for prefix in ALLOWED_COMMANDS
        )
        if not allowed:
            raise SandboxViolation(
                f"Command not in whitelist: '{first_word}'. "
                f"Only build tools and safe utilities are allowed."
            )

    @staticmethod
    def _unquoted_text(cmd: str) -> str:
        """Return *cmd* with quoted spans removed.

        Quote characters are dropped, and each quoted span collapses to at
        most one space so the tokens on either side stay separate.
        """
        kept = []
        in_single = in_double = False
        for ch in cmd:
            if ch == "'" and not in_double:
                in_single = not in_single
                continue
            if ch == '"' and not in_single:
                in_double = not in_double
                continue
            if not in_single and not in_double:
                kept.append(ch)
            elif kept and kept[-1] != " ":
                kept.append(" ")
        return "".join(kept)

    def _token_in_workdir(self, token: str) -> bool:
        """Return True if an absolute-path token points inside the sandbox.

        A shell operator glued to the path (``/work;`` or ``/work&&make``) is
        cut off first, and ``..`` segments are collapsed lexically so that
        ``/work/../etc`` is not mistaken for a sandbox path.
        """
        path_text = re.split(r"[;&|<>()]", token, maxsplit=1)[0]
        return self._is_inside(os.path.normpath(path_text))

    def _check_absolute_paths(self, cmd: str) -> None:
        """Reject unquoted absolute paths that leave the sandbox.

        Only whitespace-separated tokens that start with ``/`` are examined;
        relative paths and quoted text are not checked here.
        """
        read_only_prefixes = ("/usr", "/lib", "/etc/alternatives", "/bin", "/opt")
        for token in self._unquoted_text(cmd).split():
            if not token.startswith("/") or self._token_in_workdir(token):
                continue
            # Very short tokens ("/", "//", "/x") and /dev/null are tolerated.
            if token in ("//", "/dev/null") or len(token) <= 2:
                continue
            if token.startswith(read_only_prefixes):
                continue
            raise SandboxViolation(f"Reference to path outside sandbox: {token}")

    def safe_write(self, path: str, content: str) -> str:
        """Write *content* to *path* inside the sandbox and return the full path.

        Missing parent directories are created. If a directory already exists
        at the target path it is removed and replaced by the file.

        AutoDev's own state files (worklog.json, plan.json, dependency.txt,
        .autodev_state.json) are intentionally not blocked here, since the
        caller is AutoDev itself.

        Raises:
            SandboxViolation: if *path* escapes the sandbox, or resolves to
                the sandbox root itself (which would otherwise be deleted).
        """
        import shutil

        full = self.validate_path(path)
        if full == self.workdir:
            # Without this guard the directory replacement below would wipe
            # the whole working directory.
            raise SandboxViolation(f"Refusing to overwrite sandbox root: {path!r}")

        os.makedirs(os.path.dirname(full), exist_ok=True)
        if os.path.isdir(full):
            # A directory occupies the target path: replace it with the file.
            shutil.rmtree(full)
        with open(full, "w") as f:
            f.write(content)
        return full

    def safe_read(self, path: str) -> str:
        """Return the text content of *path*, which must be inside the sandbox."""
        full = self.validate_path(path)
        with open(full, "r") as f:
            return f.read()

    def safe_mkdir(self, path: str) -> str:
        """Create directory *path* (and parents) inside the sandbox; return it."""
        full = self.validate_path(path)
        os.makedirs(full, exist_ok=True)
        return full

    @staticmethod
    def _split_shell_commands(cmd: str) -> list[str]:
        """Split a shell command on &&, ||, |, ; but respect quoted strings.

        Quote characters are kept in the returned segments; empty segments
        are dropped. A single ``&``, newlines, ``$(...)`` and backticks are
        not treated as separators.
        """
        parts = []
        current = []
        in_single = in_double = False
        i = 0
        while i < len(cmd):
            c = cmd[i]
            if c == "'" and not in_double:
                in_single = not in_single
                current.append(c)
            elif c == '"' and not in_single:
                in_double = not in_double
                current.append(c)
            elif in_single or in_double:
                current.append(c)
            elif cmd[i:i + 2] in ("&&", "||"):
                # Two-character operator: close the segment and skip both chars.
                parts.append("".join(current).strip())
                current = []
                i += 2
                continue
            elif c in (";", "|"):
                parts.append("".join(current).strip())
                current = []
            else:
                current.append(c)
            i += 1

        tail = "".join(current).strip()
        if tail:
            parts.append(tail)
        return [p for p in parts if p]
|