Source code for batter.systems.mabfe

from __future__ import annotations

import shutil
from pathlib import Path
from typing import Dict, Iterable, List, Mapping, Optional, Sequence

from loguru import logger

from .core import SimSystem, SystemBuilder, CreateSystemLike


__all__ = [
    "MABFEBuilder",
    "make_ligand_subsystem",
    "prepare_subsystems_for_ligands",
]


[docs] class MABFEBuilder(SystemBuilder): """ Builder for membrane/absolute free-energy (MABFE) systems. This builder prepares a *shared* working directory under ``system.root`` and, optionally, stages *all ligands at once* into per-ligand subfolders. Directory layout (relative to ``system.root``):: inputs/ # canonical copies of user-provided inputs artifacts/ # files produced by builders (e.g., PRMTOP, RST7) simulations/ <LIG1>/inputs/ligand.<ext> artifacts/ <LIG2>/inputs/ligand.<ext> artifacts/ ... """ # -------------------- public API --------------------
[docs] def build(self, system: SimSystem, args: CreateSystemLike) -> SimSystem: """ Prepare the shared system area (stage protein/topology/coordinates/inputs). Uses the **actual suffixes** from user inputs (no hard-coded extensions). """ self._assert_names_match(system, args) root = system.root inputs_dir = root / "inputs" artifacts_dir = root / "artifacts" root.mkdir(parents=True, exist_ok=True) inputs_dir.mkdir(parents=True, exist_ok=True) artifacts_dir.mkdir(parents=True, exist_ok=True) marker = artifacts_dir / ".prepared" if marker.exists() and not args.overwrite: logger.info("System found at {} (overwrite=False) — keeping existing artifacts.", root) return self._assemble_system(system, inputs_dir, artifacts_dir, args) if args.overwrite: logger.warning("overwrite=True — wiping and re-preparing artifacts under {}", artifacts_dir) self._clean_dir(artifacts_dir) # Stage shared inputs with their actual suffixes staged_protein = self._stage_optional( inputs_dir, args.protein_input, f"protein{args.protein_input.suffix}" if args.protein_input else None, ) staged_top = self._stage_optional( inputs_dir, args.system_input, f"system{args.system_input.suffix}" if args.system_input else None, ) staged_coord = self._stage_optional( inputs_dir, args.system_coordinate, f"system{args.system_coordinate.suffix}" if args.system_coordinate else None, ) # Stage ligands with <NAME>.<ext> staged_ligs = self._stage_ligands_named(inputs_dir, args.ligand_paths) marker.touch() updated = system.with_artifacts( protein=staged_protein, topology=staged_top, coordinates=staged_coord, ligands=tuple(staged_ligs), lipid_mol=tuple(args.lipid_mol), anchors=tuple(args.anchor_atoms), meta=system.meta.merge(ligand_ff=getattr(args, "ligand_ff", "gaff2")), ) mode_label = updated.meta.mode or "MABFE" logger.info( f"Prepared {mode_label} system '{updated.name}' at {updated.root} (ligands: {len(updated.ligands)})") logger.info(" Protein: {}", updated.protein) logger.info(" System Topology: {}", updated.topology) logger.info(" System Coord: {}", updated.coordinates) logger.info(" Ligands: {}", ", ".join(l.stem for l in updated.ligands)) return updated
[docs] def build_all_ligands( self, parent: SimSystem, lig_paths: Sequence[Path], overwrite: bool = False, ) -> Dict[str, SimSystem]: """ Stage **all ligands at once** under ``parent.root/simulations/<NAME>/...``. Ligands are copied as ``inputs/ligand.<ext>`` using each source's suffix. """ lig_dir = parent.root / "simulations" lig_dir.mkdir(parents=True, exist_ok=True) children: Dict[str, SimSystem] = {} for src in lig_paths: p = Path(src) name = p.stem.upper() sub_root = lig_dir / name # ensure layout (sub_root / "inputs").mkdir(parents=True, exist_ok=True) art_dir = sub_root / "artifacts" art_dir.mkdir(parents=True, exist_ok=True) if overwrite: logger.warning("overwrite=True — wiping ligand artifacts under {}", art_dir) self._clean_dir(art_dir) # stage ligand into its own inputs/ as ligand.<ext> lig_dst = sub_root / "inputs" / f"ligand{p.suffix}" shutil.copy2(p, lig_dst) child = SimSystem( name=f"{parent.name}:{name}", root=sub_root, protein=parent.protein, topology=parent.topology, coordinates=parent.coordinates, ligands=(lig_dst,), lipid_mol=parent.lipid_mol, anchors=parent.anchors, meta=parent.meta.merge(ligand=name), ) children[name] = child logger.debug("Staged {} ligand subsystems under {}", len(children), lig_dir) return children
[docs] @staticmethod def make_child_for_ligand(parent: SimSystem, lig_name: str, lig_src: Path) -> SimSystem: """ Create a single per-ligand child system under ``simulations/<NAME>/`` with ligand.<ext>. """ lig_dir = parent.root / "simulations" / lig_name (lig_dir / "inputs").mkdir(parents=True, exist_ok=True) (lig_dir / "artifacts").mkdir(parents=True, exist_ok=True) p = Path(lig_src) dst = lig_dir / "inputs" / f"ligand{p.suffix}" shutil.copy2(p, dst) return SimSystem( name=f"{parent.name}:{lig_name}", root=lig_dir, protein=parent.protein, topology=parent.topology, coordinates=parent.coordinates, ligands=(dst,), lipid_mol=parent.lipid_mol, anchors=parent.anchors, meta=parent.meta.merge(ligand=lig_name), )
@staticmethod def _assert_names_match(system: SimSystem, args: CreateSystemLike) -> None: if system.name != args.system_name: raise ValueError( f"System name mismatch: SimSystem.name={system.name!r} vs args.system_name={args.system_name!r}" ) @staticmethod def _clean_dir(path: Path) -> None: for p in path.iterdir(): if p.is_dir(): shutil.rmtree(p) else: p.unlink(missing_ok=True) @staticmethod def _stage_optional(dst_dir: Path, maybe_src: Optional[Path], filename: Optional[str]) -> Optional[Path]: if maybe_src is None or filename is None: return None dst = dst_dir / filename dst.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(maybe_src, dst) return dst @staticmethod def _stage_ligands_named(dst_dir: Path, lig_map: Mapping[str, Path]) -> List[Path]: """ Copy ligand files into dst_dir as <LIG_NAME>.<ext> using the keys of lig_map. """ staged: List[Path] = [] for name, src in lig_map.items(): src = Path(src) dst = dst_dir / f"{name}{src.suffix}" dst.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src, dst) staged.append(dst) return staged @staticmethod def _first_existing(paths: Iterable[Optional[Path]]) -> Optional[Path]: for p in paths: if p and Path(p).exists(): return Path(p) return None def _assemble_system( self, system: SimSystem, inputs_dir: Path, artifacts_dir: Path, args: CreateSystemLike, ) -> SimSystem: """ Re-assemble a SimSystem referencing already-staged files. Prefer artifacts/, fallback to inputs/, using **suffixes from args** when available. """ # Determine expected filenames based on original input suffixes (if provided) prot_candidates: List[Optional[Path]] = [] if args.protein_input: prot_candidates += [ artifacts_dir / f"protein{args.protein_input.suffix}", inputs_dir / f"protein{args.protein_input.suffix}", ] # Also fallback to a couple of common names if inputs absent prot_candidates += [artifacts_dir / "protein.pdb", inputs_dir / "protein.pdb"] top_candidates: List[Optional[Path]] = [] if args.system_input: top_candidates += [ artifacts_dir / f"system{args.system_input.suffix}", inputs_dir / f"system{args.system_input.suffix}", ] top_candidates += [inputs_dir / "input.prmtop", artifacts_dir / "system.prmtop"] coord_candidates: List[Optional[Path]] = [] if args.system_coordinate: coord_candidates += [ artifacts_dir / f"system{args.system_coordinate.suffix}", inputs_dir / f"system{args.system_coordinate.suffix}", ] coord_candidates += [inputs_dir / "input.rst7", artifacts_dir / "system.rst7"] protein = self._first_existing(prot_candidates) topology = self._first_existing(top_candidates) coordinates = self._first_existing(coord_candidates) ligands = sorted(inputs_dir.glob("*.sdf")) + sorted(inputs_dir.glob("*.mol2")) + sorted(inputs_dir.glob("*.pdb")) return system.with_artifacts( protein=protein, topology=topology, coordinates=coordinates, ligands=tuple(ligands), lipid_mol=tuple(args.lipid_mol), anchors=tuple(args.anchor_atoms), meta={"ligand_ff": getattr(args, "ligand_ff", "gaff2")}, )
[docs] def make_ligand_subsystem(parent: SimSystem, lig_name: str, lig_src: Path) -> SimSystem: builder = MABFEBuilder() return builder.make_child_for_ligand(parent, lig_name, Path(lig_src))
[docs] def prepare_subsystems_for_ligands(parent: SimSystem, lig_paths: Iterable[Path]) -> Dict[str, SimSystem]: builder = MABFEBuilder() return builder.build_all_ligands(parent, [Path(p) for p in lig_paths], overwrite=False)