|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
|
NeMo dependency structure definition. |
|
|
This module analyzes the codebase to determine internal dependencies between NeMo collections and core components. |
|
|
""" |
|
|
|
|
|
import ast |
|
|
import json |
|
|
import os |
|
|
from typing import Dict, List, Set, Union |
|
|
|
|
|
|
|
|
def find_python_files(directory: str) -> List[str]:
    """Collect every ``.py`` file under the known source folders of *directory*.

    Only the ``nemo``, ``scripts``, ``examples`` and ``tests`` sub-directories
    are searched (recursively); folders that do not exist are skipped.

    Args:
        directory: Root directory that contains the folders listed above.

    Returns:
        Paths of the Python files found, each one prefixed with *directory*.
    """
    collected: List[str] = []

    for top_dir in ('nemo', 'scripts', 'examples', 'tests'):
        search_root = os.path.join(directory, top_dir)
        if not os.path.exists(search_root):
            continue
        for current_dir, _, filenames in os.walk(search_root):
            collected.extend(
                os.path.join(current_dir, name) for name in filenames if name.endswith('.py')
            )

    return collected
|
|
|
|
|
|
|
|
def _nemo_package_for(module_name: str) -> Union[str, None]:
    """Map a dotted module path to the NeMo package it belongs to, or ``None``."""
    parts = module_name.split('.')
    if len(parts) < 2 or parts[0] != 'nemo':
        return None

    module_type = parts[1]
    if module_type == 'collections':
        # A bare ``nemo.collections`` does not identify a specific collection.
        return f"nemo.collections.{parts[2]}" if len(parts) >= 3 else None
    if module_type in ('core', 'utils', 'export', 'deploy', 'lightning', 'automodel'):
        return f"nemo.{module_type}"
    return None


def analyze_imports(file_path: str) -> Set[str]:
    """Analyze a Python file and return its NeMo package dependencies using AST parsing.

    Both import forms are recognised:

    * ``from nemo.<pkg>... import name`` / ``from nemo import <pkg>`` /
      ``from nemo.collections import <collection>``
    * ``import nemo.<pkg>...`` (with or without ``as``)

    Relative imports are ignored because they do not refer to the top-level
    ``nemo`` package.

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        Package names such as ``"nemo.core"`` or ``"nemo.collections.asr"``.
        The set is empty when the file cannot be read or parsed; in that case
        an error message is printed and no exception is raised.
    """
    imports: Set[str] = set()

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            tree = ast.parse(f.read(), filename=file_path)
    except (OSError, SyntaxError, ValueError, RecursionError) as e:
        # Best effort: one unreadable file must not abort the whole analysis.
        # UnicodeDecodeError is a ValueError subclass and is covered here.
        print(f"Error analyzing {file_path}: {e}")
        return imports

    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom):
            if node.level or not node.module:
                continue  # relative import
            # Also try "<module>.<name>" so that ``from nemo import core`` and
            # ``from nemo.collections import asr`` resolve to a package.
            candidates = [node.module]
            candidates.extend(f"{node.module}.{alias.name}" for alias in node.names if alias.name != '*')
        elif isinstance(node, ast.Import):
            candidates = [alias.name for alias in node.names]
        else:
            continue

        for candidate in candidates:
            package = _nemo_package_for(candidate)
            if package is not None:
                imports.add(package)

    return imports
|
|
|
|
|
|
|
|
def find_top_level_packages(nemo_root: str) -> List[str]:
    """List the sub-directories of ``<nemo_root>/nemo`` in sorted order.

    Entries whose name starts with ``__`` (for example ``__pycache__``) and
    plain files are left out.

    Args:
        nemo_root: Directory that contains the ``nemo`` folder.

    Returns:
        Sorted directory names, or an empty list (after printing a warning)
        when ``<nemo_root>/nemo`` does not exist.
    """
    nemo_dir = os.path.join(nemo_root, 'nemo')

    if not os.path.exists(nemo_dir):
        print(f"Warning: nemo directory not found at {nemo_dir}")
        return []

    return sorted(
        entry
        for entry in os.listdir(nemo_dir)
        if not entry.startswith('__') and os.path.isdir(os.path.join(nemo_dir, entry))
    )
|
|
|
|
|
|
|
|
def find_collection_modules(nemo_root: str) -> Dict[str, List[str]]:
    """Find all modules within collections.

    Walks every sub-directory of ``<nemo_root>/nemo/collections`` and records
    the dotted module name of each ``.py`` file found there. Files and
    directories directly under ``collections`` whose name starts with ``__``
    are skipped, as are files starting with ``__`` (e.g. ``__init__.py``).

    Args:
        nemo_root: Directory that contains the ``nemo`` folder.

    Returns:
        Mapping of ``"nemo.collections.<collection>"`` to the sorted list of
        fully qualified module names inside it, for example
        ``"nemo.collections.asr.models.ctc"``. Empty (after printing a
        warning) when the collections directory does not exist.
    """
    collection_modules: Dict[str, List[str]] = {}
    collections_dir = os.path.join(nemo_root, 'nemo', 'collections')

    if not os.path.exists(collections_dir):
        print(f"Warning: collections directory not found at {collections_dir}")
        return collection_modules

    # Sorted so the resulting key order does not depend on the filesystem.
    for collection in sorted(os.listdir(collections_dir)):
        collection_path = os.path.join(collections_dir, collection)
        if not os.path.isdir(collection_path) or collection.startswith('__'):
            continue

        modules = []
        for root, _dirs, files in os.walk(collection_path):
            for file in files:
                if not file.endswith('.py') or file.startswith('__'):
                    continue
                # The path is relative to collections_dir, so it already
                # starts with the collection name; do not prepend it again.
                rel_path = os.path.relpath(os.path.join(root, file), collections_dir)
                # Strip only the trailing extension. A blanket
                # replace('.py', '') would also eat the start of components
                # such as "pytorch" once separators have become dots.
                stem = os.path.splitext(rel_path)[0]
                modules.append("nemo.collections." + stem.replace(os.sep, '.'))

        collection_modules[f"nemo.collections.{collection}"] = sorted(modules)

    return collection_modules
|
|
|
|
|
|
|
|
def _package_for_path(parts: List[str], top_level_packages: Set[str]) -> Union[str, None]:
    """Return the NeMo package owning a root-relative path, or ``None`` if it has none."""
    if len(parts) < 2 or parts[0] != 'nemo':
        return None
    if parts[1] == 'collections' and len(parts) >= 3:
        return f"nemo.collections.{parts[2]}"
    if parts[1] in top_level_packages:
        return f"nemo.{parts[1]}"
    return None


def build_dependency_graph(nemo_root: str) -> Dict[str, Union[List[str], Dict[str, List[str]]]]:
    """Build a dependency graph by analyzing all Python files.

    The graph is a *reverse* dependency map: each package is mapped to the
    sorted list of other packages that import it.

    Only files located under ``nemo/`` are attributed to a package. Files
    from other folders have no owning package and are skipped, so their
    imports are never credited to an unrelated package.

    Args:
        nemo_root: Directory that contains the ``nemo`` folder.

    Returns:
        A dict with one entry per non-collection package
        (``"nemo.<pkg>" -> [dependents]``) and, when at least one collection
        exists, a ``"nemo.collections"`` entry holding a nested dict of
        ``"nemo.collections.<name>" -> [dependents]``.
    """
    top_level_packages = find_top_level_packages(nemo_root)
    print(f"Found top-level packages: {top_level_packages}")
    known_top_level = set(top_level_packages)

    # First pass: classify every file once and register each package as a
    # key, so the second pass can recognise imports regardless of file order.
    reverse_deps: Dict[str, Set[str]] = {}
    file_packages: Dict[str, str] = {}
    for file_path in find_python_files(nemo_root):
        parts = os.path.relpath(file_path, nemo_root).split(os.sep)
        package = _package_for_path(parts, known_top_level)
        if package is None:
            continue
        file_packages[file_path] = package
        reverse_deps.setdefault(package, set())

    # Second pass: record which package imports which.
    for file_path, current_package in file_packages.items():
        for imported_pkg in analyze_imports(file_path):
            if imported_pkg in reverse_deps and imported_pkg != current_package:
                reverse_deps[imported_pkg].add(current_package)

    dependencies: Dict[str, Union[List[str], Dict[str, List[str]]]] = {}

    # Collections are grouped under a single key. A file sitting directly in
    # nemo/collections (e.g. __init__.py) produces a pseudo-package named
    # after the file; the __init__.py one is filtered out here.
    collection_deps = {
        pkg: sorted(deps)
        for pkg, deps in reverse_deps.items()
        if pkg.startswith('nemo.collections.') and not pkg.endswith('__init__.py')
    }
    if collection_deps:
        dependencies['nemo.collections'] = collection_deps

    for pkg, deps in reverse_deps.items():
        if not pkg.startswith('nemo.collections.'):
            dependencies[pkg] = sorted(deps)

    return dependencies
|
|
|
|
|
|
|
|
def main():
    """Print the dependency graph as indented JSON on standard output.

    The directory containing this script is used as the root that is analyzed.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    graph = build_dependency_graph(script_dir)
    print(json.dumps(graph, indent=4))


if __name__ == "__main__":
    main()
|
|
|