Unable to run transformers in a Docker container
Has anyone encountered this issue? Please advise me on a solution.
When I try to run a small Python project using
# Load model directly
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
inside a Docker image that I build and run, it crashes silently without any error.
I asked an AI and it says it's a PyTorch + Docker incompatibility.
I'm building the Docker image on my Mac.
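Since I'm on a Mac, I also wondered whether an arm64 vs amd64 image mismatch could be involved, though I'm not sure. For reference, a generic way to check what platform an image was built for (the tag transformers-test is just a placeholder for whatever the image is tagged):

docker image inspect --format '{{.Os}}/{{.Architecture}}' transformers-test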
This is my requirements.txt:
torch==2.1.0
transformers==4.35.2
numpy==1.26.4
Dockerfile:
# Minimal Dockerfile to test transformers in Docker
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
COPY requirements.txt .

# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Install PyTorch CPU version
RUN pip install --no-cache-dir \
    --index-url https://download.pytorch.org/whl/cpu \
    torch==2.1.0

# Install transformers and dependencies
RUN pip install --no-cache-dir \
    transformers==4.35.2 \
    numpy==1.26.4

# Pre-download the model to avoid download during runtime
RUN python3 -c "from transformers import AutoTokenizer, AutoModel; \
    AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'); \
    AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')"

# Copy test script
COPY test_embedding.py .

# Set environment variables to prevent threading issues
ENV TOKENIZERS_PARALLELISM=false
ENV OMP_NUM_THREADS=1
ENV MKL_NUM_THREADS=1
ENV OPENBLAS_NUM_THREADS=1

# Run the test
CMD ["python3", "test_embedding.py"]
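I build and run it with the usual commands, roughly like this (the tag name is arbitrary):

docker build -t transformers-test .
docker run --rm transformers-test
echo "exit code: $?"

The echo is only there so I can at least see how the container exited, since nothing else is printed.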
And the test file:
"""
Simple test script to verify transformers work in Docker
Tests both loading and inference with sentence transformers
"""
import os
import sys
import torch
from transformers import AutoTokenizer, AutoModel
print("=" * 60)
print("Testing Transformers in Docker")
print("=" * 60)
# Print environment info
print(f"\nPython version: {sys.version}")
print(f"PyTorch version: {torch.version}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device: cpu")
# Environment variables
print(f"\nEnvironment Variables:")
print(f" OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'not set')}")
print(f" MKL_NUM_THREADS: {os.environ.get('MKL_NUM_THREADS', 'not set')}")
print(f" TOKENIZERS_PARALLELISM: {os.environ.get('TOKENIZERS_PARALLELISM', 'not set')}")
# Test 1: Load model
print("\n" + "=" * 60)
print("TEST 1: Loading Model")
print("=" * 60)
try:
    model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    print(f"Loading tokenizer: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print("✅ Tokenizer loaded successfully")

    print(f"Loading model: {model_name}")
    model = AutoModel.from_pretrained(model_name)
    print("✅ Model loaded successfully")

    # Move to CPU explicitly
    device = torch.device('cpu')
    model = model.to(device)
    model.eval()
    print(f"✅ Model moved to {device} and set to eval mode")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Test 2: Tokenize text
print("\n" + "=" * 60)
print("TEST 2: Tokenizing Text")
print("=" * 60)
test_text = "This is a test sentence to verify tokenization works in Docker."
print(f"Test text: {test_text}")
try:
    print("Tokenizing...")
    inputs = tokenizer(test_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    print("✅ Tokenization successful")
    print(f"  Input IDs shape: {inputs['input_ids'].shape}")
    print(f"  Attention mask shape: {inputs['attention_mask'].shape}")
except Exception as e:
    print(f"❌ Error tokenizing: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Test 3: Move tensors to device
print("\n" + "=" * 60)
print("TEST 3: Moving Tensors to Device")
print("=" * 60)
try:
    print(f"Moving tensors to {device}...")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    print("✅ Tensors moved successfully")
except Exception as e:
    print(f"❌ Error moving tensors: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Test 4: Run model inference (THIS IS WHERE IT CRASHES)
print("\n" + "=" * 60)
print("TEST 4: Running Model Inference")
print("=" * 60)
try:
    print("Running model.forward()...")
    with torch.no_grad():
        outputs = model(**inputs)
    print("✅ Model inference successful!")
    print(f"  Output shape: {outputs.last_hidden_state.shape}")
except Exception as e:
    print(f"❌ Error during inference: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# Test 5: Mean pooling
print("\n" + "=" * 60)
print("TEST 5: Mean Pooling")
print("=" * 60)
try:
    print("Applying mean pooling...")
    token_embeddings = outputs.last_hidden_state
    attention_mask = inputs['attention_mask']
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    embedding = (sum_embeddings / sum_mask).cpu().numpy()[0]
    print("✅ Mean pooling successful!")
    print(f"  Embedding shape: {embedding.shape}")
    print(f"  Embedding (first 10 values): {embedding[:10]}")
except Exception as e:
    print(f"❌ Error during mean pooling: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
# All tests passed!
print("\n" + "=" * 60)
print("π ALL TESTS PASSED!")
print("=" * 60)
print("\nTransformers work correctly in this Docker environment!")