Unable to run in a Docker container

#142
by hungnh1201 - opened

Has anyone encountered this issue? Please advise me on a solution. When I try to run a small Python project using:
```python
# Load model directly
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
```
and build a Docker image and run it, it silently crashes without any error. I asked an AI assistant and it says it's a PyTorch + Docker incompatibility. I'm building the Docker image on my Mac.
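The only concrete lead so far (an assumption, since there is no error output) is that an image built for linux/amd64 runs under QEMU emulation on an Apple Silicon Mac, and torch can crash under emulation without a Python traceback. A quick way to check, using my image tag `transformers-test` as an example:

```bash
# Which architecture was the image built for?
# On an M-series Mac, "amd64" means it runs under emulation.
docker image inspect transformers-test --format '{{.Architecture}}'

# Rebuild natively (assumes arm64 wheels exist for the pinned versions)
docker build --platform linux/arm64 -t transformers-test .
```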

This is requirements.txt:

```
torch==2.1.0
transformers==4.35.2
numpy==1.26.4
```
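To isolate the problem, one thing worth trying (a debugging sketch, not something I have confirmed) is checking that the pinned torch wheel imports cleanly in the bare base image, before transformers gets involved:

```bash
# Install only torch in a throwaway python:3.11-slim container and import it
docker run --rm python:3.11-slim bash -c \
  "pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==2.1.0 \
   && python3 -c 'import torch; print(torch.__version__)'"
```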
Dockerfile:

```dockerfile
# Minimal Dockerfile to test transformers in Docker
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements
COPY requirements.txt .

# Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# Install PyTorch CPU version
RUN pip install --no-cache-dir \
    --index-url https://download.pytorch.org/whl/cpu \
    torch==2.1.0

# Install transformers and dependencies
RUN pip install --no-cache-dir \
    transformers==4.35.2 \
    numpy==1.26.4

# Pre-download the model to avoid download during runtime
RUN python3 -c "from transformers import AutoTokenizer, AutoModel; \
    AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'); \
    AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')"

# Copy test script
COPY test_embedding.py .

# Set environment variables to prevent threading issues
ENV TOKENIZERS_PARALLELISM=false
ENV OMP_NUM_THREADS=1
ENV MKL_NUM_THREADS=1
ENV OPENBLAS_NUM_THREADS=1

# Run the test
CMD ["python3", "test_embedding.py"]
```
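For reference, this is roughly how I build and run it (the container name `tf-test` is just an example); keeping the container around after it exits makes it possible to tell a Python failure from the process being killed:

```bash
docker build -t transformers-test .
docker run --name tf-test transformers-test
# Exit code 1 means the script reached sys.exit(1);
# 137 = SIGKILL (often the OOM killer), 132/139 = SIGILL/SIGSEGV,
# both typical of emulation or memory trouble rather than Python errors.
echo "exit code: $?"
# Confirm or rule out an out-of-memory kill, then clean up
docker inspect --format '{{.State.OOMKilled}}' tf-test
docker rm tf-test
```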
and the test file:
"""
Simple test script to verify transformers work in Docker
Tests both loading and inference with sentence transformers
"""

import os
import sys
import torch
from transformers import AutoTokenizer, AutoModel

print("=" * 60)
print("Testing Transformers in Docker")
print("=" * 60)

Print environment info

print(f"\nPython version: {sys.version}")
print(f"PyTorch version: {torch.version}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"Device: cpu")

Environment variables

print(f"\nEnvironment Variables:")
print(f" OMP_NUM_THREADS: {os.environ.get('OMP_NUM_THREADS', 'not set')}")
print(f" MKL_NUM_THREADS: {os.environ.get('MKL_NUM_THREADS', 'not set')}")
print(f" TOKENIZERS_PARALLELISM: {os.environ.get('TOKENIZERS_PARALLELISM', 'not set')}")

Test 1: Load model

print("\n" + "=" * 60)
print("TEST 1: Loading Model")
print("=" * 60)

try:
model_name = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
print(f"Loading tokenizer: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
print("βœ… Tokenizer loaded successfully")

print(f"Loading model: {model_name}")
model = AutoModel.from_pretrained(model_name)
print("βœ… Model loaded successfully")

# Move to CPU explicitly
device = torch.device('cpu')
model = model.to(device)
model.eval()
print(f"βœ… Model moved to {device} and set to eval mode")

except Exception as e:
print(f"❌ Error loading model: {e}")
import traceback
traceback.print_exc()
sys.exit(1)

Test 2: Tokenize text

print("\n" + "=" * 60)
print("TEST 2: Tokenizing Text")
print("=" * 60)

test_text = "This is a test sentence to verify tokenization works in Docker."
print(f"Test text: {test_text}")

try:
print("Tokenizing...")
inputs = tokenizer(test_text, return_tensors="pt", padding=True, truncation=True, max_length=512)
print(f"βœ… Tokenization successful")
print(f" Input IDs shape: {inputs['input_ids'].shape}")
print(f" Attention mask shape: {inputs['attention_mask'].shape}")
except Exception as e:
print(f"❌ Error tokenizing: {e}")
import traceback
traceback.print_exc()
sys.exit(1)

Test 3: Move tensors to device

print("\n" + "=" * 60)
print("TEST 3: Moving Tensors to Device")
print("=" * 60)

try:
print(f"Moving tensors to {device}...")
inputs = {k: v.to(device) for k, v in inputs.items()}
print(f"βœ… Tensors moved successfully")
except Exception as e:
print(f"❌ Error moving tensors: {e}")
import traceback
traceback.print_exc()
sys.exit(1)

Test 4: Run model inference (THIS IS WHERE IT CRASHES)

print("\n" + "=" * 60)
print("TEST 4: Running Model Inference")
print("=" * 60)

try:
print("Running model.forward()...")
with torch.no_grad():
outputs = model(**inputs)
print(f"βœ… Model inference successful!")
print(f" Output shape: {outputs.last_hidden_state.shape}")
except Exception as e:
print(f"❌ Error during inference: {e}")
import traceback
traceback.print_exc()
sys.exit(1)

Test 5: Mean pooling

print("\n" + "=" * 60)
print("TEST 5: Mean Pooling")
print("=" * 60)

try:
print("Applying mean pooling...")
token_embeddings = outputs.last_hidden_state
attention_mask = inputs['attention_mask']
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
embedding = (sum_embeddings / sum_mask).cpu().numpy()[0]
print(f"βœ… Mean pooling successful!")
print(f" Embedding shape: {embedding.shape}")
print(f" Embedding (first 10 values): {embedding[:10]}")
except Exception as e:
print(f"❌ Error during mean pooling: {e}")
import traceback
traceback.print_exc()
sys.exit(1)

All tests passed!

print("\n" + "=" * 60)
print("πŸŽ‰ ALL TESTS PASSED!")
print("=" * 60)
print("\nTransformers work correctly in this Docker environment!")
