{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "N6h-mIZJMORe",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "N6h-mIZJMORe",
"outputId": "06d56444-ba70-48b2-eac0-daf28f5ab507"
},
"outputs": [],
"source": [
"%pip install sacrebleu sentencepiece torch datasets==3.6.0 scipy tqdm numpy tensorboard optuna"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef4ec7ec",
"metadata": {
"id": "ef4ec7ec"
},
"outputs": [],
"source": [
"# imports\n",
"\n",
"from __future__ import annotations\n",
"\n",
"import json\n",
"import math\n",
"import random\n",
"import time\n",
"from datetime import timedelta\n",
"from pathlib import Path\n",
"from typing import List, Tuple\n",
"\n",
"import numpy as np\n",
"import sacrebleu\n",
"import sentencepiece as spm\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import torch.optim as optim\n",
"import torch.utils.data as tud\n",
"from torch.optim.lr_scheduler import StepLR\n",
"from torch.optim.lr_scheduler import LambdaLR\n",
"from torch.utils.tensorboard import SummaryWriter\n",
"from datasets import load_dataset\n",
"from scipy import stats\n",
"from tqdm.auto import tqdm\n",
"import optuna"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5862304a",
"metadata": {
"id": "5862304a"
},
"outputs": [],
"source": [
"# Model definitions\n",
"\n",
"class LuongAttention(nn.Module):\n",
" def __init__(self, hidden_size: int):\n",
" super().__init__()\n",
" self.scale = 1.0 / math.sqrt(hidden_size)\n",
"\n",
" def forward(self, query, keys, values, mask=None):\n",
" # query: (B, 1, H); keys: (B, T, H)\n",
" scores = torch.bmm(query, keys.transpose(1, 2)) * self.scale # (B,1,T)\n",
" if mask is not None:\n",
" scores = scores.masked_fill_(~mask[:, None, :], -1e9)\n",
" attn = torch.softmax(scores, dim=-1) # (B,1,T)\n",
" context = torch.bmm(attn, values) # (B,1,H)\n",
" return context, attn.squeeze(1)\n",
"\n",
"class BiLSTMTranslator(nn.Module):\n",
" \"\"\"\n",
" 2-layer bidirectional LSTM encoder + 2-layer unidirectional LSTM decoder\n",
" with Luong global attention.\n",
"\n",
" The final forward & backward encoder states are concatenated, then\n",
" replicated across decoder layers so the initial (h_0, c_0) have shape\n",
" (num_layers, batch, hidden_size), as required by nn.LSTM.\n",
" \"\"\"\n",
" def __init__(\n",
" self,\n",
" # These arguments will be supplied by Optuna. Values here are placeholders\n",
" vocab_size: int,\n",
" emb_size: int = 512,\n",
" hidden_size: int = 512,\n",
" num_layers: int = 2,\n",
" dropout: float = 0.1,\n",
" **kwargs: dict,\n",
" ):\n",
" super().__init__()\n",
" self.embedding = nn.Embedding(vocab_size, emb_size, padding_idx=0)\n",
" self.encoder = nn.LSTM(\n",
" input_size=emb_size,\n",
" hidden_size=hidden_size // 2,\n",
" num_layers=num_layers,\n",
" dropout=dropout,\n",
" bidirectional=True,\n",
" batch_first=True,\n",
" )\n",
"\n",
" self.decoder = nn.LSTM(\n",
" input_size=emb_size,\n",
" hidden_size=hidden_size,\n",
" num_layers=num_layers,\n",
" dropout=dropout,\n",
" batch_first=True,\n",
" )\n",
"\n",
" self.attn = LuongAttention(hidden_size)\n",
" self.out = nn.Linear(hidden_size * 2, vocab_size)\n",
" self.dropout = nn.Dropout(dropout)\n",
" \n",
" def forward(self, src, src_lens, tgt):\n",
" # encoder\n",
" emb_src = self.dropout(self.embedding(src))\n",
" packed_src = nn.utils.rnn.pack_padded_sequence(\n",
" emb_src, src_lens.cpu(), batch_first=True, enforce_sorted=False\n",
" )\n",
" enc_out, (h_enc, c_enc) = self.encoder(packed_src)\n",
" enc_out, _ = nn.utils.rnn.pad_packed_sequence(enc_out, batch_first=True)\n",
" # h_enc & c_enc: (num_layers*2, batch, hidden_size//2)\n",
" \n",
" # Concatenate last forward & backward states -> (batch, hidden_size)\n",
" h_final = torch.cat([h_enc[-2], h_enc[-1]], dim=-1)\n",
" c_final = torch.cat([c_enc[-2], c_enc[-1]], dim=-1)\n",
" \n",
" # Expand to match decoder layers: (num_layers, batch, hidden_size)\n",
" num_dec_layers = self.decoder.num_layers\n",
" h0 = h_final.unsqueeze(0).repeat(num_dec_layers, 1, 1)\n",
" c0 = c_final.unsqueeze(0).repeat(num_dec_layers, 1, 1)\n",
" \n",
" # decoder\n",
" emb_tgt = self.dropout(self.embedding(tgt))\n",
" dec_out, _ = self.decoder(emb_tgt, (h0, c0)) # (B, T, H)\n",
" \n",
" # attention\n",
" context, _ = self.attn(dec_out, enc_out, enc_out) # (B, T, H)\n",
" concat = torch.cat([dec_out, context], dim=-1) # (B, T, 2H)\n",
" logits = self.out(concat) # (B, T, V)\n",
" return logits\n",
"\n",
"\n",
"class PositionalEncoding(nn.Module):\n",
" \"\"\"\n",
" Implements sinusoidal positional encoding as described in \"Attention Is All You Need\".\n",
" \"\"\"\n",
" def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):\n",
" super().__init__()\n",
" self.dropout = nn.Dropout(dropout)\n",
"\n",
" # Create constant \"pe\" matrix with values dependent on\n",
" # pos and i\n",
" pe = torch.zeros(max_len, d_model)\n",
" position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)\n",
" div_term = torch.exp(\n",
" torch.arange(0, d_model, 2, dtype=torch.float)\n",
" * (-math.log(10000.0) / d_model)\n",
" )\n",
" pe[:, 0::2] = torch.sin(position * div_term)\n",
" pe[:, 1::2] = torch.cos(position * div_term)\n",
" pe = pe.unsqueeze(0) # shape (1, max_len, d_model)\n",
"\n",
" # Register as buffer so it's saved/loaded but not trained\n",
" self.register_buffer(\"pe\", pe)\n",
"\n",
" def forward(self, x: torch.Tensor) -> torch.Tensor:\n",
" \"\"\"\n",
" Args:\n",
" x: Tensor, shape (batch_size, seq_len, d_model)\n",
" \"\"\"\n",
" x = x + self.pe[:, : x.size(1)]\n",
" return self.dropout(x)\n",
"\n",
"\n",
"class TransformerTranslator(nn.Module):\n",
" def __init__(\n",
" self,\n",
" # These arguments will be supplied by Optuna. Values here are placeholders\n",
" vocab_size: int,\n",
" d_model: int = 256,\n",
" nhead: int = 8,\n",
" num_layers: int = 4,\n",
" dropout: float = 0.1,\n",
" max_len: int = 5000,\n",
" **kwargs\n",
" ):\n",
" super().__init__()\n",
" self.d_model = d_model\n",
"\n",
" # Token embedding + positional encoding\n",
" self.embedding = nn.Embedding(vocab_size, d_model, padding_idx=0)\n",
" self.pos_enc = PositionalEncoding(d_model, dropout, max_len)\n",
"\n",
" # Encoder and decoder stacks\n",
" encoder_layer = nn.TransformerEncoderLayer(\n",
" d_model=d_model,\n",
" nhead=nhead,\n",
" dim_feedforward=d_model * 4,\n",
" dropout=dropout,\n",
" batch_first=True,\n",
" )\n",
" self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)\n",
"\n",
" decoder_layer = nn.TransformerDecoderLayer(\n",
" d_model=d_model,\n",
" nhead=nhead,\n",
" dim_feedforward=d_model * 4,\n",
" dropout=dropout,\n",
" batch_first=True,\n",
" )\n",
" self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)\n",
"\n",
" # Final linear projection\n",
" self.out = nn.Linear(d_model, vocab_size)\n",
"\n",
" def forward(\n",
" self,\n",
" src: torch.Tensor,\n",
" src_lens,\n",
" tgt: torch.Tensor,\n",
" ) -> torch.Tensor:\n",
" \"\"\"\n",
" Args:\n",
" src: (batch_size, src_seq_len)\n",
" src_lens: (unused here, but kept for compatibility)\n",
" tgt: (batch_size, tgt_seq_len)\n",
" Returns:\n",
" logits: (batch_size, tgt_seq_len, vocab_size)\n",
" \"\"\"\n",
" # Padding masks\n",
" src_key_padding_mask = src == 0 # True at padding positions\n",
" tgt_key_padding_mask = tgt == 0\n",
"\n",
" # Embedding + scaling + positional encoding\n",
" emb_src = self.embedding(src) * math.sqrt(self.d_model)\n",
" emb_src = self.pos_enc(emb_src)\n",
"\n",
" emb_tgt = self.embedding(tgt) * math.sqrt(self.d_model)\n",
" emb_tgt = self.pos_enc(emb_tgt)\n",
"\n",
" # Encoder forward\n",
" memory = self.encoder(\n",
" emb_src,\n",
" src_key_padding_mask=src_key_padding_mask,\n",
" )\n",
"\n",
" # Create causal mask for decoder (prevent attending to future tokens)\n",
" tgt_seq_len = tgt.size(1)\n",
" causal_mask = nn.Transformer.generate_square_subsequent_mask(tgt_seq_len).to(\n",
" src.device\n",
" )\n",
"\n",
" causal_mask = causal_mask.to(torch.bool)\n",
" output = self.decoder(\n",
" emb_tgt,\n",
" memory,\n",
" tgt_mask=causal_mask,\n",
" tgt_key_padding_mask=tgt_key_padding_mask,\n",
" memory_key_padding_mask=src_key_padding_mask,\n",
" )\n",
"\n",
" # Project to vocabulary\n",
" logits = self.out(output)\n",
" return logits\n"
]
},
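{
"cell_type": "code",
"execution_count": null,
"id": "a7c1f3d2",
"metadata": {},
"outputs": [],
"source": [
"# Shape smoke test (illustrative only): push one random mini-batch through both\n",
"# architectures defined above and check that the logits come out as (B, T_tgt, V).\n",
"# The sizes below are arbitrary demo values, not the tuned hyperparameters.\n",
"_V, _B, _S, _T = 100, 3, 7, 5\n",
"_src = torch.randint(1, _V, (_B, _S))\n",
"_src_lens = torch.full((_B,), _S, dtype=torch.long)\n",
"_tgt = torch.randint(1, _V, (_B, _T))\n",
"\n",
"for _cls in (BiLSTMTranslator, TransformerTranslator):\n",
"    _model = _cls(_V)\n",
"    _logits = _model(_src, _src_lens, _tgt)\n",
"    print(_cls.__name__, tuple(_logits.shape))  # expect (3, 5, 100)"
]
},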
{
"cell_type": "code",
"execution_count": null,
"id": "38eac73b",
"metadata": {
"id": "38eac73b"
},
"outputs": [],
"source": [
"# Data loading & utilities\n",
"\n",
"def set_seed(seed: int) -> None:\n",
" random.seed(seed)\n",
" np.random.seed(seed)\n",
" torch.manual_seed(seed)\n",
" torch.cuda.manual_seed_all(seed)\n",
" torch.backends.cudnn.deterministic = True\n",
" torch.backends.cudnn.benchmark = False\n",
"\n",
"\n",
"class Timer:\n",
" \"\"\"Context manager that measures (wall-clock) seconds.\"\"\"\n",
"\n",
" def __enter__(self):\n",
" self.start = time.time()\n",
" return self\n",
"\n",
" def __exit__(self, exc_type, exc_val, exc_tb):\n",
" self.end = time.time()\n",
" self.elapsed = self.end - self.start\n",
"\n",
"\n",
"BOS, EOS, PAD, UNK = \"\", \"\", \"\", \"\"\n",
"\n",
"\n",
"def download_iwslt17_de_en(data_dir: Path) -> Tuple[Path, Path, Path]:\n",
" dataset = load_dataset(\"iwslt2017\", \"iwslt2017-de-en\")\n",
" splits = {}\n",
" for split in (\"train\", \"validation\", \"test\"):\n",
" lines = [\n",
" f\"{ex['translation']['de']}\\t{ex['translation']['en']}\"\n",
" for ex in dataset[split]\n",
" ]\n",
" out_path = data_dir / f\"{split}.tsv\"\n",
" out_path.write_text(\"\\n\".join(lines) + \"\\n\", encoding=\"utf-8\")\n",
" splits[split] = out_path\n",
" return splits[\"train\"], splits[\"validation\"], splits[\"test\"]\n",
"\n",
"\n",
"def train_sentencepiece(input_paths: List[Path], model_prefix: str, vocab_size: int = 8000) -> Path:\n",
" input_text = \"\\n\".join([p.read_text(encoding=\"utf-8\") for p in input_paths])\n",
" tmp = Path(f\"{model_prefix}_corpus.txt\")\n",
" tmp.write_text(input_text, encoding=\"utf-8\")\n",
" spm.SentencePieceTrainer.train(\n",
" input=str(tmp), model_prefix=model_prefix, vocab_size=vocab_size,\n",
" character_coverage=1.0, model_type=\"bpe\",\n",
" pad_id=0, unk_id=1, bos_id=2, eos_id=3, user_defined_symbols=\"\" # PAD, UNK, BOS, EOS\n",
" )\n",
" tmp.unlink() # cleanup\n",
" return Path(f\"{model_prefix}.model\")\n",
"\n",
"\n",
"def encode_file(sp: spm.SentencePieceProcessor, in_path: Path, out_path: Path) -> None:\n",
" with in_path.open(\"r\", encoding=\"utf-8\") as fi, out_path.open(\"w\", encoding=\"utf-8\") as fo:\n",
" for line in fi:\n",
" src, tgt = line.rstrip().split(\"\\t\")\n",
" pieces_src = sp.encode(src, out_type=str)\n",
" pieces_tgt = sp.encode(tgt, out_type=str)\n",
" fo.write(\" \".join(pieces_src) + \"\\t\" + \" \".join(pieces_tgt) + \"\\n\")\n",
"\n",
"\n",
"class ParallelDataset(tud.Dataset):\n",
" def __init__(self, path: Path, sp: spm.SentencePieceProcessor, max_len: int = 100):\n",
" self.samples = []\n",
" BOS_ID, EOS_ID = sp.bos_id(), sp.eos_id()\n",
"\n",
" with path.open(\"r\", encoding=\"utf-8\") as fh:\n",
" for ln in fh:\n",
" if \"\\t\" not in ln:\n",
" continue\n",
" src_txt, tgt_txt = ln.rstrip().split(\"\\t\", maxsplit=1)\n",
"\n",
" # Tokens already split, just convert to IDs directly\n",
" src_ids = [BOS_ID] + sp.piece_to_id(src_txt.split()) + [EOS_ID]\n",
" tgt_ids = [BOS_ID] + sp.piece_to_id(tgt_txt.split()) + [EOS_ID]\n",
"\n",
" if len(src_ids) <= max_len and len(tgt_ids) <= max_len:\n",
" self.samples.append(\n",
" (torch.LongTensor(src_ids), torch.LongTensor(tgt_ids))\n",
" )\n",
"\n",
" def __len__(self):\n",
" return len(self.samples)\n",
"\n",
" def __getitem__(self, idx):\n",
" return self.samples[idx]\n",
"\n",
"\n",
"def collate_fn(batch):\n",
" srcs, tgts = zip(*batch)\n",
" src_lens = [len(x) for x in srcs]\n",
" tgt_lens = [len(x) for x in tgts]\n",
" max_src, max_tgt = max(src_lens), max(tgt_lens)\n",
" src_pad = torch.zeros(len(batch), max_src, dtype=torch.long)\n",
" tgt_pad = torch.zeros(len(batch), max_tgt, dtype=torch.long)\n",
" for i, (src, tgt) in enumerate(zip(srcs, tgts)):\n",
" src_pad[i, : len(src)] = src\n",
" tgt_pad[i, : len(tgt)] = tgt\n",
" return src_pad, torch.tensor(src_lens), tgt_pad, torch.tensor(tgt_lens)\n",
"\n",
"\n",
"def get_noam_scheduler(optimizer, d_model, warmup_steps, lr_scale=1.0):\n",
" def lr_lambda(step):\n",
" t = step + 1 \n",
" scale = d_model ** -0.5\n",
" return lr_scale * scale * min(t**-0.5, t * warmup_steps**-1.5)\n",
" return LambdaLR(optimizer, lr_lambda)"
]
},
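{
"cell_type": "code",
"execution_count": null,
"id": "b2e9d4c7",
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of how the Noam schedule returned by get_noam_scheduler behaves:\n",
"# linear warm-up followed by inverse-square-root decay. d_model=256 and\n",
"# warmup_steps=400 are arbitrary demo values, not the tuned settings.\n",
"_probe = nn.Linear(2, 2)\n",
"_probe_opt = optim.Adam(_probe.parameters(), lr=1.0)  # base lr 1.0, scaled by the schedule\n",
"_probe_sched = get_noam_scheduler(_probe_opt, d_model=256, warmup_steps=400)\n",
"\n",
"_lrs = []\n",
"for _ in range(2000):\n",
"    _probe_opt.step()   # no gradients, so this is a no-op; it just keeps the optimizer -> scheduler order\n",
"    _probe_sched.step()\n",
"    _lrs.append(_probe_opt.param_groups[0][\"lr\"])\n",
"\n",
"print(f\"lr after    1 step : {_lrs[0]:.6f}\")\n",
"print(f\"lr after  400 steps: {_lrs[399]:.6f}  (around the warm-up peak)\")\n",
"print(f\"lr after 2000 steps: {_lrs[1999]:.6f}  (inverse-sqrt decay)\")"
]
},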
{
"cell_type": "code",
"execution_count": null,
"id": "6c4022c0",
"metadata": {
"id": "6c4022c0"
},
"outputs": [],
"source": [
"# Training & Evaluation\n",
"\n",
"def label_smoothing_loss(logits, targets, pad_idx: int = 0, smoothing: float = 0.1):\n",
" \"\"\"\n",
" Cross-entropy with uniform label smoothing.\n",
" Args\n",
" logits : (B, T, V) - raw scores from the model\n",
" targets : (B, T) - ground-truth token IDs\n",
" \"\"\"\n",
" vocab = logits.size(-1)\n",
"\n",
" \n",
" logits_flat = logits.contiguous().view(-1, vocab) # (B*T, V)\n",
" targets_flat = targets.contiguous().view(-1) # (B*T)\n",
"\n",
" # Standard CE per token\n",
" nll = torch.nn.functional.cross_entropy(\n",
" logits_flat,\n",
" targets_flat,\n",
" ignore_index=pad_idx,\n",
" reduction=\"none\",\n",
" )\n",
"\n",
" # Apply smoothing\n",
" loss = (1.0 - smoothing) * nll + smoothing / vocab\n",
"\n",
" # Remove padding positions\n",
" loss = loss[targets_flat != pad_idx]\n",
"\n",
" return loss.mean()\n",
"\n",
"\n",
"def train_epoch(model, iterator, optimizer, device, scheduler, clip_norm=1.0):\n",
" model.train()\n",
" total_loss = 0.0\n",
"\n",
" for src, src_lens, tgt, _ in tqdm(\n",
" iterator, desc=\"Train batches\", leave=False\n",
" ):\n",
" src, src_lens = src.to(device), src_lens.to(device)\n",
" tgt_inp, tgt_out = tgt[:, :-1].to(device), tgt[:, 1:].to(device)\n",
"\n",
" optimizer.zero_grad()\n",
" logits = model(src, src_lens, tgt_inp)\n",
" loss = label_smoothing_loss(logits, tgt_out)\n",
" loss.backward()\n",
" torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)\n",
" optimizer.step()\n",
" scheduler.step()\n",
"\n",
" total_loss += loss.item() * src.size(0)\n",
"\n",
" return total_loss / len(iterator.dataset)\n",
"\n",
"\n",
"def greedy_translate_ids(model, sp, src, src_lens, device, max_len: int = 60):\n",
" \"\"\"\n",
" src : (B, S) padded batch on CPU or GPU\n",
" src_lens : (B,) true lengths\n",
" returns : List[List[int]] – token-id sequences (B of them)\n",
" \"\"\"\n",
" BOS, EOS = sp.bos_id(), sp.eos_id()\n",
" model.eval()\n",
"\n",
" with torch.no_grad():\n",
" src, src_lens = src.to(device), src_lens.to(device)\n",
" B = src.size(0)\n",
"\n",
" tgt = torch.full((B, 1), BOS, dtype=torch.long, device=device)\n",
" finished = torch.zeros(B, dtype=torch.bool, device=device)\n",
"\n",
" for _ in range(max_len):\n",
" logits = model(src, src_lens, tgt) # (B, T, V)\n",
" next_tok = logits[:, -1].argmax(-1, keepdim=True)\n",
"\n",
" tgt = torch.cat([tgt, next_tok], dim=1)\n",
" finished |= (next_tok.squeeze(1) == EOS)\n",
" if finished.all():\n",
" break\n",
"\n",
" # strip BOS/EOS and move to Python lists\n",
" out = []\n",
" for row in tgt.tolist():\n",
" ids = row[1:]\n",
" if EOS in ids:\n",
" ids = ids[: ids.index(EOS)]\n",
" out.append(ids)\n",
" return out\n",
"\n",
"def beam_translate_ids(model, sp, src, src_lens, device, max_len: int = 60, beam_width: int = 4):\n",
" \"\"\"\n",
" Batched and efficient beam search implementation (without separate encode/decode methods).\n",
" \"\"\"\n",
" BOS, EOS = sp.bos_id(), sp.eos_id()\n",
" model.eval()\n",
"\n",
" with torch.no_grad():\n",
" src, src_lens = src.to(device), src_lens.to(device)\n",
" B = src.size(0)\n",
"\n",
" # Repeat source inputs for beam search\n",
" src = src.repeat_interleave(beam_width, dim=0)\n",
" src_lens = src_lens.repeat_interleave(beam_width, dim=0)\n",
"\n",
" # Initialize target tokens with BOS\n",
" tgt = torch.full((B * beam_width, 1), BOS, dtype=torch.long, device=device)\n",
" beam_scores = torch.zeros(B, beam_width, device=device)\n",
" beam_scores[:, 1:] = -1e9 # Initially deactivate all beams except first\n",
" beam_scores = beam_scores.view(-1)\n",
"\n",
" finished = torch.zeros(B * beam_width, dtype=torch.bool, device=device)\n",
"\n",
" for _ in range(max_len):\n",
" logits = model(src, src_lens, tgt) # (B*beam_width, T, V)\n",
" log_probs = F.log_softmax(logits[:, -1, :], dim=-1) # (B*beam_width, V)\n",
"\n",
" scores = beam_scores.unsqueeze(1) + log_probs # (B*beam_width, V)\n",
" scores = scores.view(B, -1) # (B, beam_width*V)\n",
"\n",
" top_scores, top_ids = scores.topk(beam_width, dim=-1) # (B, beam_width)\n",
"\n",
" beam_indices = top_ids // log_probs.size(-1)\n",
" token_indices = top_ids % log_probs.size(-1)\n",
"\n",
" # Reorder beams\n",
" tgt = tgt.view(B, beam_width, -1)\n",
" next_tgt = []\n",
" for batch_idx in range(B):\n",
" next_tgt.append(tgt[batch_idx, beam_indices[batch_idx]])\n",
" tgt = torch.stack(next_tgt, dim=0).view(B * beam_width, -1)\n",
"\n",
" # Append tokens\n",
" tgt = torch.cat([tgt, token_indices.view(-1, 1)], dim=-1)\n",
"\n",
" beam_scores = top_scores.view(-1)\n",
"\n",
" # Check EOS\n",
" finished |= (token_indices.view(-1) == EOS)\n",
" if finished.view(B, beam_width).all(dim=1).all():\n",
" break\n",
"\n",
" # Choose best beams\n",
" tgt = tgt.view(B, beam_width, -1)\n",
" best_seqs = tgt[torch.arange(B), beam_scores.view(B, beam_width).argmax(dim=-1)]\n",
"\n",
" out = []\n",
" for seq in best_seqs.tolist():\n",
" if EOS in seq:\n",
" seq = seq[1:seq.index(EOS)]\n",
" else:\n",
" seq = seq[1:]\n",
" out.append(seq)\n",
"\n",
" return out\n",
"\n",
"\n",
"def evaluate(model, data_iter, sp, device):\n",
" hyps, refs = [], []\n",
" for src, src_lens, tgt, tgt_lens in tqdm(data_iter, desc=\"Evaluate\", leave=False):\n",
" # batched generation\n",
" pred_ids = beam_translate_ids(model, sp, src, src_lens, device)\n",
"\n",
" # batched decoding\n",
" hyps.extend([sp.decode(ids) for ids in pred_ids])\n",
"\n",
" # strip BOS/EOS then batch-decode\n",
" ref_ids = [ t[1:l-1].tolist() for t, l in zip(tgt, tgt_lens) ]\n",
" refs.extend([sp.decode(ids) for ids in ref_ids])\n",
"\n",
"\n",
" assert len(hyps) == len(refs), \"Mismatch between #hypotheses and #references!\"\n",
"\n",
" bleu = sacrebleu.corpus_bleu(hyps, [refs])\n",
" chrf = sacrebleu.corpus_chrf(hyps, [refs])\n",
" return bleu.score, chrf.score\n",
"\n"
]
},
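{
"cell_type": "code",
"execution_count": null,
"id": "c5d8e1a9",
"metadata": {},
"outputs": [],
"source": [
"# Sanity check (illustrative only) for label_smoothing_loss: with smoothing=0 it\n",
"# should agree with plain cross-entropy that ignores padding (id 0), and a\n",
"# positive smoothing value should give a different (typically larger) loss.\n",
"torch.manual_seed(0)\n",
"_logits = torch.randn(2, 5, 11)\n",
"_targets = torch.randint(1, 11, (2, 5))\n",
"_targets[0, -2:] = 0  # pretend the first sentence is padded\n",
"\n",
"_no_smooth = label_smoothing_loss(_logits, _targets, smoothing=0.0)\n",
"_plain_ce = F.cross_entropy(_logits.view(-1, 11), _targets.view(-1), ignore_index=0)\n",
"print(f\"smoothing=0.0: {_no_smooth:.6f}  vs  cross_entropy: {_plain_ce:.6f}\")\n",
"print(f\"smoothing=0.1: {label_smoothing_loss(_logits, _targets, smoothing=0.1):.6f}\")"
]
},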
{
"cell_type": "code",
"execution_count": null,
"id": "d361d42a",
"metadata": {},
"outputs": [],
"source": [
"sizes = [10_000, 50_000, 75_000, 100_000, 150_000, 200_000]\n",
"trials_per_size = {10_000: 15, 50_000: 20, 75_000: 20, 100_000: 25, 150_000: 25, 200_000: 25}\n",
"epochs_per_size = {10_000: 4, 50_000: 5, 75_000: 5, 100_000: 8, 150_000: 10, 200_000: 10}\n",
"\n",
"current_size = None\n",
"\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "feb87d71",
"metadata": {},
"outputs": [],
"source": [
"data_dir = Path(\"data\")\n",
"data_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"tune_dir = Path(\"tune\")\n",
"tune_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"train_dir = Path(\"train\")\n",
"train_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"log_dir = Path(\"logs\")\n",
"log_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"print(\"Downloading IWSLT-2017...\")\n",
"train_raw, valid_raw, test_raw = download_iwslt17_de_en(data_dir)\n",
"\n",
"print(\"Training SentencePiece model...\")\n",
"spm_path = train_sentencepiece([train_raw], str(data_dir / \"bpe8k\"), vocab_size=8000)\n",
"sp = spm.SentencePieceProcessor(model_file=str(spm_path))\n",
"\n",
"# Pre-encode full corpus once to speed up later sampling\n",
"print(\"Encoding full corpus... (this may take a minute)\")\n",
"encoded_train = data_dir / \"train.bpe.tsv\"\n",
"encode_file(sp, train_raw, encoded_train)\n",
"encode_file(sp, valid_raw, data_dir / \"valid.bpe.tsv\")\n",
"encode_file(sp, test_raw, data_dir / \"test.bpe.tsv\")\n",
"\n",
"for size in sizes:\n",
" # Down-sample deterministically for reproducibility\n",
" pairs = encoded_train.read_text().splitlines()\n",
" random.Random(42).shuffle(pairs)\n",
" subset_path = data_dir / f\"train_{size}.bpe.tsv\"\n",
" subset_path.write_text(\"\\n\".join(pairs[: size]) + \"\\n\", encoding=\"utf-8\")"
]
},
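{
"cell_type": "code",
"execution_count": null,
"id": "d4f6b8e3",
"metadata": {},
"outputs": [],
"source": [
"# Quick look at the prepared data (illustrative only): sentence-pair counts per\n",
"# split and one encoded validation example round-tripped through the BPE model.\n",
"for _name in (\"train\", \"valid\", \"test\"):\n",
"    _n_pairs = len((data_dir / f\"{_name}.bpe.tsv\").read_text(encoding=\"utf-8\").splitlines())\n",
"    print(f\"{_name}: {_n_pairs} sentence pairs\")\n",
"\n",
"_first = (data_dir / \"valid.bpe.tsv\").read_text(encoding=\"utf-8\").splitlines()[0]\n",
"_src_pieces, _tgt_pieces = _first.split(\"\\t\", maxsplit=1)\n",
"print(\"src pieces :\", _src_pieces.split()[:12])\n",
"print(\"src decoded:\", sp.decode(sp.piece_to_id(_src_pieces.split())))\n",
"print(\"tgt decoded:\", sp.decode(sp.piece_to_id(_tgt_pieces.split())))"
]
},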
{
"cell_type": "code",
"execution_count": null,
"id": "ff61e07f",
"metadata": {},
"outputs": [],
"source": [
"def suggest_bilstm_params(trial: optuna.Trial) -> dict:\n",
" \"\"\"\n",
" Define a compact search space for a Bi-LSTM MT model with fixed batch size 2048,\n",
" Adam optimizer, and StepLR scheduler.\n",
" \"\"\"\n",
" return {\n",
" # Architecture parameters\n",
" \"emb_size\": trial.suggest_int(\"emb_size\", 128, 512, step=64),\n",
" \"hidden_size\": trial.suggest_int(\"hidden_size\", 256, 1024, step=128),\n",
" \"num_layers\": trial.suggest_int(\"num_layers\", 1, 3),\n",
" \"dropout\": trial.suggest_float(\"dropout\", 0.1, 0.3),\n",
"\n",
" # optimizer parameters\n",
" \"lr\": trial.suggest_float(\"lr\", 3e-4, 5e-3, log=True),\n",
" \"weight_decay\": trial.suggest_float(\"weight_decay\", 1e-6, 1e-4, log=True),\n",
" \"clip_norm\": trial.suggest_float(\"clip_norm\", 0.1, 1.0),\n",
" \"eps\": trial.suggest_float(\"eps\", 1e-9, 1e-6, log=True),\n",
" \"beta1\": trial.suggest_float(\"beta1\", 0.8, 0.99, step=0.01),\n",
" \"beta2\": trial.suggest_float(\"beta2\", 0.9, 0.999, step=0.001),\n",
"\n",
" # scheduler parameters\n",
" \"scheduler_step_size\": trial.suggest_int(\"scheduler_step_size\", 10, 40),\n",
" \"scheduler_gamma\": trial.suggest_float(\"scheduler_gamma\", 0.7, 0.9),\n",
" }\n",
"\n",
"\n",
"def suggest_transformer_params(trial: optuna.Trial) -> dict:\n",
" \"\"\"\n",
" Define a compact search space for a Transformer MT model with fixed batch size 2048,\n",
" Adam optimizer, and StepLR scheduler.\n",
" \"\"\"\n",
" return {\n",
" # Architecture parameters\n",
" \"d_model\": trial.suggest_int(\"d_model\", 256, 512, step=128),\n",
" \"nhead\": trial.suggest_categorical(\"nhead\", [4, 8]),\n",
" \"num_layers\": trial.suggest_int(\"num_layers\", 2, 4),\n",
" \"dropout\": trial.suggest_float(\"dropout\", 0.1, 0.3),\n",
"\n",
" # optimizer parameters\n",
" \"lr\": 1.0,\n",
" \"lr_scale\": trial.suggest_float(\"lr_scale\", 0.2, 2.0, log=True),\n",
" \"weight_decay\": trial.suggest_float(\"weight_decay\", 1e-6, 1e-4, log=True),\n",
" \"clip_norm\": trial.suggest_float(\"clip_norm\", 0.1, 1.0),\n",
" \"eps\": trial.suggest_float(\"eps\", 1e-9, 1e-6, log=True),\n",
" \"beta1\": trial.suggest_float(\"beta1\", 0.8, 0.99, step=0.01),\n",
" \"beta2\": trial.suggest_float(\"beta2\", 0.9, 0.999, step=0.001),\n",
"\n",
" \"warmup_steps\": trial.suggest_int(\"warmup_steps\", 400, 800, step=50)\n",
" }\n",
"\n",
"\n",
"def make_objective(model_class, train_iter, valid_iter):\n",
" def objective(trial):\n",
" global current_size, device\n",
" is_bilstm = (model_class.__name__ == 'BiLSTMTranslator')\n",
" params = suggest_bilstm_params(trial) if is_bilstm else suggest_transformer_params(trial)\n",
"\n",
" # Build model\n",
" if is_bilstm:\n",
" model = model_class(8000,\n",
" hidden_size=params['hidden_size'],\n",
" num_layers=params['num_layers'],\n",
" dropout=params['dropout'])\n",
" else:\n",
" model = model_class(8000,\n",
" d_model=params['d_model'],\n",
" nhead=params['nhead'],\n",
" num_layers=params['num_layers'],\n",
" dropout=params['dropout'])\n",
"\n",
" model.to(device)\n",
" optimizer = optim.Adam(model.parameters(), lr=params['lr'], weight_decay=params['weight_decay'], eps=params['eps'], betas=(params['beta1'], params['beta2']))\n",
" if is_bilstm:\n",
" scheduler = StepLR(optimizer, step_size=params['scheduler_step_size'], gamma=params['scheduler_gamma'])\n",
" else:\n",
" scheduler = get_noam_scheduler(optimizer, params['d_model'], warmup_steps=params['warmup_steps'], lr_scale=params['lr_scale'])\n",
"\n",
" max_epochs = epochs_per_size[current_size]\n",
" for epoch in range(1, max_epochs + 1):\n",
" train_epoch(model, train_iter, optimizer, device, scheduler, clip_norm=params['clip_norm'])\n",
" bleu, _ = evaluate(model, valid_iter, sp, device)\n",
" trial.report(bleu, epoch)\n",
" if trial.should_prune():\n",
" raise optuna.TrialPruned()\n",
" return bleu\n",
" return objective\n",
"\n",
"pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=2)\n",
"best_params = {}\n",
"\n",
"for model_class in [BiLSTMTranslator, TransformerTranslator]:\n",
" model_name = model_class.__name__\n",
" prev_params = None\n",
" print(f\"\\nTuning {model_name} across dataset sizes...\")\n",
"\n",
" batch_size = 2048 if model_class == BiLSTMTranslator else 1024\n",
" max_steps = 2000 if model_class == BiLSTMTranslator else 4000\n",
" print(f\"Using\\t Batch size (train & tune): {batch_size}\\t Max steps (train): {max_steps}\")\n",
"\n",
" for size in sizes:\n",
" current_size = size\n",
" print(f\"\\nDataset size: {size}\")\n",
"\n",
" # Load data slice\n",
" train_ds = ParallelDataset(data_dir / f\"train_{size}.bpe.tsv\", sp)\n",
" valid_ds = ParallelDataset(data_dir / \"valid.bpe.tsv\", sp)\n",
" test_ds = ParallelDataset(data_dir / \"test.bpe.tsv\", sp)\n",
"\n",
" train_iter = tud.DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)\n",
" valid_iter = tud.DataLoader(valid_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)\n",
" test_iter = tud.DataLoader(test_ds, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)\n",
"\n",
" # Create or reuse study\n",
" study_name = f\"{model_name}_{size}\"\n",
" study = optuna.create_study(storage=f'sqlite:///{tune_dir / f\"{study_name}.db\"}', direction='maximize', pruner=pruner, study_name=study_name, load_if_exists=False)\n",
"\n",
" # Warm-start with previous size's best\n",
" if prev_params:\n",
" study.enqueue_trial(prev_params)\n",
"\n",
" # Optimize\n",
" study.optimize(make_objective(model_class, train_iter, valid_iter), n_trials=trials_per_size[size], gc_after_trial=True, show_progress_bar=True)\n",
"\n",
" # Record and carry forward\n",
" best = study.best_params\n",
" best_params[size] = best\n",
" prev_params = best\n",
" # write best params to file\n",
" with open(tune_dir / f\"{model_name}_{size}_best.json\", \"w\") as f:\n",
" json.dump(best, f, indent=2)\n",
"\n",
" print(f\"Tuning done for {model_name} @ {size}!\")\n",
" print(f\"Now training best model\")\n",
"\n",
" model = model_class(8000, **best)\n",
" model.to(device)\n",
"\n",
" optimizer = optim.Adam(model.parameters(), lr=best.get('lr', 1.0), weight_decay=best['weight_decay'], eps=best['eps'], betas=(best['beta1'], best['beta2']))\n",
"\n",
" if model_class == TransformerTranslator:\n",
" scheduler = get_noam_scheduler(optimizer, best['d_model'], warmup_steps=best['warmup_steps'], lr_scale=best['lr_scale'])\n",
" else:\n",
" scheduler = StepLR(optimizer, step_size=best['scheduler_step_size'], gamma=best['scheduler_gamma'])\n",
"\n",
" step = 0\n",
" best_bleu = 0.0\n",
" start_time = time.time()\n",
" p_bar = tqdm(leave=False, dynamic_ncols=True, desc=\"Training\", unit=\"it\", total=max_steps)\n",
" i = 0\n",
" write = SummaryWriter(log_dir / f\"{model_name}_{size}\")\n",
" cfg_name = f\"{model_name}_{size}\"\n",
" with Timer() as run_timer:\n",
" while step < max_steps and (time.time() - start_time) < 3600:\n",
" epoch_loss = train_epoch(model, train_iter, optimizer, device, scheduler, clip_norm=best['clip_norm'])\n",
" step += len(train_iter)\n",
" i += 1\n",
" p_bar.set_postfix(loss=epoch_loss)\n",
" p_bar.update(len(train_iter))\n",
" bleu, chrf = evaluate(model, valid_iter, sp, device)\n",
" if bleu > best_bleu:\n",
" best_bleu = bleu\n",
" torch.save(model.state_dict(), train_dir / f\"{cfg_name}_best.pt\")\n",
" with open(train_dir / f\"{cfg_name}_best_num_steps.txt\", \"w\") as f:\n",
" f.write(str(step))\n",
" write.add_scalar(\"loss/train\", epoch_loss, step)\n",
" write.add_scalar(\"bleu/valid\", bleu, step)\n",
" write.add_scalar(\"chrf/valid\", chrf, step)\n",
" print(f\"[{cfg_name}] step={step} loss={epoch_loss:.3f} BLEU={bleu:.2f} ChrF={chrf:.2f}\")\n",
" \n",
" write.add_hparams(best, {\"bleu\": best_bleu, \"chrf\": chrf, \"steps\": step, \"time\": run_timer.elapsed})\n",
" write.close()\n",
" torch.save(model.state_dict(), train_dir / f\"{cfg_name}_final.pt\")\n",
" print(f\"Training complete for {model_name} @ {size} in {timedelta(seconds=run_timer.elapsed)}\")\n",
" print(f\"Best BLEU: {best_bleu:.2f}, ChrF: {chrf:.2f}\")\n",
" p_bar.close()"
]
},
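{
"cell_type": "code",
"execution_count": null,
"id": "e9a3c7f1",
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch of held-out evaluation, assuming the tuning/training loop above\n",
"# has finished and written the *_best.json hyperparameters and *_best.pt checkpoints.\n",
"# It rebuilds each best model and scores it on the test split with the same\n",
"# beam-search evaluation used for validation. batch_size=1024 is a demo choice.\n",
"test_ds = ParallelDataset(data_dir / \"test.bpe.tsv\", sp)\n",
"test_iter = tud.DataLoader(test_ds, batch_size=1024, shuffle=False, collate_fn=collate_fn)\n",
"\n",
"for model_class in [BiLSTMTranslator, TransformerTranslator]:\n",
"    for size in sizes:\n",
"        name = f\"{model_class.__name__}_{size}\"\n",
"        params_path = tune_dir / f\"{name}_best.json\"\n",
"        ckpt_path = train_dir / f\"{name}_best.pt\"\n",
"        if not (params_path.exists() and ckpt_path.exists()):\n",
"            print(f\"{name}: missing artefacts, skipping\")\n",
"            continue\n",
"        params = json.loads(params_path.read_text(encoding=\"utf-8\"))\n",
"        model = model_class(8000, **params).to(device)\n",
"        model.load_state_dict(torch.load(ckpt_path, map_location=device))\n",
"        bleu, chrf = evaluate(model, test_iter, sp, device)\n",
"        print(f\"{name}: test BLEU={bleu:.2f}  ChrF={chrf:.2f}\")"
]
},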
{
"cell_type": "code",
"execution_count": null,
"id": "aa8704de",
"metadata": {},
"outputs": [],
"source": [
"# # Welch T test\n",
"# for size in sizes:\n",
"# bilstm_path = train_dir / f\"BiLSTMTranslator_{size}_best.pt\"\n",
"# transformer_path = train_dir / f\"TransformerTranslator_{size}_best.pt\"\n",
"# if bilstm_path.exists() and transformer_path.exists():\n",
"# bilstm_model = BiLSTMTranslator(8000, **best_params[size])\n",
"# bilstm_model.load_state_dict(torch.load(bilstm_path, map_location=device))\n",
"# bilstm_model.to(device)\n",
"\n",
"# transformer_model = TransformerTranslator(8000, **best_params[size])\n",
"# transformer_model.load_state_dict(torch.load(transformer_path, map_location=device))\n",
"# transformer_model.to(device)\n",
"\n",
"# bilstm_bleu, _ = evaluate(bilstm_model, valid_iter, sp, device)\n",
"# transformer_bleu, _ = evaluate(transformer_model, valid_iter, sp, device)\n",
"\n",
"# t_stat, p_value = stats.ttest_ind_from_stats(\n",
"# mean1=bilstm_bleu, std1=0.0, nobs1=1,\n",
"# mean2=transformer_bleu, std2=0.0, nobs2=1\n",
"# )\n",
"# print(f\"Size {size}: BiLSTM BLEU={bilstm_bleu:.2f}, Transformer BLEU={transformer_bleu:.2f}, T-stat={t_stat:.3f}, p-value={p_value:.3e}\")\n",
"# else:\n",
"# print(f\"Skipping size {size} due to missing model files.\")\n",
"# continue"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"01f4a86d8d0a41da80724fee864ef5d9": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"04afe87465e74636a054a0c921e7e413": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"077f8471c2c343e5b7799580075504de": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_d35991b3a32a404db943a5a89f179fe4",
"IPY_MODEL_5c870d64c98a42768cfe8d91706200a3",
"IPY_MODEL_f823b931c6e64c01ab47c50f0786f169"
],
"layout": "IPY_MODEL_b173c10c27404604b98c98f8dcf78316"
}
},
"0a5382cf512d4cc69ed7cbf619068388": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"0bb73c18c0e146f2a8017663ab5f5242": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b9e69bd600204476ba1266acf823599d",
"placeholder": "",
"style": "IPY_MODEL_6af1dab14bf748e5a5242bab9f1dcb41",
"value": "Training: 25%"
}
},
"0cde4d604b0e4414b4e2644452723c25": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"112cb434a4cf41008f5518ee63601621": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b407ef0801b2458f81f8db8453f41a25",
"placeholder": "",
"style": "IPY_MODEL_ee29ef97e65d490b918cf9a0eca38300",
"value": "Evaluate: 100%"
}
},
"11d95e00113f4b288f1c14cec95a7eb5": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"1381a036b85141b78f003133b1824757": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6e12c1563ad7490ebe8430c9e6da5f2f",
"placeholder": "",
"style": "IPY_MODEL_2a72089e964d40b48aaf6a9341e6f2da",
"value": " 1231/1231 [05:52<00:00, 3.99it/s]"
}
},
"15d095519a0c453c891c238c47e5013a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"192a37d676b845838756267c5276fcaf": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1959babac5454df2b1a49865bc621d57": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"1beff3c5d0f5436e94720e84757ceb64": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_9d04f2546e794cf4ba92d8507e89ec6d",
"IPY_MODEL_dfe78f68496f4f60a59814904df87561",
"IPY_MODEL_44241262558f400583ec5d11f19aef22"
],
"layout": "IPY_MODEL_df1d72450f7440888ae80969b84c013c"
}
},
"238ee4c02f0d47788b7a12924bf30f4f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_29e36bcb96b2451fbf42c5483aa32d56",
"placeholder": "",
"style": "IPY_MODEL_192a37d676b845838756267c5276fcaf",
"value": "Train batches: 100%"
}
},
"26c0eac839024165a5170e35e7349812": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_c11e530543e34ea18cfb0a383ac47ac4",
"IPY_MODEL_6d0d5bca7fbc483f95e3e4098832f37e",
"IPY_MODEL_50dafbafbeeb4e2d9f13f79d68a12680"
],
"layout": "IPY_MODEL_35e7fd956631473b8d0330746e4539b5"
}
},
"26e03a78a3c14b39a83130bfe622d393": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"29e36bcb96b2451fbf42c5483aa32d56": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2a135019061146fdac2235fabdf92295": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ae05b1da8a694c8ebb83ab489d0fe16c",
"max": 1231,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_3d5ef1e58d3c43068d7b87b7a3baf050",
"value": 69
}
},
"2a72089e964d40b48aaf6a9341e6f2da": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"2b52eb6c12f84bd2a4c5bc0e46cd60b1": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2e3d78c64db743c8a9b2827ec77849a1": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"333b3dbd14de4a8c948ada2344fa517c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": "inline-flex",
"flex": null,
"flex_flow": "row wrap",
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": "100%"
}
},
"35e7fd956631473b8d0330746e4539b5": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": "hidden",
"width": null
}
},
"38086b9b8d184fbc8ce7264108d715f2": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"38c078d8654e4da5ab8c611ab225ef5a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"38f1374f6d994571bdcf1df794c1d7d6": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3ae0bbf2aea64ad2873b4a0d3c51f4a0": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_0bb73c18c0e146f2a8017663ab5f5242",
"IPY_MODEL_d6b5c01dce054506871261b1c079aa57",
"IPY_MODEL_72e10b1265734d97844ce09c8147c59d"
],
"layout": "IPY_MODEL_333b3dbd14de4a8c948ada2344fa517c"
}
},
"3d0461bfa359482f8888879333a06a33": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_fed9ad4bd38a4770b621709b2fb619f3",
"IPY_MODEL_b56ec14742084dc482f8c2681bce969e",
"IPY_MODEL_87fc31c52d074065a6e3c8d991509361"
],
"layout": "IPY_MODEL_972f1e15b3a04877a894efb66744064a"
}
},
"3d5ef1e58d3c43068d7b87b7a3baf050": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"41e2835ed4de46b3a237e5c78b93ce2c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"437d33c4b05846fdba3538d2b26894f4": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": "2",
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"44241262558f400583ec5d11f19aef22": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_01f4a86d8d0a41da80724fee864ef5d9",
"placeholder": "",
"style": "IPY_MODEL_1959babac5454df2b1a49865bc621d57",
"value": " 4/4 [00:16<00:00, 3.64s/it]"
}
},
"45ef2def0414411395c45b02dbed3a27": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"486e068721bf4ab6b7c3356294968ffb": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"48c2191f08374f9a9015a7e410e202ae": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_e6ca3ea9308240dea4e54b3d15d32dd5",
"max": 1231,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b5d257615ecd4e5fa975b7dda14fd2f4",
"value": 1231
}
},
"50dafbafbeeb4e2d9f13f79d68a12680": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_41e2835ed4de46b3a237e5c78b93ce2c",
"placeholder": "",
"style": "IPY_MODEL_a23ed94fa9bd47a8bb8fd3bbfe28b7e7",
"value": " 1231/1231 [05:55<00:00, 3.84it/s]"
}
},
"5177adba049e426fa35f1e7d338bcb2d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"5308939bf9784c3b8764818f0c43a02f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"53c8049356c14057a9ba4045a1c9aab4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"55dbd911d2b74907958e6fbedda1ea55": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_238ee4c02f0d47788b7a12924bf30f4f",
"IPY_MODEL_48c2191f08374f9a9015a7e410e202ae",
"IPY_MODEL_1381a036b85141b78f003133b1824757"
],
"layout": "IPY_MODEL_f0e26509b3494113b369eb3844b97d1e"
}
},
"5c870d64c98a42768cfe8d91706200a3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f6cc0256641c41a9b1cb1e3643e14469",
"max": 4,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_5308939bf9784c3b8764818f0c43a02f",
"value": 4
}
},
"61b2bd1e2f28451ca68bafcacc7f0d2d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"683ac0f0aee6408ab07e315ec061dbdd": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6af1dab14bf748e5a5242bab9f1dcb41": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"6d0d5bca7fbc483f95e3e4098832f37e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_683ac0f0aee6408ab07e315ec061dbdd",
"max": 1231,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b19c429b4932434b8bc04de6a07bd500",
"value": 1231
}
},
"6e12c1563ad7490ebe8430c9e6da5f2f": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6e2c5dd6292043c5952794d262e47497": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"6fed606f488c43ce88a16a011586ff54": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_6e2c5dd6292043c5952794d262e47497",
"placeholder": "",
"style": "IPY_MODEL_486e068721bf4ab6b7c3356294968ffb",
"value": "Train batches: 6%"
}
},
"72e10b1265734d97844ce09c8147c59d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_45ef2def0414411395c45b02dbed3a27",
"placeholder": "",
"style": "IPY_MODEL_53c8049356c14057a9ba4045a1c9aab4",
"value": " 3693/15000 [18:16<56:17, 3.35it/s, loss=2.52]"
}
},
"83a28591206e4aa8822e63e1f16ed274": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"87fc31c52d074065a6e3c8d991509361": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2b52eb6c12f84bd2a4c5bc0e46cd60b1",
"placeholder": "",
"style": "IPY_MODEL_dcb7aafa07fe4c3cb79c6ee9fcfcf97e",
"value": " 1231/1231 [05:55<00:00, 3.53it/s]"
}
},
"8bb91f146d394c7696539902b1ab66d9": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"8e546cc85b3f483d8ae3dbfdde1698ed": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9346552d6801429b86785f5dcdfbe76e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_112cb434a4cf41008f5518ee63601621",
"IPY_MODEL_adb4f44dfe784bb18981522596aeb33e",
"IPY_MODEL_baba00225d774c029cf22d0409737b64"
],
"layout": "IPY_MODEL_f93d9a2ceb604c4f9a9186a80e698d02"
}
},
"9622f2bb692c4fb58e5ac9c156c550ef": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"972f1e15b3a04877a894efb66744064a": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": "hidden",
"width": null
}
},
"9d04f2546e794cf4ba92d8507e89ec6d": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8e546cc85b3f483d8ae3dbfdde1698ed",
"placeholder": "",
"style": "IPY_MODEL_c91f0409857340c48740f6f2f92c1056",
"value": "Evaluate: 100%"
}
},
"a0837400890d458fa430d88958f20906": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a23ed94fa9bd47a8bb8fd3bbfe28b7e7": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"adb4f44dfe784bb18981522596aeb33e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_b00e2ea27a0147368eb64ca12ffd9a2a",
"max": 4,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_8bb91f146d394c7696539902b1ab66d9",
"value": 4
}
},
"ae05b1da8a694c8ebb83ab489d0fe16c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"afe6194065264f7c82c1cfd7a9a51570": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"b00e2ea27a0147368eb64ca12ffd9a2a": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b173c10c27404604b98c98f8dcf78316": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": "hidden",
"width": null
}
},
"b19c429b4932434b8bc04de6a07bd500": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b407ef0801b2458f81f8db8453f41a25": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b56ec14742084dc482f8c2681bce969e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_2e3d78c64db743c8a9b2827ec77849a1",
"max": 1231,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_11d95e00113f4b288f1c14cec95a7eb5",
"value": 1231
}
},
"b5d257615ecd4e5fa975b7dda14fd2f4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b9e69bd600204476ba1266acf823599d": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"baba00225d774c029cf22d0409737b64": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_04afe87465e74636a054a0c921e7e413",
"placeholder": "",
"style": "IPY_MODEL_38086b9b8d184fbc8ce7264108d715f2",
"value": " 4/4 [00:16<00:00, 3.66s/it]"
}
},
"c11e530543e34ea18cfb0a383ac47ac4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_38f1374f6d994571bdcf1df794c1d7d6",
"placeholder": "",
"style": "IPY_MODEL_afe6194065264f7c82c1cfd7a9a51570",
"value": "Train batches: 100%"
}
},
"c91f0409857340c48740f6f2f92c1056": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ccb92b52980c4cbf9013d22e1cbb94da": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d35991b3a32a404db943a5a89f179fe4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a0837400890d458fa430d88958f20906",
"placeholder": "",
"style": "IPY_MODEL_38c078d8654e4da5ab8c611ab225ef5a",
"value": "Evaluate: 100%"
}
},
"d6b5c01dce054506871261b1c079aa57": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_437d33c4b05846fdba3538d2b26894f4",
"max": 15000,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9622f2bb692c4fb58e5ac9c156c550ef",
"value": 3693
}
},
"daf3a2863edd416fa042388c1c3e34a9": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"dcb7aafa07fe4c3cb79c6ee9fcfcf97e": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"df1d72450f7440888ae80969b84c013c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": "hidden",
"width": null
}
},
"dfe78f68496f4f60a59814904df87561": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0a5382cf512d4cc69ed7cbf619068388",
"max": 4,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_5177adba049e426fa35f1e7d338bcb2d",
"value": 4
}
},
"e6ca3ea9308240dea4e54b3d15d32dd5": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"ee29ef97e65d490b918cf9a0eca38300": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"f0e26509b3494113b369eb3844b97d1e": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": "hidden",
"width": null
}
},
"f6cc0256641c41a9b1cb1e3643e14469": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"f823b931c6e64c01ab47c50f0786f169": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_26e03a78a3c14b39a83130bfe622d393",
"placeholder": "",
"style": "IPY_MODEL_61b2bd1e2f28451ca68bafcacc7f0d2d",
"value": " 4/4 [00:16<00:00, 3.67s/it]"
}
},
"f85221e202864031bf2baa442da61afd": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_daf3a2863edd416fa042388c1c3e34a9",
"placeholder": "",
"style": "IPY_MODEL_0cde4d604b0e4414b4e2644452723c25",
"value": " 69/1231 [00:20<05:40, 3.41it/s]"
}
},
"f93d9a2ceb604c4f9a9186a80e698d02": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": "hidden",
"width": null
}
},
"fd5ab0b0a8c1499da6a55dde8b9a4c91": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_6fed606f488c43ce88a16a011586ff54",
"IPY_MODEL_2a135019061146fdac2235fabdf92295",
"IPY_MODEL_f85221e202864031bf2baa442da61afd"
],
"layout": "IPY_MODEL_ccb92b52980c4cbf9013d22e1cbb94da"
}
},
"fed9ad4bd38a4770b621709b2fb619f3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_83a28591206e4aa8822e63e1f16ed274",
"placeholder": "",
"style": "IPY_MODEL_15d095519a0c453c891c238c47e5013a",
"value": "Train batches: 100%"
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}