Spaces:

vedaco
/

veda-programming

Sleeping

App Files Files Community

vedaco committed on Jan 18

Commit

47c9da0

verified ·

1 Parent(s): 304f12d

Update train.py

Browse files

Files changed (1) hide show

train.py +43 -933

train.py CHANGED Viewed

@@ -1,955 +1,65 @@
-"""Training for Veda Programming Assistant with Distillation Support"""
 import tensorflow as tf
-from tensorflow import keras
-import numpy as np
-import os
-import json
 from model import VedaProgrammingLLM
 from tokenizer import VedaTokenizer
-from database import db
-from config import MODEL_DIR, VOCAB_SIZE, MAX_LENGTH, BATCH_SIZE
-# Training data with proper format (no markdown code blocks - using <CODE> tags)
 TRAINING_DATA = """
-<USER> Hello!
-<ASSISTANT> Hello! Welcome to Veda Programming Assistant! I can help you with coding, explain concepts, or answer programming questions. What would you like to learn today?
-<USER> What is Python?
-<ASSISTANT> Python is a high-level, interpreted programming language created by Guido van Rossum in 1991. It is known for its simple, readable syntax.
-Key features:
-- Easy to learn and read
-- Dynamic typing
-- Large standard library
-- Great for beginners and experts
-Python is used for web development, data science, AI, automation, and more!
-<USER> How do I print hello world?
-<ASSISTANT> To print Hello World in Python, use the print function:
-<CODE>
-print("Hello, World!")
-<ENDCODE>
-When you run this, it displays: Hello, World!
-<USER> Write a function to add two numbers
-<ASSISTANT> Here is a simple function to add two numbers:
-<CODE>
-def add(a, b):
-    return a + b
-result = add(5, 3)
-print(result)
-<ENDCODE>
-Output: 8
-This function takes two parameters and returns their sum.
-<USER> Explain what a function is
-<ASSISTANT> A function is a reusable block of code that performs a specific task. Functions help organize code and avoid repetition.
-Here is the basic structure:
-<CODE>
-def function_name(parameters):
-    # Code block
-    return result
-<ENDCODE>
-Parts of a function:
-- def: keyword to define a function
-- function_name: what you call your function
-- parameters: input values
-- return: sends back a result
-Example:
-<CODE>
-def greet(name):
-    message = f"Hello, {name}!"
-    return message
-print(greet("Alice"))
-<ENDCODE>
-Output: Hello, Alice!
-<USER> What is a list?
-<ASSISTANT> A list is an ordered, mutable collection in Python that can hold items of any type.
-Creating lists:
-<CODE>
-my_list = []
-numbers = [1, 2, 3, 4, 5]
-fruits = ["apple", "banana", "cherry"]
-mixed = [1, "hello", 3.14, True]
-<ENDCODE>
-Common operations:
-<CODE>
-fruits = ["apple", "banana"]
-fruits.append("cherry")
-fruits.insert(0, "apricot")
-print(fruits[0])
-print(fruits[-1])
-fruits.remove("banana")
-fruits.pop()
-print(len(fruits))
-<ENDCODE>
-<USER> Write a fibonacci function
-<ASSISTANT> Here is a Fibonacci function:
-Recursive approach:
-<CODE>
-def fibonacci(n):
-    if n <= 0:
-        return 0
-    if n == 1:
-        return 1
-    return fibonacci(n - 1) + fibonacci(n - 2)
-for i in range(10):
-    print(fibonacci(i), end=" ")
-<ENDCODE>
-Output: 0 1 1 2 3 5 8 13 21 34
-Iterative approach (more efficient):
-<CODE>
-def fibonacci_iterative(n):
-    if n <= 0:
-        return 0
-    if n == 1:
-        return 1
-    a, b = 0, 1
-    for _ in range(2, n + 1):
-        a, b = b, a + b
-    return b
-<ENDCODE>
-<USER> What is recursion?
-<ASSISTANT> Recursion is when a function calls itself to solve a problem.
-Key components:
-1. Base case - stops the recursion
-2. Recursive case - function calls itself
-Example - countdown:
-<CODE>
-def countdown(n):
-    if n <= 0:
-        print("Done!")
-        return
-    print(n)
-    countdown(n - 1)
-countdown(5)
-<ENDCODE>
-Output: 5, 4, 3, 2, 1, Done!
-Factorial example:
-<CODE>
-def factorial(n):
-    if n <= 1:
-        return 1
-    return n * factorial(n - 1)
-print(factorial(5))
-<ENDCODE>
-Output: 120
-<USER> Write bubble sort
-<ASSISTANT> Here is the Bubble Sort algorithm:
-<CODE>
-def bubble_sort(arr):
-    n = len(arr)
-    for i in range(n):
-        swapped = False
-        for j in range(0, n - i - 1):
-            if arr[j] > arr[j + 1]:
-                arr[j], arr[j + 1] = arr[j + 1], arr[j]
-                swapped = True
-        if not swapped:
-            break
-    return arr
-numbers = [64, 34, 25, 12, 22, 11, 90]
-print("Original:", numbers)
-print("Sorted:", bubble_sort(numbers.copy()))
-<ENDCODE>
-Output:
-Original: [64, 34, 25, 12, 22, 11, 90]
-Sorted: [11, 12, 22, 25, 34, 64, 90]
-How it works: repeatedly compares adjacent elements and swaps them if in wrong order. Largest elements bubble up to the end.
-Time complexity: O(n^2)
-<USER> Write binary search
-<ASSISTANT> Here is the Binary Search algorithm:
-<CODE>
-def binary_search(arr, target):
-    left = 0
-    right = len(arr) - 1
-    while left <= right:
-        mid = (left + right) // 2
-        if arr[mid] == target:
-            return mid
-        elif arr[mid] < target:
-            left = mid + 1
-        else:
-            right = mid - 1
-    return -1
-numbers = [1, 3, 5, 7, 9, 11, 13, 15]
-print(binary_search(numbers, 7))
-print(binary_search(numbers, 6))
-<ENDCODE>
-Output:
-3
--1
-Important: The array must be sorted!
-Time complexity: O(log n)
-<USER> What is a class?
-<ASSISTANT> A class is a blueprint for creating objects. It bundles data (attributes) and functions (methods) together.
-<CODE>
-class Dog:
-    def __init__(self, name, age):
-        self.name = name
-        self.age = age
-    def bark(self):
-        return f"{self.name} says Woof!"
-    def get_info(self):
-        return f"{self.name} is {self.age} years old"
-my_dog = Dog("Buddy", 3)
-print(my_dog.bark())
-print(my_dog.get_info())
-<ENDCODE>
-Output:
-Buddy says Woof!
-Buddy is 3 years old
-Key concepts:
-- __init__: constructor, runs when object is created
-- self: refers to the current object
-- Attributes: variables inside the class
-- Methods: functions inside the class
-<USER> How do I read a file?
-<ASSISTANT> Here is how to read files in Python:
-Read entire file:
-<CODE>
-with open("filename.txt", "r") as file:
-    content = file.read()
-    print(content)
-<ENDCODE>
-Read line by line:
-<CODE>
-with open("filename.txt", "r") as file:
-    for line in file:
-        print(line.strip())
-<ENDCODE>
-Read all lines into list:
-<CODE>
-with open("filename.txt", "r") as file:
-    lines = file.readlines()
-<ENDCODE>
-With error handling:
-<CODE>
-def read_file_safely(filename):
-    try:
-        with open(filename, "r") as file:
-            return file.read()
-    except FileNotFoundError:
-        print(f"File not found: {filename}")
-        return None
-content = read_file_safely("myfile.txt")
-<ENDCODE>
-The with statement automatically closes the file when done.
-<USER> How do I write to a file?
-<ASSISTANT> Here is how to write to files in Python:
-Write (overwrites existing content):
-<CODE>
-with open("output.txt", "w") as file:
-    file.write("Hello, World!")
-    file.write("This is a new line")
-<ENDCODE>
-Append (adds to existing content):
-<CODE>
-with open("output.txt", "a") as file:
-    file.write("This is appended")
-<ENDCODE>
-Write multiple lines:
-<CODE>
-lines = ["Line 1", "Line 2", "Line 3"]
-with open("output.txt", "w") as file:
-    for line in lines:
-        file.write(line + "\\n")
-<ENDCODE>
-Or use writelines:
-<CODE>
-lines = ["Line 1\\n", "Line 2\\n", "Line 3\\n"]
-with open("output.txt", "w") as file:
-    file.writelines(lines)
-<ENDCODE>
-<USER> How do I handle errors?
-<ASSISTANT> Python uses try-except blocks for error handling:
-Basic try-except:
-<CODE>
-try:
-    result = 10 / 0
-except ZeroDivisionError:
-    print("Cannot divide by zero!")
-<ENDCODE>
-Multiple exceptions:
-<CODE>
-try:
-    number = int(input("Enter number: "))
-    result = 10 / number
-except ValueError:
-    print("Not a valid number!")
-except ZeroDivisionError:
-    print("Cannot divide by zero!")
-except Exception as e:
-    print(f"Error: {e}")
-<ENDCODE>
-Complete structure:
-<CODE>
-try:
-    result = risky_operation()
-except SomeError:
-    print("Handle error")
-else:
-    print("Success! No error occurred")
-finally:
-    print("This always runs")
-<ENDCODE>
-Common exceptions:
-- ValueError: wrong value
-- TypeError: wrong type
-- IndexError: index out of range
-- KeyError: dictionary key not found
-- FileNotFoundError: file does not exist
-- ZeroDivisionError: division by zero
-<USER> What is a dictionary?
-<ASSISTANT> A dictionary is a collection of key-value pairs in Python.
-Creating dictionaries:
-<CODE>
-empty_dict = {}
-person = {"name": "Alice", "age": 25, "city": "NYC"}
-another = dict(name="Bob", age=30)
-<ENDCODE>
-Accessing values:
-<CODE>
-person = {"name": "Alice", "age": 25}
-print(person["name"])
-print(person.get("name"))
-print(person.get("job", "Unknown"))
-<ENDCODE>
-Modifying:
-<CODE>
-person = {"name": "Alice", "age": 25}
-person["city"] = "Boston"
-person["age"] = 26
-del person["city"]
-age = person.pop("age")
-<ENDCODE>
-Iterating:
-<CODE>
-person = {"name": "Alice", "age": 25}
-for key in person:
-    print(key)
-for value in person.values():
-    print(value)
-for key, value in person.items():
-    print(f"{key}: {value}")
-<ENDCODE>
-<USER> What is the difference between list and tuple?
-<ASSISTANT> The main difference is mutability:
-- Lists are mutable (can be changed)
-- Tuples are immutable (cannot be changed)
-Syntax:
-<CODE>
-my_list = [1, 2, 3]
-my_tuple = (1, 2, 3)
-<ENDCODE>
-Mutability difference:
-<CODE>
-my_list = [1, 2, 3]
-my_list[0] = 100
-my_list.append(4)
-print(my_list)
-my_tuple = (1, 2, 3)
-# my_tuple[0] = 100  # ERROR! Cannot modify
-# my_tuple.append(4)  # ERROR! No append method
-<ENDCODE>
-When to use:
-- List: data that will change (shopping cart, scores)
-- Tuple: fixed data (coordinates, RGB colors)
-Examples:
-<CODE>
-coordinates = (10, 20)
-rgb_color = (255, 128, 0)
-shopping_cart = ["apple", "bread"]
-shopping_cart.append("milk")
-<ENDCODE>
-Other differences:
-- Tuples are slightly faster
-- Tuples use less memory
-- Tuples can be dictionary keys
-<USER> How do I use list comprehension?
-<ASSISTANT> List comprehension is a concise way to create lists.
-Basic syntax:
-<CODE>
-new_list = [expression for item in iterable]
-<ENDCODE>
-Examples:
-<CODE>
-squares = [x ** 2 for x in range(5)]
-print(squares)
-<ENDCODE>
-Output: [0, 1, 4, 9, 16]
-With condition:
-<CODE>
-evens = [x for x in range(10) if x % 2 == 0]
-print(evens)
-<ENDCODE>
-Output: [0, 2, 4, 6, 8]
-Transform strings:
-<CODE>
-words = ["hello", "world"]
-upper_words = [word.upper() for word in words]
-print(upper_words)
-<ENDCODE>
-Output: ["HELLO", "WORLD"]
-With if-else:
-<CODE>
-numbers = [1, 2, 3, 4, 5]
-labels = ["even" if x % 2 == 0 else "odd" for x in numbers]
-print(labels)
-<ENDCODE>
-Output: ["odd", "even", "odd", "even", "odd"]
-<USER> Write quicksort
-<ASSISTANT> Here is the QuickSort algorithm:
-<CODE>
-def quicksort(arr):
-    if len(arr) <= 1:
-        return arr
-    pivot = arr[len(arr) // 2]
-    left = [x for x in arr if x < pivot]
-    middle = [x for x in arr if x == pivot]
-    right = [x for x in arr if x > pivot]
-    return quicksort(left) + middle + quicksort(right)
-numbers = [64, 34, 25, 12, 22, 11, 90]
-print("Original:", numbers)
-print("Sorted:", quicksort(numbers))
-<ENDCODE>
-Output:
-Original: [64, 34, 25, 12, 22, 11, 90]
-Sorted: [11, 12, 22, 25, 34, 64, 90]
-In-place version:
-<CODE>
-def quicksort_inplace(arr, low, high):
-    if low < high:
-        pivot_idx = partition(arr, low, high)
-        quicksort_inplace(arr, low, pivot_idx - 1)
-        quicksort_inplace(arr, pivot_idx + 1, high)
-def partition(arr, low, high):
-    pivot = arr[high]
-    i = low - 1
-    for j in range(low, high):
-        if arr[j] <= pivot:
-            i += 1
-            arr[i], arr[j] = arr[j], arr[i]
-    arr[i + 1], arr[high] = arr[high], arr[i + 1]
-    return i + 1
-numbers = [64, 34, 25, 12, 22, 11, 90]
-quicksort_inplace(numbers, 0, len(numbers) - 1)
-print(numbers)
-<ENDCODE>
-Time complexity:
-- Average: O(n log n)
-- Worst: O(n^2)
-<USER> Write merge sort
-<ASSISTANT> Here is the Merge Sort algorithm:
-<CODE>
-def merge_sort(arr):
-    if len(arr) <= 1:
-        return arr
-    mid = len(arr) // 2
-    left = merge_sort(arr[:mid])
-    right = merge_sort(arr[mid:])
-    return merge(left, right)
-def merge(left, right):
-    result = []
-    i = j = 0
-    while i < len(left) and j < len(right):
-        if left[i] <= right[j]:
-            result.append(left[i])
-            i += 1
-        else:
-            result.append(right[j])
-            j += 1
-    result.extend(left[i:])
-    result.extend(right[j:])
-    return result
-numbers = [64, 34, 25, 12, 22, 11, 90]
-print("Original:", numbers)
-print("Sorted:", merge_sort(numbers))
-<ENDCODE>
-Output:
-Original: [64, 34, 25, 12, 22, 11, 90]
-Sorted: [11, 12, 22, 25, 34, 64, 90]
-How it works:
-1. Divide array into two halves
-2. Recursively sort each half
-3. Merge the sorted halves
-Time complexity: O(n log n) always
-Space complexity: O(n)
-<USER> What is a lambda function?
-<ASSISTANT> A lambda function is a small anonymous function defined in one line.
-Syntax:
-<CODE>
-lambda arguments: expression
-<ENDCODE>
-Examples:
-<CODE>
-add = lambda x, y: x + y
-print(add(5, 3))
-square = lambda x: x ** 2
-print(square(4))
-is_even = lambda x: x % 2 == 0
-print(is_even(4))
-<ENDCODE>
-Output:
-8
-16
-True
-Common uses:
-With sorted():
-<CODE>
-students = [("Alice", 85), ("Bob", 92), ("Charlie", 78)]
-sorted_by_score = sorted(students, key=lambda x: x[1])
-print(sorted_by_score)
-<ENDCODE>
-With map():
-<CODE>
-numbers = [1, 2, 3, 4, 5]
-squares = list(map(lambda x: x ** 2, numbers))
-print(squares)
-<ENDCODE>
-With filter():
-<CODE>
-numbers = [1, 2, 3, 4, 5, 6]
-evens = list(filter(lambda x: x % 2 == 0, numbers))
-print(evens)
-<ENDCODE>
-<USER> What are decorators?
-<ASSISTANT> Decorators are functions that modify the behavior of other functions.
-Basic decorator:
-<CODE>
-def my_decorator(func):
-    def wrapper():
-        print("Before function")
-        func()
-        print("After function")
-    return wrapper
-@my_decorator
-def say_hello():
-    print("Hello!")
-say_hello()
-<ENDCODE>
-Output:
-Before function
-Hello!
-After function
-With arguments:
-<CODE>
-def my_decorator(func):
-    def wrapper(*args, **kwargs):
-        print("Before function")
-        result = func(*args, **kwargs)
-        print("After function")
-        return result
-    return wrapper
-@my_decorator
 def add(a, b):
     return a + b
-print(add(5, 3))
-<ENDCODE>
-Practical example - timer:
-<CODE>
-import time
-def timer(func):
-    def wrapper(*args, **kwargs):
-        start = time.time()
-        result = func(*args, **kwargs)
-        end = time.time()
-        print(f"{func.__name__} took {end - start:.4f} seconds")
-        return result
-    return wrapper
-@timer
-def slow_function():
-    time.sleep(1)
-    return "Done"
-slow_function()
-<ENDCODE>
-<USER> Thanks!
-<ASSISTANT> You are welcome! I am glad I could help. Feel free to ask me anything anytime. Happy coding!
-<USER> Hi
-<ASSISTANT> Hello! How can I help you today? I can write code, explain concepts, or answer your programming questions!
-<USER> What can you do?
-<ASSISTANT> I am Veda Programming Assistant! I can help you with:
-1. Writing Python code
-2. Explaining programming concepts
-3. Debugging code
-4. Answering questions about algorithms
-5. Teaching data structures
-6. Helping with file handling
-7. Explaining object-oriented programming
-8. And much more!
-Just ask me anything about programming and I will do my best to help you!
-<USER> 2+2=?
-<ASSISTANT> 4
-<USER> What is 10 times 5?
-<ASSISTANT> 10 times 5 equals 50.
-<USER> Calculate 100 divided by 4
-<ASSISTANT> 100 divided by 4 equals 25.
 """
 class VedaTrainer:
-    """Trainer for Veda Programming Assistant with Distillation Support"""
-    def __init__(self, vocab_size: int = 8000, max_length: int = 512, batch_size: int = 4):
-        self.vocab_size = vocab_size
-        self.max_length = max_length
-        self.batch_size = batch_size
-        self.tokenizer = VedaTokenizer(vocab_size=vocab_size)
         self.model = None
-    def prepare_data(self, extra_data: str = "", distillation_data: str = ""):
-        """Prepare training data including distillation data"""
-        data = TRAINING_DATA
-        if extra_data:
-            data += "\n\n" + extra_data
-        if distillation_data:
-            data += "\n\n" + distillation_data
-        if os.path.exists("programming.txt"):
-            try:
-                with open("programming.txt", "r", encoding="utf-8") as f:
-                    code_data = f.read()
-                data += "\n\n" + code_data
-            except Exception as e:
-                print(f"Warning: Could not read programming.txt: {e}")
         self.tokenizer.fit([data])
-        all_tokens = self.tokenizer.encode(data)
-        print(f"Total tokens: {len(all_tokens)}")
-        sequences = []
-        stride = self.max_length // 2
-        for i in range(0, len(all_tokens) - self.max_length - 1, stride):
-            seq = all_tokens[i : i + self.max_length + 1]
-            if len(seq) == self.max_length + 1:
-                sequences.append(seq)
-        if len(sequences) < 10:
-            stride = self.max_length // 4
-            sequences = []
-            for i in range(0, len(all_tokens) - self.max_length - 1, stride):
-                seq = all_tokens[i : i + self.max_length + 1]
-                if len(seq) == self.max_length + 1:
-                    sequences.append(seq)
-        print(f"Created {len(sequences)} training sequences")
-        if len(sequences) == 0:
-            print("Warning: No sequences created. Using minimal sequence.")
-            min_seq = all_tokens[:self.max_length + 1]
-            while len(min_seq) < self.max_length + 1:
-                min_seq.append(0)
-            sequences = [min_seq]
-        sequences = np.array(sequences)
-        X = sequences[:, :-1]
-        y = sequences[:, 1:]
-        dataset = tf.data.Dataset.from_tensor_slices((X, y))
-        dataset = dataset.shuffle(1000).batch(self.batch_size).prefetch(1)
-        return dataset
-    def build_model(self):
-        """Build the model"""
-        self.model = VedaProgrammingLLM(
-            vocab_size=self.tokenizer.vocabulary_size,
-            max_length=self.max_length,
-            d_model=256,
-            num_heads=8,
-            num_layers=4,
-            ff_dim=512,
-        )
-        self.model.compile(
-            optimizer=keras.optimizers.Adam(learning_rate=1e-4),
-            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
-            metrics=["accuracy"],
-        )
-        dummy = tf.zeros((1, self.max_length), dtype=tf.int32)
-        self.model(dummy)
-        return self.model
-    def train(
-        self,
-        epochs: int = 15,
-        save_path: str = None,
-        extra_data: str = "",
-        distillation_data: str = "",
-    ):
-        """Train the model"""
-        if save_path is None:
-            save_path = MODEL_DIR
-        dataset = self.prepare_data(extra_data, distillation_data)
-        self.build_model()
-        self.model.summary()
-        os.makedirs(save_path, exist_ok=True)
-        history = self.model.fit(dataset, epochs=epochs, verbose=1)
-        # Save weights
-        self.model.save_weights(os.path.join(save_path, "weights.h5"))
-        # Save tokenizer
-        self.tokenizer.save(os.path.join(save_path, "tokenizer.json"))
-        # Save config
-        config = self.model.get_config()
-        with open(os.path.join(save_path, "config.json"), "w") as f:
-            json.dump(config, f, indent=2)
-        print(f"Model saved to {save_path}")
-        return history
-    def generate_response(
-        self, user_input: str, max_tokens: int = 200, temperature: float = 0.7
-    ) -> str:
-        """Generate a response"""
-        if self.model is None:
-            return "Model not loaded."
-        prompt = f"<USER> {user_input}\n<ASSISTANT>"
-        tokens = self.tokenizer.encode(prompt)
-        generated = self.model.generate(
-            tokens,
-            max_new_tokens=max_tokens,
-            temperature=temperature,
-            repetition_penalty=1.2,
-        )
-        response = self.tokenizer.decode(generated)
-        if "<ASSISTANT>" in response:
-            response = response.split("<ASSISTANT>")[-1].strip()
-        if "<USER>" in response:
-            response = response.split("<USER>")[0].strip()
-        return response
 if __name__ == "__main__":
-    print("=" * 50)
-    print("Training Veda Programming Assistant")
-    print("=" * 50)
-    trainer = VedaTrainer()
-    trainer.train(epochs=20)
-    print("\n" + "=" * 50)
-    print("Testing the model:")
-    print("=" * 50)
-    test_prompts = [
-        "Hello!",
-        "What is a function?",
-        "Write a function to reverse a string",
-        "2+2=?",
-    ]
-    for prompt in test_prompts:
-        print(f"\nUser: {prompt}")
-        response = trainer.generate_response(prompt)
-        print(f"Assistant: {response}")

 import tensorflow as tf
 from model import VedaProgrammingLLM
 from tokenizer import VedaTokenizer
+from config import MODEL_DIR, VOCAB_SIZE, MAX_LENGTH
+import os
+import json
 TRAINING_DATA = """
+<USER> Hi
+<ASSISTANT> Hello! How can I help you today?
+<USER> Who are you?
+<ASSISTANT> I am Veda, an AI programming assistant.
+<USER> What is Python?
+<ASSISTANT> Python is a high-level programming language known for its simplicity and readability.
+<USER> Write hello world in python
+<ASSISTANT> <CODE>print("Hello, World!")<ENDCODE>
+<USER> Write a function to add numbers
+<ASSISTANT> <CODE>
 def add(a, b):
     return a + b
+</CODE>
+<USER> Explain recursion
+<ASSISTANT> Recursion is when a function calls itself to solve a problem. It needs a base case to stop.
 """
 class VedaTrainer:
+    def __init__(self):
+        self.tokenizer = VedaTokenizer(VOCAB_SIZE)
         self.model = None
+    def train(self, epochs=10, extra_data=""):
+        data = TRAINING_DATA + "\n" + extra_data
         self.tokenizer.fit([data])
+        tokens = self.tokenizer.encode(data)
+        # Create dataset
+        seqs = []
+        for i in range(0, len(tokens)-MAX_LENGTH, 50):
+            seqs.append(tokens[i:i+MAX_LENGTH+1])
+        import numpy as np
+        if not seqs: seqs = [tokens[:MAX_LENGTH+1]]
+        arr = np.array(seqs)
+        ds = tf.data.Dataset.from_tensor_slices((arr[:, :-1], arr[:, 1:])).batch(4)
+        self.model = VedaProgrammingLLM(self.tokenizer.vocabulary_size)
+        self.model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
+        # Build model
+        self.model(tf.zeros((1, MAX_LENGTH)))
+        self.model.fit(ds, epochs=epochs)
+        # Save
+        self.model.save_weights(os.path.join(MODEL_DIR, "weights.h5"))
+        self.tokenizer.save(os.path.join(MODEL_DIR, "tokenizer.json"))
+        with open(os.path.join(MODEL_DIR, "config.json"), 'w') as f:
+            json.dump(self.model.get_config(), f)
 if __name__ == "__main__":
+    VedaTrainer().train(epochs=20)