Spaces:

Martico2432
/

DatasetCreator

Sleeping

App Files Community

Martico2432 committed on Dec 26, 2025

Commit

c170dd7

verified ·

1 Parent(s): 0bf3f29

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +54 -56

src/streamlit_app.py CHANGED Viewed

@@ -1,47 +1,43 @@
 import streamlit as st
 import json
-import os
-# Configuration
-DATASET_FILE = "my_llm_dataset.jsonl"
-def save_to_dataset(conversation_messages):
-    """
-    Appends the conversation to a JSONL file in the format:
-    {"messages": [{"role": "...", "content": "..."}]}
-    """
-    # Standard format for most LLM trainers
-    record = {"messages": conversation_messages}
-    with open(DATASET_FILE, "a", encoding="utf-8") as f:
-        json_record = json.dumps(record, ensure_ascii=False)
-        f.write(json_record + "\n")
 def main():
-    st.set_page_config(page_title="LLM Dataset Builder", layout="wide")
-    st.title("Thinking & Tool Dataset Creator")
-    # Initialize session state for the conversation
-    if "messages" not in st.session_state:
-        st.session_state.messages = []
-    # Sidebar: Stats & File Management
     with st.sidebar:
-        st.header("Dataset Overview")
-        if os.path.exists(DATASET_FILE):
-            with open(DATASET_FILE, "r", encoding="utf-8") as f:
-                lines = f.readlines()
-                count = len(lines)
-            st.success(f"Entries in file: {count}")
-            # Download button for convenience
-            with open(DATASET_FILE, "rb") as f:
-                st.download_button("Download .jsonl", f, file_name=DATASET_FILE)
-        else:
-            st.info("No dataset file found yet.")
-        if st.button("Clear Draft"):
-            st.session_state.messages = []
             st.rerun()
     # Layout: Input (Left) and Preview (Right)
@@ -49,7 +45,6 @@ def main():
     with col1:
         st.subheader("Add Message")
-        # Standard roles: user (for content), assistant, and tool
         role_map = {
             "User": "user",
             "Assistant": "assistant",
@@ -58,40 +53,43 @@ def main():
         selected_label = st.selectbox("Role", list(role_map.keys()))
         actual_role = role_map[selected_label]
         content = st.text_area(
             "Content",
-            placeholder="Text, <think> tags, LaTeX text, or code blocks here...",
-            height=400
         )
-        if st.button("Add Message"):
             if content.strip():
-                st.session_state.messages.append({"role": actual_role, "content": content})
                 st.rerun()
     with col2:
-        st.subheader("Conversation Preview")
-        if not st.session_state.messages:
-            st.write("No messages added yet. Start by adding a User Prompt.")
-        for idx, msg in enumerate(st.session_state.messages):
             with st.chat_message(msg["role"]):
-                st.write(f"**Role: {msg['role']}**")
                 st.code(msg["content"], language="markdown")
                 if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
-                    st.session_state.messages.pop(idx)
                     st.rerun()
-        if len(st.session_state.messages) > 0:
             st.divider()
-            if st.button("SAVE CONVERSATION & START NEW", type="primary", use_container_width=True):
-                save_to_dataset(st.session_state.messages)
-                st.session_state.messages = [] # Reset for next entry
-                # Clear content text area text
-                #TODO: content.value = ""
-                st.rerun()
-                st.toast("Saved to dataset!")
                 st.rerun()
 if __name__ == "__main__":

 import streamlit as st
 import json
+import io
 def main():
+    st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide")
+    st.title("🧠 Private Dataset Creator")
+    st.info("Everything is stored in your browser RAM. Refreshing the page will clear your progress.")
+    # 1. Initialize session states
+    if "full_dataset" not in st.session_state:
+        st.session_state.full_dataset = []  # This holds all completed conversations
+    if "current_conversation" not in st.session_state:
+        st.session_state.current_conversation = [] # This holds the active draft
+    # Sidebar: Stats & Download
     with st.sidebar:
+        st.header("Your Session Dataset")
+        count = len(st.session_state.full_dataset)
+        st.metric("Conversations Saved", count)
+        if count > 0:
+            # Create the JSONL content in memory
+            jsonl_str = ""
+            for conv in st.session_state.full_dataset:
+                jsonl_str += json.dumps({"messages": conv}, ensure_ascii=False) + "\n"
+            # Download button using an in-memory buffer
+            st.download_button(
+                label="📥 Download My Dataset (.jsonl)",
+                data=jsonl_str,
+                file_name="my_private_dataset.jsonl",
+                mime="application/jsonl",
+                type="primary"
+            )
+        if st.button("🗑️ Wipe All Data"):
+            st.session_state.full_dataset = []
+            st.session_state.current_conversation = []
             st.rerun()
     # Layout: Input (Left) and Preview (Right)
     with col1:
         st.subheader("Add Message")
         role_map = {
             "User": "user",
             "Assistant": "assistant",
         selected_label = st.selectbox("Role", list(role_map.keys()))
         actual_role = role_map[selected_label]
+        # Use a key for the text area to allow manual clearing if needed
         content = st.text_area(
             "Content",
+            placeholder="Text, <think> tags, or code blocks...",
+            height=300,
+            key="input_text"
         )
+        if st.button("Add Message to Draft"):
             if content.strip():
+                st.session_state.current_conversation.append({
+                    "role": actual_role,
+                    "content": content
+                })
                 st.rerun()
     with col2:
+        st.subheader("Current Draft Preview")
+        if not st.session_state.current_conversation:
+            st.write("Draft is empty.")
+        for idx, msg in enumerate(st.session_state.current_conversation):
             with st.chat_message(msg["role"]):
+                st.markdown(f"**{msg['role'].upper()}**")
                 st.code(msg["content"], language="markdown")
                 if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
+                    st.session_state.current_conversation.pop(idx)
                     st.rerun()
+        if len(st.session_state.current_conversation) > 0:
             st.divider()
+            if st.button("✅ SAVE CONVERSATION TO SESSION", use_container_width=True):
+                # Move current draft to the full dataset list
+                st.session_state.full_dataset.append(list(st.session_state.current_conversation))
+                # Clear draft
+                st.session_state.current_conversation = []
+                st.toast("Saved to session memory!")
                 st.rerun()
 if __name__ == "__main__":