Martico2432 committed on
Commit
c170dd7
·
verified ·
1 Parent(s): 0bf3f29

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +54 -56
src/streamlit_app.py CHANGED
@@ -1,47 +1,43 @@
1
  import streamlit as st
2
  import json
3
- import os
4
-
5
- # Configuration
6
- DATASET_FILE = "my_llm_dataset.jsonl"
7
-
8
- def save_to_dataset(conversation_messages):
9
- """
10
- Appends the conversation to a JSONL file in the format:
11
- {"messages": [{"role": "...", "content": "..."}]}
12
- """
13
- # Standard format for most LLM trainers
14
- record = {"messages": conversation_messages}
15
-
16
- with open(DATASET_FILE, "a", encoding="utf-8") as f:
17
- json_record = json.dumps(record, ensure_ascii=False)
18
- f.write(json_record + "\n")
19
 
20
  def main():
21
- st.set_page_config(page_title="LLM Dataset Builder", layout="wide")
22
- st.title("Thinking & Tool Dataset Creator")
 
23
 
24
- # Initialize session state for the conversation
25
- if "messages" not in st.session_state:
26
- st.session_state.messages = []
 
 
 
27
 
28
- # Sidebar: Stats & File Management
29
  with st.sidebar:
30
- st.header("Dataset Overview")
31
- if os.path.exists(DATASET_FILE):
32
- with open(DATASET_FILE, "r", encoding="utf-8") as f:
33
- lines = f.readlines()
34
- count = len(lines)
35
- st.success(f"Entries in file: {count}")
36
-
37
- # Download button for convenience
38
- with open(DATASET_FILE, "rb") as f:
39
- st.download_button("Download .jsonl", f, file_name=DATASET_FILE)
40
- else:
41
- st.info("No dataset file found yet.")
42
 
43
- if st.button("Clear Draft"):
44
- st.session_state.messages = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  st.rerun()
46
 
47
  # Layout: Input (Left) and Preview (Right)
@@ -49,7 +45,6 @@ def main():
49
 
50
  with col1:
51
  st.subheader("Add Message")
52
- # Standard roles: user (for content), assistant, and tool
53
  role_map = {
54
  "User": "user",
55
  "Assistant": "assistant",
@@ -58,40 +53,43 @@ def main():
58
  selected_label = st.selectbox("Role", list(role_map.keys()))
59
  actual_role = role_map[selected_label]
60
 
 
61
  content = st.text_area(
62
  "Content",
63
- placeholder="Text, <think> tags, LaTeX text, or code blocks here...",
64
- height=400
 
65
  )
66
 
67
- if st.button("Add Message"):
68
  if content.strip():
69
- st.session_state.messages.append({"role": actual_role, "content": content})
 
 
 
70
  st.rerun()
71
 
72
  with col2:
73
- st.subheader("Conversation Preview")
74
- if not st.session_state.messages:
75
- st.write("No messages added yet. Start by adding a User Prompt.")
76
 
77
- for idx, msg in enumerate(st.session_state.messages):
78
  with st.chat_message(msg["role"]):
79
- st.write(f"**Role: {msg['role']}**")
80
  st.code(msg["content"], language="markdown")
81
  if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
82
- st.session_state.messages.pop(idx)
83
  st.rerun()
84
 
85
- if len(st.session_state.messages) > 0:
86
  st.divider()
87
- if st.button("SAVE CONVERSATION & START NEW", type="primary", use_container_width=True):
88
- save_to_dataset(st.session_state.messages)
89
- st.session_state.messages = [] # Reset for next entry
90
- # Clear content text area text
91
- #TODO: content.value = ""
92
- st.rerun()
93
-
94
- st.toast("Saved to dataset!")
95
  st.rerun()
96
 
97
  if __name__ == "__main__":
 
1
  import streamlit as st
2
  import json
3
+ import io
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  def main():
6
+ st.set_page_config(page_title="Private LLM Dataset Builder", layout="wide")
7
+ st.title("🧠 Private Dataset Creator")
8
+ st.info("Everything is stored in your browser RAM. Refreshing the page will clear your progress.")
9
 
10
+ # 1. Initialize session states
11
+ if "full_dataset" not in st.session_state:
12
+ st.session_state.full_dataset = [] # This holds all completed conversations
13
+
14
+ if "current_conversation" not in st.session_state:
15
+ st.session_state.current_conversation = [] # This holds the active draft
16
 
17
+ # Sidebar: Stats & Download
18
  with st.sidebar:
19
+ st.header("Your Session Dataset")
20
+ count = len(st.session_state.full_dataset)
21
+ st.metric("Conversations Saved", count)
 
 
 
 
 
 
 
 
 
22
 
23
+ if count > 0:
24
+ # Create the JSONL content in memory
25
+ jsonl_str = ""
26
+ for conv in st.session_state.full_dataset:
27
+ jsonl_str += json.dumps({"messages": conv}, ensure_ascii=False) + "\n"
28
+
29
+ # Download button using an in-memory buffer
30
+ st.download_button(
31
+ label="📥 Download My Dataset (.jsonl)",
32
+ data=jsonl_str,
33
+ file_name="my_private_dataset.jsonl",
34
+ mime="application/jsonl",
35
+ type="primary"
36
+ )
37
+
38
+ if st.button("🗑️ Wipe All Data"):
39
+ st.session_state.full_dataset = []
40
+ st.session_state.current_conversation = []
41
  st.rerun()
42
 
43
  # Layout: Input (Left) and Preview (Right)
 
45
 
46
  with col1:
47
  st.subheader("Add Message")
 
48
  role_map = {
49
  "User": "user",
50
  "Assistant": "assistant",
 
53
  selected_label = st.selectbox("Role", list(role_map.keys()))
54
  actual_role = role_map[selected_label]
55
 
56
+ # Use a key for the text area to allow manual clearing if needed
57
  content = st.text_area(
58
  "Content",
59
+ placeholder="Text, <think> tags, or code blocks...",
60
+ height=300,
61
+ key="input_text"
62
  )
63
 
64
+ if st.button("Add Message to Draft"):
65
  if content.strip():
66
+ st.session_state.current_conversation.append({
67
+ "role": actual_role,
68
+ "content": content
69
+ })
70
  st.rerun()
71
 
72
  with col2:
73
+ st.subheader("Current Draft Preview")
74
+ if not st.session_state.current_conversation:
75
+ st.write("Draft is empty.")
76
 
77
+ for idx, msg in enumerate(st.session_state.current_conversation):
78
  with st.chat_message(msg["role"]):
79
+ st.markdown(f"**{msg['role'].upper()}**")
80
  st.code(msg["content"], language="markdown")
81
  if st.button(f"Delete msg {idx}", key=f"del_{idx}"):
82
+ st.session_state.current_conversation.pop(idx)
83
  st.rerun()
84
 
85
+ if len(st.session_state.current_conversation) > 0:
86
  st.divider()
87
+ if st.button("SAVE CONVERSATION TO SESSION", use_container_width=True):
88
+ # Move current draft to the full dataset list
89
+ st.session_state.full_dataset.append(list(st.session_state.current_conversation))
90
+ # Clear draft
91
+ st.session_state.current_conversation = []
92
+ st.toast("Saved to session memory!")
 
 
93
  st.rerun()
94
 
95
  if __name__ == "__main__":