Spaces:
Running
Running
nam pham
committed on
Commit
·
9faf7cc
1
Parent(s):
ffa19f8
feat: fix load from huggingface
Browse files
- app.py +27 -17
- data/annotated_data.json +0 -0
app.py
CHANGED
|
@@ -146,6 +146,7 @@ dynamic_dataset = None
|
|
| 146 |
def load_dataset():
|
| 147 |
global dynamic_dataset
|
| 148 |
try:
|
|
|
|
| 149 |
with open("data/annotated_data.json", 'rt') as dataset:
|
| 150 |
ANNOTATED_DATA = json.load(dataset)
|
| 151 |
dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
|
|
@@ -530,11 +531,25 @@ def convert_hf_dataset_to_ner_format(dataset):
|
|
| 530 |
|
| 531 |
return converted_data
|
| 532 |
|
| 533 |
-
def load_from_huggingface(dataset_name: str
|
| 534 |
"""Load dataset from Hugging Face Hub"""
|
| 535 |
try:
|
| 536 |
-
|
| 537 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
|
| 539 |
# Save the converted data
|
| 540 |
os.makedirs("data", exist_ok=True)
|
|
@@ -543,7 +558,8 @@ def load_from_huggingface(dataset_name: str, split: str = "all"):
|
|
| 543 |
|
| 544 |
return f"Successfully loaded and converted dataset: {dataset_name}"
|
| 545 |
except Exception as e:
|
| 546 |
-
|
|
|
|
| 547 |
|
| 548 |
def load_from_local_file(file_path: str, file_format: str = "json"):
|
| 549 |
"""Load and convert data from local file in various formats"""
|
|
@@ -891,14 +907,7 @@ with gr.Blocks() as demo:
|
|
| 891 |
placeholder="Enter dataset name (e.g., conll2003)",
|
| 892 |
scale=3
|
| 893 |
)
|
| 894 |
-
dataset_split = gr.Dropdown(
|
| 895 |
-
choices=["train", "validation", "test"],
|
| 896 |
-
value="train",
|
| 897 |
-
label="Dataset Split",
|
| 898 |
-
scale=2
|
| 899 |
-
)
|
| 900 |
load_dataset_btn = gr.Button("Load Dataset", scale=1)
|
| 901 |
-
hf_status = gr.Textbox(label="Dataset Loading Status")
|
| 902 |
|
| 903 |
bar = gr.Slider(
|
| 904 |
minimum=0,
|
|
@@ -1002,16 +1011,17 @@ with gr.Blocks() as demo:
|
|
| 1002 |
outputs=[inp_box, bar]
|
| 1003 |
)
|
| 1004 |
|
| 1005 |
-
def load_hf_dataset(name
|
| 1006 |
-
status = load_from_huggingface(name
|
|
|
|
| 1007 |
if "Successfully" in status:
|
| 1008 |
-
return load_dataset()
|
| 1009 |
-
return [status
|
| 1010 |
|
| 1011 |
load_dataset_btn.click(
|
| 1012 |
fn=load_hf_dataset,
|
| 1013 |
-
inputs=[dataset_name
|
| 1014 |
-
outputs=[inp_box, bar
|
| 1015 |
)
|
| 1016 |
|
| 1017 |
apply_btn.click(fn=update_example, inputs=inp_box, outputs=inp_box)
|
|
|
|
| 146 |
def load_dataset():
|
| 147 |
global dynamic_dataset
|
| 148 |
try:
|
| 149 |
+
print('load_dataset')
|
| 150 |
with open("data/annotated_data.json", 'rt') as dataset:
|
| 151 |
ANNOTATED_DATA = json.load(dataset)
|
| 152 |
dynamic_dataset = DynamicDataset(ANNOTATED_DATA)
|
|
|
|
| 531 |
|
| 532 |
return converted_data
|
| 533 |
|
| 534 |
+
def load_from_huggingface(dataset_name: str):
|
| 535 |
"""Load dataset from Hugging Face Hub"""
|
| 536 |
try:
|
| 537 |
+
# Download the JSON file from Hugging Face
|
| 538 |
+
import requests
|
| 539 |
+
import json
|
| 540 |
+
|
| 541 |
+
# Construct the raw URL for the JSON file
|
| 542 |
+
raw_url = f"https://huggingface.co/datasets/{dataset_name}/raw/main/annotated_data.json"
|
| 543 |
+
|
| 544 |
+
# Download the file
|
| 545 |
+
response = requests.get(raw_url)
|
| 546 |
+
if response.status_code == 200:
|
| 547 |
+
print('response status', response.status_code)
|
| 548 |
+
print('response', response.text)
|
| 549 |
+
dataset = json.loads(response.text)
|
| 550 |
+
converted_data = dataset # Data is already in the correct format
|
| 551 |
+
else:
|
| 552 |
+
raise Exception(f"Failed to download dataset: {response.status_code}")
|
| 553 |
|
| 554 |
# Save the converted data
|
| 555 |
os.makedirs("data", exist_ok=True)
|
|
|
|
| 558 |
|
| 559 |
return f"Successfully loaded and converted dataset: {dataset_name}"
|
| 560 |
except Exception as e:
|
| 561 |
+
error_msg = f"Error loading dataset: {str(e)}"
|
| 562 |
+
return error_msg
|
| 563 |
|
| 564 |
def load_from_local_file(file_path: str, file_format: str = "json"):
|
| 565 |
"""Load and convert data from local file in various formats"""
|
|
|
|
| 907 |
placeholder="Enter dataset name (e.g., conll2003)",
|
| 908 |
scale=3
|
| 909 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 910 |
load_dataset_btn = gr.Button("Load Dataset", scale=1)
|
|
|
|
| 911 |
|
| 912 |
bar = gr.Slider(
|
| 913 |
minimum=0,
|
|
|
|
| 1011 |
outputs=[inp_box, bar]
|
| 1012 |
)
|
| 1013 |
|
| 1014 |
+
def load_hf_dataset(name):
|
| 1015 |
+
status = load_from_huggingface(name)
|
| 1016 |
+
print('status', status)
|
| 1017 |
if "Successfully" in status:
|
| 1018 |
+
return load_dataset()
|
| 1019 |
+
return [("Error loading dataset: " + status, None)], gr.update(value=0, maximum=1)
|
| 1020 |
|
| 1021 |
load_dataset_btn.click(
|
| 1022 |
fn=load_hf_dataset,
|
| 1023 |
+
inputs=[dataset_name],
|
| 1024 |
+
outputs=[inp_box, bar]
|
| 1025 |
)
|
| 1026 |
|
| 1027 |
apply_btn.click(fn=update_example, inputs=inp_box, outputs=inp_box)
|
data/annotated_data.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|