Files changed (2) hide show
  1. app.py +194 -68
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,9 +1,95 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import pandas as pd
4
  import json
5
  import plotly.express as px
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
8
  # 根据用户选择的参数构建文件路径
9
  num_parts = num_parts_dropdown
@@ -28,7 +114,10 @@ def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_me
28
  # 加载分析报告
29
  analysis_result,_ = load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)
30
  # AI分析列
31
- df["Analysis"] = df["Model"].map(lambda m: analysis_result.get(m, "No analysis provided."))
 
 
 
32
  return df
33
 
34
  # 生成 CSS 样式
@@ -36,7 +125,6 @@ def generate_css(line_counts, token_counts, cyclomatic_complexity, problem_type,
36
  css = """
37
  #dataframe th {
38
  background-color: #f2f2f2
39
-
40
  }
41
  """
42
  colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
@@ -261,70 +349,104 @@ def plot_visualization(dataset_radio, perspective_radio, num_parts, plot_type):
261
 
262
  return fig
263
 
264
- # 旭日图
265
- def plot_recommendation_sunburst(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
266
  import plotly.graph_objects as go
 
267
  _, recommendation_result = load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)
268
- labels = ['Model Recommendation'] # 根节点
269
- parents = ['']
270
- values = []
271
- customdata = ['Choose your preference model']
272
-
273
- # 统计每个场景下模型数量
274
- scenario_model_count = {}
275
- total_model_count = 0
276
-
277
- for scenario, model_list in recommendation_result.items():
278
- # 处理模型
279
- model_items = []
280
- if isinstance(model_list, dict):
281
- model_items = model_list.items()
282
- elif isinstance(model_list, list):
283
- for d in model_list:
284
- if isinstance(d, dict):
285
- for k, v in d.items():
286
- model_items.append((k, v))
287
-
288
- scenario_model_count[scenario] = len(model_items)
289
- total_model_count += len(model_items)
290
-
291
- # 根节点 value
292
- values.append(total_model_count)
293
-
294
- # 再次遍历,填充 labels/parents/values/customdata
295
- for scenario, model_list in recommendation_result.items():
296
- scenario_words = scenario.split()
297
- short_label = " ".join(scenario_words[:3]) + "..." if len(scenario_words) > 3 else scenario
298
- labels.append(short_label)
299
- parents.append('Model Recommendation')
300
- values.append(scenario_model_count[scenario])
301
  customdata.append(scenario)
302
-
303
- # 处理模型
304
- model_items = []
305
- if isinstance(model_list, dict):
306
- model_items = model_list.items()
307
- elif isinstance(model_list, list):
308
- for d in model_list:
309
- if isinstance(d, dict):
310
- for k, v in d.items():
311
- model_items.append((k, v))
312
-
313
- for model, reason in model_items:
314
- labels.append(model)
315
- parents.append(short_label)
316
- values.append(1)
317
- customdata.append(reason)
318
-
319
- fig = go.Figure(go.Sunburst(
320
- labels=labels,
321
- parents=parents,
322
- values=values,
323
- branchvalues="total",
324
- hovertemplate='%{customdata}<extra></extra>',
325
- customdata=customdata
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  ))
327
- fig.update_layout(margin=dict(t=10, l=10, r=10, b=10), height=500)
 
 
 
 
 
 
 
328
  return fig
329
 
330
  ### Gradio代码部分 ###
@@ -429,7 +551,11 @@ with gr.Blocks(css=custom_css) as iface:
429
  with gr.Tabs():
430
  # 表格
431
  with gr.TabItem("Ranking Table"):
432
- data_table = gr.Dataframe(headers=["Model", "Score","Analysis"],interactive=True)
 
 
 
 
433
  # 可视化
434
  with gr.TabItem("Visualization"):
435
  plot_type = gr.Radio(
@@ -441,9 +567,9 @@ with gr.Blocks(css=custom_css) as iface:
441
  # AI分析
442
  with gr.TabItem("Model selection suggestions"):
443
  with gr.Column():
444
- gr.Markdown("<h2 class='markdown-title'>🎯 Model Recommendation</h2>")
445
  recommendation_plot = gr.Plot()
446
- scenario_legend = gr.Markdown(value="") # 新增图例
447
 
448
  def update_perspective_options(dataset):
449
  if dataset == "MBPP":
@@ -480,8 +606,8 @@ with gr.Blocks(css=custom_css) as iface:
480
  fn=plot_visualization,
481
  inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
482
  outputs=chart
483
- ).then(
484
- fn=plot_recommendation_sunburst,
485
  inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
486
  outputs=[recommendation_plot] # 注意这里是列表
487
  )
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  import json
4
  import plotly.express as px
5
 
6
+ from textblob import TextBlob
7
+ from textblob.download_corpora import download_all
8
+
9
+ # 下载TextBlob所需数据(只需运行一次)
10
+ download_all()
11
+
12
+ # 定义颜色映射
13
# Colour used to render each sentiment class in the highlighted HTML.
ADJECTIVE_COLORS = {
    "positive": "#4CAF50",  # green
    "negative": "#F44336",  # red
    "neutral": "#FFC107",  # yellow
}

# Phrase-level sentiment overrides, checked before any other rule.
# Keys must be lowercase: get_phrase_sentiment() lowercases the phrase before
# looking it up here, so a key containing capitals could never match.
PHRASE_SENTIMENT_OVERRIDES = {
    "significant drop": "negative",
    "significant drops": "negative",
    "sharp decline": "negative",
    "strong performance": "positive",
    "poor performance": "negative",
    # More phrase rules can be added here.
}

# Words that force a phrase to be classified as negative.
NEGATIVE_TRIGGERS = {"drop", "decline", "failure", "loss", "down", "worse", "weak", "poor"}
31
+
32
def get_phrase_sentiment(phrase):
    """Classify *phrase* as "positive", "negative" or "neutral".

    Rules are applied in order and the first match wins:

    1. An entry in ``PHRASE_SENTIMENT_OVERRIDES`` (matched case-insensitively).
    2. Any word of the phrase found in ``NEGATIVE_TRIGGERS`` -> "negative".
    3. TextBlob polarity: above 0.1 is positive, below -0.1 is negative,
       anything in between is neutral.

    Args:
        phrase: A single word or short phrase.

    Returns:
        One of the strings "positive", "negative" or "neutral" (or whatever
        label the override table stores for the phrase).
    """
    # 1. Custom overrides. Both sides are lowercased so that a key written
    #    with capitals in the table still matches.
    normalized = phrase.lower()
    for key, label in PHRASE_SENTIMENT_OVERRIDES.items():
        if key.lower() == normalized:
            return label

    # Parse once and reuse the blob for both the word list and the polarity.
    blob = TextBlob(phrase)

    # 2. Negative trigger words.
    if any(w.lower() in NEGATIVE_TRIGGERS for w in blob.words):
        return "negative"

    # 3. Default polarity-based classification.
    polarity = blob.sentiment.polarity
    if polarity > 0.1:
        return "positive"
    if polarity < -0.1:
        return "negative"
    return "neutral"
52
+
53
def highlight_adjectives(text):
    """Return *text* as HTML with adjectives colour-coded by sentiment.

    Each adjective (POS tag starting with ``JJ``) is wrapped in a bold,
    coloured ``<span>``. When the adjective is directly followed by a noun
    (tag starting with ``NN``) the two words are highlighted together as one
    phrase. The colour comes from ``ADJECTIVE_COLORS`` for the label returned
    by ``get_phrase_sentiment``.

    Because the result is meant to be rendered as HTML, ``&``, ``<`` and ``>``
    in the source text are escaped so they display literally instead of being
    interpreted as markup.

    Args:
        text: The text to annotate.

    Returns:
        The annotated HTML string. Non-string, empty or whitespace-only input
        is returned unchanged. If tagging fails, the error is printed and the
        escaped, un-highlighted text is returned.
    """
    import html  # local import: keeps this block self-contained

    if not isinstance(text, str) or not text.strip():
        return text

    def as_span(fragment):
        """Wrap *fragment* in a span coloured by its sentiment."""
        color = ADJECTIVE_COLORS.get(get_phrase_sentiment(fragment), "#000000")
        # quote=False leaves apostrophes alone so the " '" clean-up below still works.
        safe = html.escape(fragment, quote=False)
        return f'<span style="color: {color}; font-weight: bold">{safe}</span>'

    try:
        tags = TextBlob(text).tags
        highlighted = []
        i = 0

        while i < len(tags):
            word, tag = tags[i]
            is_adjective = tag.startswith('JJ')

            if is_adjective and i + 1 < len(tags) and tags[i + 1][1].startswith('NN'):
                # Adjective + noun: highlight both words as a single phrase.
                highlighted.append(as_span(f"{word} {tags[i + 1][0]}"))
                i += 2  # the noun has already been consumed
            elif is_adjective:
                # Stand-alone adjective.
                highlighted.append(as_span(word))
                i += 1
            else:
                highlighted.append(html.escape(word, quote=False))
                i += 1

        # Tokens are re-joined with single spaces, then the space before
        # commas, periods and apostrophes is removed.
        # NOTE(review): TextBlob's `tags` may omit punctuation tokens entirely -
        # confirm the output keeps the punctuation you expect.
        return " ".join(highlighted).replace(" ,", ",").replace(" .", ".").replace(" '", "'")

    except Exception as e:
        # Best effort: highlighting is cosmetic, so fall back to the plain
        # (escaped) text rather than failing the whole table.
        print(f"Error processing text: {e}")
        return html.escape(text, quote=False)
91
+
92
+
93
  def on_confirm(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
94
  # 根据用户选择的参数构建文件路径
95
  num_parts = num_parts_dropdown
 
114
  # 加载分析报告
115
  analysis_result,_ = load_analysis_report(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio)
116
  # AI分析列
117
+ # df["Analysis"] = df["Model"].map(lambda m: analysis_result.get(m, "No analysis provided."))
118
+ df["Analysis"] = df["Model"].map(
119
+ lambda m: highlight_adjectives(analysis_result.get(m, "No analysis provided."))
120
+ )
121
  return df
122
 
123
  # 生成 CSS 样式
 
125
  css = """
126
  #dataframe th {
127
  background-color: #f2f2f2
 
128
  }
129
  """
130
  colors = ["#e6f7ff", "#ffeecc", "#e6ffe6", "#ffe6e6"]
 
349
 
350
  return fig
351
 
352
+ # 桑基图展示推荐模型
353
def plot_recommendation_sankey(dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio):
    """Build a Sankey figure of the recommended models.

    The flow has four columns: root -> scenario -> model family -> specific
    model. The four arguments are forwarded unchanged to
    ``load_analysis_report``; only the second element of its result (the
    recommendation mapping) is used.

    The recommendation mapping is keyed by scenario text. Each value may be a
    list of ``{model: reason}`` dicts or a single ``{model: reason}`` dict;
    entries of any other type are ignored.

    Returns:
        A ``plotly.graph_objects.Figure`` containing one Sankey trace.
    """
    import plotly.graph_objects as go
    from plotly.colors import sample_colorscale

    _, recommendation_result = load_analysis_report(
        dataset_radio, num_parts_dropdown, perspective_radio, division_method_radio
    )

    root_label = 'Model Recommendation'
    color_scale = "RdYlBu_r"

    # Parallel per-node lists: position k in each list describes node k.
    node_labels = [root_label]
    customdata = ["Root node"]
    node_x = [0]
    node_indices = {root_label: 0}  # node key -> index into the lists above

    # Parallel per-link lists.
    sources, targets, values = [], [], []

    def add_node(key, label, hover, x_pos):
        """Register the node under *key* if it is new; return its index."""
        if key not in node_indices:
            node_indices[key] = len(node_labels)
            node_labels.append(label)
            customdata.append(hover)
            node_x.append(x_pos)
        return node_indices[key]

    def add_link(source, target, value):
        """Append one source -> target link with the given weight."""
        sources.append(source)
        targets.append(target)
        values.append(value)

    def iter_model_reasons(entries):
        """Yield (model, reason) pairs from a dict or a list of dicts."""
        if isinstance(entries, dict):
            yield from entries.items()
        elif isinstance(entries, (list, tuple)):
            for entry in entries:
                if isinstance(entry, dict):
                    yield from entry.items()

    for scenario, model_dicts in recommendation_result.items():
        # Scenario node: label shortened to the first three words.
        words = scenario.split()
        scenario_label = " ".join(words[:3]) + ("..." if len(words) > 3 else "")
        scenario_idx = add_node(f"scenario_{scenario}", scenario_label, scenario, 0.33)
        add_link(0, scenario_idx, 10)

        for model, reason in iter_model_reasons(model_dicts):
            # Model family is the text before the first '-' or '_' (e.g. "GPT-4" -> "GPT").
            family = model.split('-')[0].split('_')[0]

            family_idx = add_node(f"family_{family}", family, f"Model family: {family}", 0.66)
            add_link(scenario_idx, family_idx, 8)

            # A model recommended in several scenarios keeps the reason from
            # the first scenario that mentioned it.
            model_idx = add_node(f"model_{model}", model, f"<b>{model}</b><br>{reason}", 1.0)
            add_link(family_idx, model_idx, 5)

    # One colour per node: a fixed colour for the root, the rest sampled
    # evenly from the colour scale.
    node_colors = ["#2c7bb6"]
    non_root = len(node_labels) - 1
    if non_root:
        node_colors += sample_colorscale(color_scale, [n / non_root for n in range(1, non_root + 1)])

    fig = go.Figure(go.Sankey(
        arrangement="perpendicular",
        node=dict(
            pad=20,
            thickness=15,
            line=dict(color="rgba(0,0,0,0.3)", width=0.2),
            label=node_labels,
            color=node_colors,
            hovertemplate='%{label}<extra></extra>',
            # node_x is recorded as each node is created, so entry k always
            # belongs to node k even though node types are interleaved.
            # NOTE(review): Plotly may ignore node.x unless node.y is also
            # supplied - confirm against the Sankey reference.
            x=node_x,
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values,
            color="rgba(180,180,180,0.4)",
            # Hovering a link shows the description of the node it points to.
            customdata=[customdata[t] for t in targets],
            hovertemplate='%{customdata}<extra></extra>'
        )
    ))

    fig.update_layout(
        title_text="<b>Model Recommendation Flow</b>",
        font_size=11,
        height=700,
        margin=dict(t=80, l=20, r=20, b=20)
    )

    return fig
451
 
452
  ### Gradio代码部分 ###
 
551
  with gr.Tabs():
552
  # 表格
553
  with gr.TabItem("Ranking Table"):
554
+ data_table = gr.Dataframe(headers=["Model", "Score","Analysis"],
555
+ interactive=True,
556
+ datatype="html", # 指定第三列为HTML
557
+ render=True, # 启用HTML渲染
558
+ )
559
  # 可视化
560
  with gr.TabItem("Visualization"):
561
  plot_type = gr.Radio(
 
567
  # AI分析
568
  with gr.TabItem("Model selection suggestions"):
569
  with gr.Column():
570
+ # gr.Markdown("<h2 class='markdown-title'>🎯 Model Recommendation</h2>")
571
  recommendation_plot = gr.Plot()
572
+ # scenario_legend = gr.Markdown(value="") # 新增图例
573
 
574
  def update_perspective_options(dataset):
575
  if dataset == "MBPP":
 
606
  fn=plot_visualization,
607
  inputs=[dataset_radio, perspective_radio, num_parts_slider, plot_type],
608
  outputs=chart
609
+ ).then(
610
+ fn=plot_recommendation_sankey,
611
  inputs=[dataset_radio, num_parts_slider, perspective_radio, division_method_radio],
612
  outputs=[recommendation_plot] # 注意这里是列表
613
  )
requirements.txt CHANGED
@@ -2,4 +2,4 @@ huggingface-hub==0.24.2
2
 
3
  pip==24.0
4
  plotly==5.23.0
5
-
 
2
 
3
  pip==24.0
4
  plotly==5.23.0
5
+ textblob