Spaces:
Running
Running
fix repeat error
Browse files
app.py
CHANGED
|
@@ -107,6 +107,25 @@ def _gc():
|
|
| 107 |
if torch.cuda.is_available():
|
| 108 |
torch.cuda.empty_cache()
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def _launch_demo(args, model, processor):
|
| 112 |
# 全局变量用于跟踪是否是首次调用
|
|
@@ -278,12 +297,13 @@ def _launch_demo(args, model, processor):
|
|
| 278 |
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 279 |
)
|
| 280 |
|
|
|
|
| 281 |
total_time = time.time() - start_time
|
| 282 |
print(f"[DEBUG] ========== 全部完成 ==========")
|
| 283 |
print(f"[DEBUG] 总耗时: {total_time:.2f}s")
|
| 284 |
print(f"[DEBUG] 输出长度: {len(output_texts[0])} 字符")
|
| 285 |
print(f"[DEBUG] 输出预览: {output_texts[0][:100]}...")
|
| 286 |
-
|
| 287 |
return output_texts
|
| 288 |
|
| 289 |
|
|
|
|
| 107 |
if torch.cuda.is_available():
|
| 108 |
torch.cuda.empty_cache()
|
| 109 |
|
| 110 |
+
def clean_repeated_substrings(text, *, min_text_length=2000, min_repeats=10):
    """Collapse a substring that is repeated back-to-back at the end of ``text``.

    Candidate units are the suffixes of ``text``, tried from 2 characters
    upwards. For each one, the function counts how many consecutive copies
    sit at the very end of the text. The first unit that occurs at least
    ``min_repeats`` times is collapsed to a single copy and the shortened
    text is returned. Repetition elsewhere in the text is not examined.

    Args:
        text: The string to clean.
        min_text_length: Texts shorter than this are returned unchanged.
        min_repeats: Minimum number of consecutive trailing copies required
            before the run is collapsed. Must be at least 2.

    Returns:
        ``text`` with the trailing run reduced to one copy, or ``text``
        itself when no qualifying run is found.

    Raises:
        ValueError: If ``min_repeats`` is smaller than 2.
    """
    if min_repeats < 2:
        raise ValueError("min_repeats must be at least 2")

    n = len(text)
    if n < min_text_length:
        return text

    # A unit repeated ``min_repeats`` times cannot be longer than
    # n // min_repeats characters, so longer candidates are never tried.
    for length in range(2, n // min_repeats + 1):
        candidate = text[-length:]
        count = 0
        i = n - length

        # Step backwards one unit at a time. ``startswith`` with an offset
        # compares in place instead of allocating a slice on every step.
        while i >= 0 and text.startswith(candidate, i):
            count += 1
            i -= length

        if count >= min_repeats:
            # Drop every copy except the first one of the run.
            return text[:n - length * (count - 1)]

    return text
|
| 128 |
+
|
| 129 |
|
| 130 |
def _launch_demo(args, model, processor):
|
| 131 |
# 全局变量用于跟踪是否是首次调用
|
|
|
|
| 297 |
generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
|
| 298 |
)
|
| 299 |
|
| 300 |
+
|
| 301 |
total_time = time.time() - start_time
|
| 302 |
print(f"[DEBUG] ========== 全部完成 ==========")
|
| 303 |
print(f"[DEBUG] 总耗时: {total_time:.2f}s")
|
| 304 |
print(f"[DEBUG] 输出长度: {len(output_texts[0])} 字符")
|
| 305 |
print(f"[DEBUG] 输出预览: {output_texts[0][:100]}...")
|
| 306 |
+
output_texts[0] = clean_repeated_substrings(output_texts[0])
|
| 307 |
return output_texts
|
| 308 |
|
| 309 |
|