adrien.aribaut-gaudin committed on
Commit ·
8e58322
1
Parent(s): 3ca15d8
fix: gitignore for the database folder + prompt for requirements + 3 blocks max for best_sources
Browse files
- .gitignore +2 -1
- src/control/controller.py +6 -4
.gitignore
CHANGED
|
@@ -3,4 +3,5 @@ venv1
|
|
| 3 |
test/files_to_test/*
|
| 4 |
config_key.py
|
| 5 |
test
|
| 6 |
-
.env
|
|
|
|
|
|
| 3 |
test/files_to_test/*
|
| 4 |
config_key.py
|
| 5 |
test
|
| 6 |
+
.env
|
| 7 |
+
database
|
src/control/controller.py
CHANGED
|
@@ -283,8 +283,9 @@ class Controller:
|
|
| 283 |
"""
|
| 284 |
coll_name = "collection_for_docs"
|
| 285 |
collection = self.client_db.get_or_create_collection(coll_name)
|
| 286 |
-
|
| 287 |
-
|
|
|
|
| 288 |
self.retriever.collection = collection
|
| 289 |
|
| 290 |
def fill_collection(self, doc: Doc, collection: str):
|
|
@@ -295,7 +296,7 @@ class Controller:
|
|
| 295 |
|
| 296 |
|
| 297 |
@staticmethod
|
| 298 |
-
def _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9) -> [Block]:
|
| 299 |
"""
|
| 300 |
Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
|
| 301 |
"""
|
|
@@ -311,6 +312,7 @@ class Controller:
|
|
| 311 |
absolute *= alpha
|
| 312 |
else:
|
| 313 |
break
|
|
|
|
| 314 |
return best_sources
|
| 315 |
|
| 316 |
def generate_response_to_requirements(self):
|
|
@@ -324,7 +326,7 @@ class Controller:
|
|
| 324 |
while (len(context) > 15000) and i < len(sources_contents):
|
| 325 |
context = "\n".join(sources_contents[:-i])
|
| 326 |
i += 1
|
| 327 |
-
reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"],
|
| 328 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
|
| 329 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
|
| 330 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])
|
|
|
|
| 283 |
"""
|
| 284 |
coll_name = "collection_for_docs"
|
| 285 |
collection = self.client_db.get_or_create_collection(coll_name)
|
| 286 |
+
if collection.count() == 0:
|
| 287 |
+
for doc in docs:
|
| 288 |
+
self.fill_collection(doc, collection)
|
| 289 |
self.retriever.collection = collection
|
| 290 |
|
| 291 |
def fill_collection(self, doc: Doc, collection: str):
|
|
|
|
| 296 |
|
| 297 |
|
| 298 |
@staticmethod
|
| 299 |
+
def _select_best_sources(sources: [Block], delta_1_2=0.15, delta_1_n=0.3, absolute=1.2, alpha=0.9, max_blocks=3) -> [Block]:
|
| 300 |
"""
|
| 301 |
Select the best sources: not far from the very best, not far from the last selected, and not too bad per se
|
| 302 |
"""
|
|
|
|
| 312 |
absolute *= alpha
|
| 313 |
else:
|
| 314 |
break
|
| 315 |
+
best_sources = sorted(best_sources, key=lambda x: x.distance)[:max_blocks]
|
| 316 |
return best_sources
|
| 317 |
|
| 318 |
def generate_response_to_requirements(self):
|
|
|
|
| 326 |
while (len(context) > 15000) and i < len(sources_contents):
|
| 327 |
context = "\n".join(sources_contents[:-i])
|
| 328 |
i += 1
|
| 329 |
+
reponse_exigence = generate_response_to_exigence(exigence = exigence["Exigence"], titre_exigence = exigence["Titre"], content = context)
|
| 330 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Conformité"] = reponse_exigence
|
| 331 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Document"] = best_sources[0].doc
|
| 332 |
dict_of_excel_content[dict_of_excel_content.index(exigence)]["Paragraphes"] = "; ".join([block.index for block in best_sources])
|