{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "UiO1w9dn_ZHb" }, "source": [ "# 라이브러리 설치" ] }, { "cell_type": "markdown", "metadata": { "id": "2sepsyCU_b--" }, "source": [ "- 양자화, 실행 최적화, 효율적 파인튜닝, 데이터셋 처리, 시각화 라이브러리 불러오기" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "DyzIFdONrhtC", "outputId": "1ef1c80a-92e1-45c8-b89b-5a7c8c1f3d11" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.0/76.0 MB\u001b[0m \u001b[31m25.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m411.0/411.0 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m491.2/491.2 kB\u001b[0m \u001b[31m31.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m105.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.9/183.9 kB\u001b[0m \u001b[31m12.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m80.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m79.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m45.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m1.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m35.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m92.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m18.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "tensorflow 2.18.0 requires tensorboard<2.19,>=2.18, but you have tensorboard 2.19.0 which is incompatible.\n", "gcsfs 2025.3.0 requires fsspec==2025.3.0, but you have fsspec 2024.12.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0m" ] } ], "source": [ "!pip install -q -U bitsandbytes accelerate peft datasets tensorboard" ] }, { "cell_type": "markdown", "metadata": { "id": "I15Xh4DNr1nw" }, "source": [ "# HuggingFace 로그인 및 데이터 업로드" ] }, { "cell_type": "markdown", "metadata": { "id": "MgVSOYxQ-Oi4" }, "source": [ "- 구글 드라이브를 마운트해서 데이터를 불러오는 방식이 아닌 HuggingFace에 업로드해서 다시 불러오는 방식으로 데이터 사용" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ZLbzElKyrn-r" }, "outputs": [], "source": [ "# HuggingFace hub에 업로드하기 위해 인증 토큰으로 로그인\n", "from huggingface_hub import login\n", "\n", "login(token=\"token\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Wu8rsKbIr9-N" }, "outputs": [], "source": [ "import os\n", "import shutil\n", "\n", "# 저장할 디렉토리 이름\n", "dataset_name = \"empathy_chat_couple_data\"\n", "\n", "# 디렉토리 생성\n", "if os.path.exists(dataset_name):\n", " shutil.rmtree(dataset_name)\n", "os.makedirs(dataset_name)\n", "\n", "# jsonl 파일 복사\n", "# 전처리한 파일 HuggingFace hub에 업로드\n", "shutil.copy(\"/content/converted_format.jsonl\", f\"{dataset_name}/data.jsonl\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "yE1BpErpr_Go" }, "outputs": [], "source": [ "from huggingface_hub import create_repo, upload_folder\n", "\n", "# Hugging Face에 데이터셋용 저장소 만들기\n", "create_repo(repo_id=f\"shjun/{dataset_name}\", repo_type=\"dataset\", exist_ok=True)\n", "\n", "# 디렉토리 통째로 업로드\n", "upload_folder(\n", " repo_id=f\"shjun/{dataset_name}\",\n", " folder_path=dataset_name,\n", " repo_type=\"dataset\"\n", ")" ] }, { "cell_type": "markdown", "metadata": { "id": "d-YCX6nN9_dq" }, "source": [ "![image.png](data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcoAAACxCAYAAABEDv1qAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAEo4SURBVHhe7b15fBT3nef9KfWhE5AQiMMcEuKQbPBBjGNsJ0YYEfCRCfYkmdgz2Rwz2THGmIydPLuTeWXzyibZmdl4gjFHMvtkM5kZO89uJiaHA9gSiDg2tsHmMBiJQ0gYMEJI6O5WH9X1/PGrbx2/7qrqlrplbL5vufzR53cfhb5V1a2WommaBoZhGIZhUpInJzAMwzAMY8KBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjgqJpmiYnMsyHkWgsjmg0hriqIpHQIJ/aiqIgL0+B3+dDMBhAMOC35TMMw6SCAyXzoSc8HEF4OIpgwA+/3we/34c8RUFenv2BSSKhIZFIIK6qiMVVxGJxFBYEUViQbyvHMAxjhQMl86FFVRMYDIUBDSgpKYRPCoxeqIkEBofCAICSokL4fJnVZxjm2oADJfOhJBZXMTgUQmF+PgoKgnJ2RoSHowgPRzCupAgBv0/OZhjmGocDJfOhQ1UT6B8MYVxxIfxZCmzxuIqBoTDGlxTxnSXDMDb4JwLzoWMwFEZJUUHWgiQA+P0+FBfli0e5DMMwFsb0jjIaiyESEUc0FrPlBQMB5OeLIxgI2PIYhggPR6CqCZQUF8pZWWFwKAyfL4/f4MMwjMGYBcqe3gGEhyNyckoKCvIxsXScnOzCHvzdkqeww5IybdG9uHfNQ3io/hZMLbBkpEt8GL29w0BBKUpL5MwxYKz7j19B24krUMtnY+7Uq/dCpadvEBPGFSW9ozVbqIkE+vqHMjz/GIb5KDNmgfJCx2V0Xr6CkpJiBHx5yPP54fflAdAQi6tIqCrUhIb+wUFMmVyO66ZOlptwYQ/WKfdgi5wMAAsfxnPbn8PDc+UMD974O8xZ+n1UbTqD3Y9Xybm5Z8z6v4TXtu/HuwMqAGDcvE/iC0snyoWuCqKxOCLRGMZ53E3G4ype2/8Ozl24BDWRwLyqGVh84wIUpHmX2D8YQkF+kH/PkmEYYCxfowwG/PrV+gC6evrQ2dWN9y9dxvuXunC5uwfdvf3o7R9AIqGN4tHrY9itiV80D1/ch81fqAWOPY9H1jyFPYNyWQ8GetEmp40lY9Z/EWbOr8WdH5uOUjnrKiMajSHo8bqkpmnY/coBTK0ox5+s/iQefmglqqtmYP+h47jc3SsXT0l+MIBo1P7SAMMw1y5jFijHmoKpS/HYv/4em+sBHHsa3/+ZGXaG2/dgy/r7cP00BYqiQJk8B/d9cwc64gDQhue/uBiLv/YcAODgPz2ExUsWY/EXn0ebZ12deC/2/HdLmWl34KH/vgNtw2aR3re24C8/MQdligJFmYY7vrIFh3rh2X86bWfGOMy6YS5uuGESxstZVxlxVfV8A887x09jXvVMLJg7S/91Dz+mTJ6IT9x+Mw4dPZH0aT2p8Pt8iKviDpthGOYjGygBAP4qPPIXDwIA9hw6BBFLOvDCNx/CumcPovQTj+HJRx/EUn8bdvzP+/DINq97uPTqdvzbV3HPt3fg4tyH8eQ3nsRjS4ex5193oFm/qx1uegp3LFmHn56eiuWPPoknv3w92n62Dsu/9jw6zM5S4tX2R5lEQvN8bfLsuQ5UzZouJ0NRFEwsnYDunn45KwlFEX0xDMMA4lHVmNDbN6AdOnZSO3D4uOtx+NhJrbevX67uwW7tMUADHtN2y1kvP6YB0FC/WTtDaeeOa8cvW8r88VtaFaBhtVnmzKblGgBt+SajliCNursfhwYs1X7YbBYLD4T1745rP7wdGrBc23zKzD/43Vs04Bbte4eEd+rfve3R0Krt/Pl27fl93XLGVUPXlT45KYlde16XkwyOtbRq5y5ckpNTkk5fDMNcG7hfnmeR/oEhnL9wCeFQBID4YGpNA7SEBr/PL15XDEdw7sIlDA6N+DliesyoRe2kYfS2H8Kenz2Np/71kEjf2ez9umAadafNXArgdXzvC4/g6YZm9A4DBSX6W2/b92DPGwBWP4jlpb3o7RJHVc1iAIdw8Kj762iubX/EURTF89Gp2+uL/QNDmFjm/YA5kdCgKIqczDDMNcqYBcrCwnxMqShHMOhHODSMrit9uNTZjUuXr6Cruwfh4QgCAT+mVJQjP3+kb+ZJZrhXD7qlZSjT03rf2oJHbpmGsqrFeOi7z+HQ+R5rFVfSqVv79Rfw+28sR8Hh5/HUyutRNnMxHvlZs3j029aGfQCwcx2un1yGMjo+91MAwPBgcntWXNv+iJOXp0BNJORkG7feXIu3jrTIyegfGEI8rqKo0PuiQtM05OVxoGQYRjBmgVLTNCgK4PP7UFiUj9IJJZg6pRxTp5SjtHQcigrFJ60oCuBx05ABvdix81cAgKVLbhHv6ux6Ho+tXofnz9+B773eg562g9i96RGk9QsY6db1T8W9/7gbFwfOYPemx7AUh/D8V+7Auu3DwMwqLAaA27+HfZd70CMdz305qTU7bm1/xPH7fFBV90BZUlyEypnT8Mc3DuPE6ffw3vkOHHznBN58+1309A4gFPZep1g8Dr/P/U1DDMNcO4xZoBx7htH87CP46s96gUkPYt1f1IrkQ/vwfBeALzyJb92u/0JE3PmHZ8+g5VFounW7etELACVVWP74Zjz37eUAetF2/iIwdzGWzgXwxu+x53wpSidJh3TDY+sfHm1/xAkGA4g4PFa1Mn3qJHx88Q2IxWLo6RvA1Ipy1C+7DXfdfhMa9u7HUCjFnlmIxuIIBrP3VINhmA83H7FAeRA//eZT+MtPL8biqmm4fv0O9KIWjz3/Uzw8VS8yrQpLAeAXf4enfrYDLzy7DvfUPYU99oZQVVOLUgCH/ukxPPWzHfjpL1/HcDp1423Y8nAZapf+JZ7+5Q7s+N1PseX/HgRQhdqaaQCWYt23H0QpXsff1d+Bdc++gB2/ewFbvvkQ7vjaCyIIOvXv2fYIiIQw0DeAgb4I4gDUuO4HvAPSWBMM+BGPq56PX6EH1YW11bjphnmYPnUSAGBi6XjcfediNP5hP2Jx6+/zmKhqAvG4yh82wDCMifzunlzR2z+Q9A5Xp6O3f0Cu7gG969U8Sitv0e59fLO2u01+R2iPtu+7y7WpVHbqcu3J3+7Wflgvv2v2jPbcF2rNNic9qe1Op26sR9v3vx/Tlk+1jGdSlfbgPx7UeiyjuLjjyaQy937jV9oZY7gp+k+z7Uy4vG+n9pOfb08+fnlQs76592ohFB7W+vqH5OSMiESicpJBb/+gFgoPy8kMw1zDjNlH2AFAKDyMK739GB6OQJV+Ty0vT0FRQT7KSsen9YaLUTPYK94xWlqKApebh+HeXgzHAZRYHoumVXcYvV36Iz5rXRtmGae2UvafVtsfXfoGhpAfDKAgf3R/h1ImPBxBNBbHhHHFchbDMNcwYxooGSYbqGoCfQNDGF9S5PlJPekSi6sYGAxhwrhi/nuUDMPY4EDJfCiJxVUMDom/SxkY5euJ0VgcQ6FhlBQXIpClwMswzEcHDpTMhxZVTWAwFEbA70dhQTDjDwnQNA2hcARxVUVJUSHfSTIMkxIOlMyHnvBwBOHhKPKDAeQHA56PY2P6n+uKxVUU5Af4jzQzDOMKB0rmI0M0Fkc0GkNcVZFIiD+3ZkVRFOTliY9PDAYD/CsgDMOkBQdKhmEYhnGBX5RhGIZhGBc4UDIMwzCMCxwoGYZhGMYFDpQMwzAM4wIHSoZhGIZxgQMlwzAMw7jAgZJhGIZhXOBAyTAMwzAucKBkGIZhGBc4UDIMwzCMCxwoGYZhGMYFDpQMwzAM4wIHSoZhGIZxgQMlwzAMw7jAgZJhGIZhXOBAyTAMwzAufCB/uPkD6JJhGIZhRsSYBUpN06AmEojHVSQSCSQSY9ItwzAMw4yKMQmUmqZhOBpDQk3Al5cHJU+BoihysasQBYB1eTy8ogCaZqaT11VRFHE3LfkRKfWie0KDBgXWtaWS1x5Ou5W20vbJ3lJQ9qNTS4c0YtmnrpgDJTL1JtSa7LOhhOxtOA1A9rlQQvZZJPPpKPrPBy9vpsg+d0p4eQtUXfY50JwHSk3TEB6OAtDgDwRcA4ldqZjsk6vLPs3qrmqMX/LXErQesjfUJZAIpcAu+2SlirI3lHr28mkrIfurB8/1H4VSg7R/We/ASwnZZwQ16OXTUwoUsh+ZUiv2C1fZ55TRLUdmSsjehVwOj5B9ZpgjzGmg1PQ7SS2RgN/vl7PHAKetkL2uTpFX9rqO7g6Rqsl3iPrPfZ3RbfRHC4/dS1bPQO6sqRuU1akDywgzazCLSsg+fag12WdDCdm74jQg2WdDCS+fQ8zAbffmMJ28t1IN2aevhJd3gZqT/VgoWdk7aM4CpaZpiKsqotE4AsFAcs8p1BaXLBNRlJTFM1JjXJK/ljHW2/BOd4Cy91baONknBQ7ZOyrh5a9eaDayH7E6xeWsdeCihOwzghqUfXbUDDR2n1qplJe3T1f2WYU6lH0ulPDyLuRyuITs3cneiHIWKBOJBCLRGBQoyPN9WH8LxWOhbZFdCgSjvuN0V0L2jInH7iWrJdC4ektF2aetNCI5so24wWwrIXsTKi37XOiIGMsBykrIPot4T8/pjlP2yUolZJ89JWTvAlWXfS6U0H1OA+VwJAqfzwdFyRM92gIJDSx9T9VpQiNqzkEJ2V/LGOsteWPdjMBN65yuNzdO9kmBQvZpK5Gp/+Cg0cs+G0rYfC47lJWQfUY4DVj26Wlmd5yyUq3UnpB9VnGavuxzoYTsMyBVs9lSQvbuUAvJPieBUtM0JBIJhIejCAYDcnYWcZ6Yu9fVNfJ6K/3gl71QSnbw1KvsR7XRHy08ds9babtkbykoe3d1atAywswaHIUSmfr0od5knw0lZG8jlwPwUkL2OcQM3HZvDitVwE5PqQXZp6+El3eBmvPy2VDCxWfSfc4CpaqqGI7EEAwGU/fsFacoWfZpqjEWyV/LGOtteD2w0zqleMSbrtJGGZ5WXg4csndUQvYfHmg2ss+GUoNynE4qmCslZG+DKnj57KgZaOw+tVIpLy+UkH1WoQ5lnwslZJ8BuRwuIXs7TiOQ/eg154EyEAy4TPRqx2nhHbxr5Pe6A81QqRcjMAnMK0YzRZS89nDarbTV6QbSUlD2o9MUkU/2qSvmQIlMvQm1JvtsKCF7G04DkH0ulJB9Fsl8OnSh4OXNFNnnTgkvb4Gqyz4HmvNAKR696j16BBK7UjHZJ1eXfZrVXdWYi+SvJWg9ZG+oSyARSoFd9slKFWVvKPXs5dNWQvZXD57rPwqlBmn/st6BlxKyzwhq0Munp5ndkXoptWK/cJV9ThndcmSmhOxdyOXwCNlnhjnCMQqUHxROWyF7XZ0ir+x1Hd0dIlWT7xD1n/s6o9vojxYeu5esnoHcWVM3KKtTB5YRZtZgFpWQffpQa7LPhhKyd8VpQLLPhhIaEI/HMRgaRjQaE//er3IURUEwGEBJUSH8AV/KQE4TlX36Snh5F6g52Y+FkpW9g45NoFSU5J5TKP2ckSeSZnVXNcYm+WsZY70N73QHKHtvpY2TfVLgkL2jEl7+6oVmI/sRq1NczloHLkrIPiOoQdlnRzO7Q6RSdh+LxXCldwAlJUUozM8XH79pGfGopu/FCJcnoWkYHo5gYDCEiaXjEQj6U5azKeHlXRjhcNNSQvbuZG9EYxMoP7R4LLQtskuBYNR3nO5KyJ4x8di9ZLUEGldvqSj7tJVGJEe2ETeYbSVkb0KlZZ8LHRFZGGBv3yCCwQCKCvJT5jsqIfss4jW9cDiCSDSGsgklejpdGFC5VBcM2brj9FJC9i5QddnnQgndj1Gg1EdgCySUbPq4qkJLJMRaaDCu3vwBcUXkGpeSm0vtHdQYu+SvZYz1lryxbkbgpnVO15sbJ/ukQCH7tJXI1H9w0Ohlnw0lbD6XHcpKyD4jnAYs+/Q0nTvOS5evYHJ5GZQ8pHUHagYWgeyzitP0dRKahstdPZhSMTHV9EenhOwzIFWz2VJC9u5QC8l+jD4yR++cfupqmkjTfTg8jI5LXejq6kH3lT5cudKHKz196O7uRVd3Lzo6uhAeHoaI6VRP06trIl1LxzfhiUA9tp0WYxDZopxm9TB92+Z6BAN+rG+wp9P1RSpvn6qDFzbZt27FpwL12Naqez39g4DGLPuksctz04CmDX6s3HzGIT95zcgbKr5J9mmrBuMc0TTsecKHlZvPCJ/iHCLvrqKtlJ5WhdLNVUpbxXfJPrVSyfQ8HZSuQQxVfN+Grat88Pt88Pt98PtWYOtpUchajvyIVO+UPB2U3vSED/XPttny7IO1Nujl01PNwVtXWtM0KPpPSfsOaK6evmTv9iWXdWrbUGmsss+jJ1op5kg+fRXfGp4O2bsd1IZ+GNVlnwU1upSHZ/HScABj9ZL9GARKRb8jSFbxyFBBb99Acj4duu/tGxDlFb2eoujZqb04kr1iDElc5xnpVm9TfRZJ6SIjlRdq1kvphU3yZ3dtBzb+Mx6t1vP19GxAYzC9w1jSUv2iozX1GohvXdaYvKOae2r3GSidY9bx6OkiIdm7K3Rtx7bVfqxvJE8HeXPultU1fDZUMbqyewX6mjn6VFqFtbtUxFUV8ZZnUQcdqRx5WJdF9jbV1z2Fp4YpXfTn0KHsJW3bXA//hiaxEpZ08qlVcfdi5fRlML/sLpMvFZfOXUI8Kd38or5lnzS2DFSfQNKaJK2pvPZJSofpjb2zeRel7ry8pVnZ21U+ty0eVq+k9OK7ZJ9Kcx4oxRWMHrMlpav+nr4BdPX0YHAohHB0GOFh/YgMYyA0hM7uK+jpG9Cvjqx3iM5Kh+zFVYaU5ngAs9e+jEg0ho33CC/ny23b0snrKrpO9qY24Z82AJ9ZVeWQb1EIyBPmNZCZYnwnZTm1la4SqeYkvnVZE2PtnNRhjTM47Pusj8eS7qlyecs5q8/c9ClUPvfJOytMTzspe4sz5mTx2VDCni55OmRvOaCvu+zNZSGv92r1HkoDMjyg75WZr4n/Gel2r5leH1BKD5oj5Qsl76j0Rb7vDTy39UXs65PyU5WVffdpHL4QTupDHos5B1NpI4W3zM22JhYvr6Hkreq0FyNT0RR5OmRvOyDlkTeGRX70mvNASRFZiK6Sz/PlIRaPY2AohN6+AfT2D4qjbxCDQyGoagI+ny/pEkW+G1CUvXgiGEC+cazEj1utdxXUr7XcE9hrzT+zDauMPL+tjCimQDmzDauCT2AveUoLrLeU24sNgXr8+MxebAiIdoKUD1FPLIFFG7fjxyvWYHV1cv7ZrSsRNNrxY0MjLaECNK5HcPVWbNvgR34ggCcaxaPPYMCP4Ia9oiBE2qe2tJl5AT+CG5qMfL0UnqC8gB/BwHpQCUVvQ6TXYxua8ESNpezXRUlaZkCx9fXEbutc1xttK9Y10dP3SunG2jv53estYxbzFHtqlgcU7P26HwG9zPrdejrtvdKOH6828wP+evy41Tw32vXH8AH/PKxvBLatonJ+BFZvQ7vlXExLT2/DSqof8COwocnMp6vZ3etFvt+PgN+H9Y32q13xiLvNLA8Fezf4sHJzu+HbN69AYMNeoXo7gVXb0K6vCfWmKGYrYp+s6bpvXC/q68fKze32fIvCwUNpw7ZV4jFvwO9D/ZY2M18MBooCtG1ZgYBfKqfno3Ur6vX0eRuagM0rxGNjvw9+/wpsbTU7bNtSrz9SFvn1W9rlAen/pfD62ljz01KxcoZXJtyFJ//HX+KTEygn/a+Bd/bi1+/0JaXLLRl9kipi/PJYoHh55zVxVHmT5ZMhhYq9JE97T+lO3qLUnewtzco+MzXP/ZwHSoGmi66yp2TjG/0Sw+apvH75AE1c7Rh+D9YH6oEdMUSidPwtmmvrse20KEPdbV/3AtZEY4hE4zjxo2Z8avVWtGua6GXOo9gVjYv6sTgizZuwTO9bA/SrN3GVIZIpnYZqplNfn4mJ9k5u1PuylKdyGoCmF7fir/9mLSr1pii9fUs95r+4BidjcURjcURjDcBqP57QgyUAoHE9Wu6PI7pzLbY9XS++b9mEui3bjUAHAE0b5uHX95vtPLqlXgQVAEAbtq2uR8vGU3p+3D5mAHUbLXVRh2dayMcR/ZF4aGesxYt/ZfbVsgktq1ZgW6s+5xVfxzMrtuLvt7RZ1vAMtj29FXUbv45ltrWlq87UHg2PI7CqGZv0scRiDajZMA/rGy3lNDGe7ffFEYub49l6GqBzqWnDXKxf0IBYPI5YLI7Yrlqsr3kcTfo5VrmuQaTHT2HTCuDRXaru44jtfFTsG52T+vlhOTHsenoLVtY8jppdccTiqmizZYXxui6gQWt8HIFVwMtxVfQRbwRW+fB4o56vlzTK25R60t3mFfga/pfe12lswuOY94Q4M4wR61fjBrarc7HO/lVAQ1xFLK4iHj+NNS/ORf2zbSKfynlo0xNzsR7P4pTezj9rf4X6zdaBiL7mPVGLhriKeFxFPN6I2ifmYn2Dnl+9Fg2qyDu1sQ5Y1ygeHcdVxNVGrK3WO2x4HPOeqBHtqNROtWhHGphm9dC9rFRO90f+46fYvmc7fvDdp/HNb2/E5r3n9fxjeH5rE95q/Cn+5v/5f7G77xie/+52HEEcB57bjF+e0kBf776wGT9/J4aug9vx9D/8EN/6zt/jm5sacC4BaMe243uNHeh+7d/xre/+GoehAUOn8eI/b8a3vvtDfOvp7Tg+JNpJNVbCNnanOWektEy6p0P2LgcVl3021DhnJW+omEGytymt5lgFyqRLAbsWFRVAj98iXT/IK1BQVJCvX/XT1X8KRR1q51jT6/BMrAGPzhVpigIATah98lnU6b5q9RrUNW7HrjN6b9a7EMsVhmjfki5SbeXMuYp0oy/dV64Sfe1sNcuLcoDSuhV/v2Ut1tTr7egoaMKPNgDPbLYG0Dp8fWMdtr1oCYErNuHrKwBU1aAOa7B6hZllpW7jKTyj5ymKaKfpxZ1o18eyenMcL6+r0vNpzC1os47VmCy1Y0+nNWmq+RaeWaH76tVYs6IJv97Vpq9xFdY+tRZNL+7EWVrz1l34deNa/Nd1c8w9MJT2NNk3/X4r6jb+L2OfoSzHpriKv6lst5er+RY21eu7N/defTztwivtqFqnIvbMchjn5ooH8Sia0XJa99Zz15y8Jd3iqQylm2cMAKB913Y0rWvEphWUPgdrd6n451Xk27Hth1vx6K5nsdyoX4dNu9Zi2w+3ol2/2iUUw1N90bUC/QRe14iGx6v0c1WsPTa/oN+56yOmc91s1JLehq0/3Iq1u55FnbEM5h6261OWlylZm7B9M7D2G2tRRf8GH29Ewzr7QNqq/gZxdTPqjAHUYc06oPmEeFIgBmoZoHXAuiqwtyPWog4P6u3Q2ghV7F6sqN6qxevl6Cs21IEjg0vwzW8/hX/89goUNv4Gv+9QoCCG0Nm3cWTCn+Gf/uEvcc+EGEIDEcQRwMK5JThw6IRoIXECbx2fiiULAyiesRR/9Y2n8P3v/Bd8afLb+MUrvVAWrsHf3FmK8jv/HN//b5/BzejDnp++iP57/hrf/29P4furVTy3Q7RlzsVUc1nkdMmnWtNU3lA6TC/2WPYuSs3LPoV6Diel6j8vkrwi2krlHXRsAiVdZTvoxYtd6OsbRCwWR8CXJ35FRNPgy8tDPBZHb+8ALl7q1p+X63eSSboMn3nMfBSYv3or2iz5dAB1qKm0pM2pwQJ9jNZydNcirjioHZjpdM1hK6e3o6cbfZGvrhV9Gfmmtu3cDmz8OpZJ6drpZrTIjzgDfszf0AScaBZzHCGaBlQuqAX0QKhplajEVqz00yNFH4I15qNXecxmO9KY9TWomzvb4quwQJ+8sWb3rMGjjdux47TwbbteQNNja7BMXuukPbH6NjS3ADULKqV0DZXV1jQxHk00AE2rRI3YeMNXzt2D9ZbHigH/CmwTIxb1LKrPxPRpKJ1HgIa2002om1tppuv/r6yu1FtvQ7PtztG8ukVjM9og5kRohreUp/OU0nWvaYBWWYM6NKNZX3sj3exFSm9HcyPMd8fSsWqrGE/ydFPrqWY0ow41s0UHRrreofAaKqur0PREnt6PUPOuU69gVXPAhmrQUGVpx6e3s0Jvh9aG9iWlh5wvVLPkz5h1HfyaBs1/A26a14uODj0ncR1uurVYlDda0lBw40LMPHYU70KDduooTlYvRG2ehoKKckTPHsOu3/4CfzgPhMJhe11NgxZuwbtXyjH+ymG8vv8t7BvyYfyZc7iYYox6Ree5WdfAsmaQ1tDq3ZT2TvapVRRN9qmVmpW9u9K5K/s0FHY/NoFSj8vi0oGULiWA0rJxGD+uGGpCQ2//EELhCEKhYfQNDEFNqBhfWoIJE0r0Oxaqp989kCoK6jbGEYmKx28vLViPBaleg6T+rXclepp590GHeSUi0mGm07WGrZzejp5OfVm9nmp6RQHQhI1fF2/isaeLdiE/4qRjp7gqzx5NeKJmPWp26o8w9UeU9C5IY0rWKtZ025hhlLQshVFOAaAoy/H1jcATm/cad86bHq+zrKm5doZPpXpX9vTkPRPnihiIyKdhKlCUdmxbVY+Wjaf1x5MqYvFGPKrPQ8zNVL2i6TNVwvDmeWi0nVIJJ5+sqfLN2qYXS2K2Sp6WSgHMd8cajzJVxFX9rte6F7LqDRitk7emW3z7lhWob3kWp1S9H1XV7zqth2Vg5oBNhYI2WzsJWzuKrWPT2xQO6YpiW0PZU00qb7akQCm8FffUXsCREwqaj1zA/JtqoGAIb//8J/jXFj9uvuezWLOwxCxvrRuNI2z0AyiYiU/etwiltjHoalkX8kJln6y6SfKGWtbY5tNW2mv6N0neRak72afQkQ3Pfq5b/RgFSk0Xq+qXBQC0hPhdpfx8P4qLC2xHfn5QTBhIunoX1cWVFujKRPfLfhRDJHoKz6zYiu0N1nx9OJq4anBSccVoXqEkpaMZLfrrbVROH6RVkrx1CCJdAxq3Y9tjf4tHq810QquuRQ2a0EIvI+q0b6m3v0Y5QtpPNAMralAFAK3NaEEdasSTV3MOOvIcktLlueslzXyznKZr5ao1qNv8AvY0bse2FWuwqlrPFwXMNSefpFWoWQC0tLRJ6RqMc4TSLeeMMRgxIkATd3A1Cyot+VTGUt6WbvXWfNjrptCquXVoOk2v/Ip08R352ahZYU83el1RiypLSdEOebOcvT37CLT2ZjShFjVzdQ8xdLO0SDTTK1GzAmhuEb/rqFF+4+PpvUapd6LNrUUtnc8plpmO9lNNQE0tqixpRhlDHQZsUVs71vIaxEpZ0snLK55aNcPHErofOoS3Tk3CjFn2vTRLiu80aKi96Tq8e+RFHD55HW69XoOGUzjcMhWfWr0AU4p96BsatvWJREz4CbMxz9+F8KSPYemSW3H7ko9h6fVTUZBq7Ja56pmW9EzV3o5xyN7toDb0w6gu+yyo0aU8PIuXhgOYq53kxyBQWi8B7GpejYtytqshWRVY7hJIxaWAoiho37oS+cF6/PiM8IqiQDmzE9sbzdctbVcZit66rpC8qVTHki6/3gZg72b9EaWiiFkLSfJGspHdjm1Pb8Wj94v7NkonFHo9crX5CBStW/GfNwCAFD09UBSgacOPzHexKnvxow1NqLt/NSoBKHOvF0FZvB1SjO3Z7Zb6lG4fpXXNDC8WzVwz2nabV6DMXYv/um4rPrV6K+ruvxdVlG7ZY7tP1roH1qJpw9ewzXiH8148EfDr78i0nCu2c866EwqgVKF2BdByQrxmCUVB04YfoGWFXsYcvF6HqpO35sOYu3XHySsAKlc9iLrNKyzvYm3HtlU+BDbs1f0crH1qLbatEu8AFvWasH7VVqx9ai2q9K7q7pdeazy9Df9js96jovemANj8ffE7rwAUpQ3bfrgVWPcglhvlTDWwpVdh7TfWomnDX2ErvcauNOGJVVtFOX0J7Kqvu82L1xq3/nAr2ii/dSt+sNk+kMp5dUBLM9poALsfxw9alusDpIFaFDDnqqsCezsKrO0o5s8VfYA2Tz93jO/IW51IOb/7J/jB5p/iB//wB4RX3I9VE1PXtPnrF+GGdw7j3YVLcAMUKJiHm2vb8fNNz2Hzxp/gjaESYyxTF9cg8Ppv8OxP/gN/7JuJNV+5Feee+wd8/yf/HzY//TSefa1L9GDMxVTaRz3RTDc2W/KOSofpaU/t3kWpOy9vaVb2djV/pid5WL1Yf9mL75J9KoWWAxKJhBaLxbSBwZA2HI1pEY/jaMsZ7Y2Dx1yPoy2tSfWSjuZN2jLpAuKvd1jLvKz9Neq0Z5q90uiI620+qr1E3lbP0s+PNmnLsNZSztJuTNSLxhpEWgt50X6dXi9qlEvWEz+qs8xLtGHk71yrYcUm7WQsrkVaNml1+vfRlk1aHR7VXoqJsi89Bq1u4ybtUcu46zae0qJ6ftSoY+/npcfsfdJxcqN1TNDwWIORJ/qyt50qLRqLi/FjrTHOER0719rGUrfxlBaz5Fv7pvSX9TTysViDbW0e3WXO8dGdcS0W18vF41pM6g8rNmmn4qLtWLpqW2toWNdo5sfjWiyuJvXz6C49nfLjce2UdR9WbNI2rdPnZc1ft0nbtMLaV4OlndP2vKR5qXpZVYvtchpP8hF38PG4qr28ztrHs/qYT9vKNVjLrGvU4i3PivVa16jFbe01amtt467TNrWoWlwVZTzb0cul0gsXL6dMJ33jf39X+8nbqhYf6tMGIpZ8OmTvcYR7+7RwLDk9HhnSrvRHbGnDUlk1hV64eNniE1K+k09oqoPPndLh5S1HwsHnQD+Yz3qV/HAkivc7OhEKRxCLx/V8QcDvQ3FRIaZWTEJhYb443d2bE0rJsk9Tjbm0bsWqmhb8l5j5Wl02ad9Sj/+Mf8ZLj+nPO3NI0wY//n7eqbT7ovWQvaFJn/1qWW+Pz3q16tkt9Zh36m8Re2a5kS41ZPbs5dNWQvZXD57rn6a2b16Beae/hfjGOvtsLfs3qg5GooTsM4Ia9PLpqZLis187OrsxtaIcmsNnwR74+fdw5Ka/w1dvlvOplTH87FcZBei4JMZPPsW0s6OE7F2g6rLPhhKyzwxzhGPw6BXmchgx2e4L8oOYM3sGFtZU45aFC3DLwvnGsbCmGlWzpqOwIKiXNx9q0+tQstc0kSaalzy9fpXq9S59TCKbvD5a6+tluorkZJ+eikdOX6NP4oFAzzaQ7Jgi922MkVT/JskbaplryjXQ9E8jasKj99clr2nD4+IX8f0+8xflDb8eTdKeJSkcvO1ckb1QvaKHirZt/vQWrLS9c1Z6F+1pvVzyaiap+C7Zp6dUk5y9Bw2WKVi8VeHgHfWU+BAA8zNjrboCW0+Z5emQveshl6UGZE/lDJ+ealYP4RVFgZpIGN4op/uqu1bhE7NMb6pm8/Qle7cvuaxT28leaEI1/7KQZh2705wzUvGt4emQvctBxWWfDZXPWfnc1sQMkr1NaTXH6q+HpHlLZ7tDpOQR3hHKaoxN8snon2FqSanbmP5d2NWMfEdprLeO8x2g7L2VNk72hmIP1vv1dV7XiNjGZQ47J++Yl796odnIfsSa4o5Q9tDEJ9zMO/UtxJ/RH/QmNTQCJWSfEdSg7LOjqe4QNUelUnbf0zeA/GAAhYUFttYJ2WcV6lD2aWooPIxINI6y0pKU+WlNSPYujHK4rkrI3p3sjWhsAuWHFo+FtkV2KRBYAofss6GE7BkTj91LVkugcfWWirJPW2lEcmQbcYPZVkL2JlRa9rnQEZGFAcZjcfGHm4uLUFAQRJ6iv1vEoXzSgGWfRZympyU0hCNRDA6GMLF0HAIBvz4MujCgYaW6YKBHw6KE7LOnhOxdcJpwLpTQ/RgFSn0EtkBCyel7qj6q5hzUGLvkr2WM9Za8sW5G4KZ1TtebGyf7pEAh+7SVyNR/cNDoZZ8NJWw+lx3KSsg+I5wGLPv0NN07znhMxWAojGg0hhz8uMw6iqIgGAyipLgAAb9fnnZ2lJB9BqRqNltKyN4daiHZj1GgzBXOE3P3urpGXm+lH/yyF0rJDp56lf2oNvqjhcfueSttl+wtBWXvrk4NWkaYWYOjUCJTnz7Um+yzoYTsbeRyAF5KyD6HmIHb7s1hpQ7k6Si1IPv0lfDyLlBzXj4bSrj4TLofg0AZTN2zV5yiZNmnqcZYJH8tY6y34fXATuuU4hFvukobZXhaeTlwyN5RCdl/eKDZyD4bSg3KcTqpYK6UkL0NquDls6Pp3iEKpVJeXigh+6xCHco+F0rIPgNyOVxC9nacRiD70WvOA2UgGHCZ6NWO08I7eNfI73UHmqFSL0ZgEphXjGaKKHnt4bRbaavTDaSloOxHpykin+xTV8yBEpl6E2pN9tlQQvY2nAYg+1woIfsskvl06ELBy5spss+dEl7eAlWXfQ4054Ey5WuUsk+pVEz2ydVln2Z1VzXmIvlrCVoP2RvqEkiEUmCXfbJSRdkbSj17+bSVkP3Vg+f6j0KpQdq/rHfgpYTsM4Ia9PLpaWZ3pF5KrdgvXGWfU0a3HJkpIXsXcjk8QvaZYY5wjALlB4XTVsheV6fIK3tdR3eHSNXkO0T9577O6Db6o4XH7iWrZyB31tQNyurUgWWEmTWYRSVknz7UmuyzoYTsXXEakOyzoYSXzyFm4LZ7c5hO3luphuzTV8LLu0DNyX4slKzsHXRsAqWiJPecQunnjDyRNKu7qjE2yV/LGOtteKc7QNl7K22c7JMCh+wdlfDyVy80G9mPWJ3ictY6cFFC9hlBDco+O2oGGrtPrVTKy9unK/usQh3KPhdKeHkXcjlcQvbuZG9EYxMoP7R4LLQtskuBYNR3nO5KyJ4x8di9ZLUEGldvqSj7tJVGJEe2ETeYbSVkb0KlZZ8LTUUoLP7CRlFhgZwlGMsBykrIPot4T8/pjlP2yUolZJ89JWTvAlWXfS6U0P0YfYSd3jP9QFcUkZahFwGB0hUo0TAGB+Minf6iBKWHE1Ci72PfjlfwTpcl30HpsHvY8kyvKxy8rmLoyT4jjYYxGFKldBXR/hCiqlkuE7rfbMDPdnv/5RG5afKUbI7JwXcdwS+ffx2nkvKT14S8ofKaKoCitKPx+QYc6EqxF0lKe5nshabwdI55ees5KPm0lVbR4hXr4pHPkiqpvGKm27wCkZbSm4ok34aG517G/sv2fFinL/s01daRflzouIwLHXpntnzHAdpUcfDpqb5yDp6+ZJ/NL+pT9tSj09iSfbLqJskbKq+pvPaeKr6F8W+QvIvK506KYZGObHiWcx2SvxruKM9fuIS9rx1EXFXlLACA3+dD3V0fw3XTK+wZ7fvxb/uBT37uNswG9OnFcXLPTrxTcif+9KY+NO1sx4Q7l2HxZMq3XDoo7neEXqq43jFSsoOnXmWvK9H1ZgN+NzgXX77H+hF6bWh8/jQm1NdjyWRLcpp0v9mA3ya1mZozf9yFc9NX4e5qOScNLh/BLxtCuOnhpZhvy1DRd2w/dh3vQdwHqFoxbvrEXbhpis9WitbD9G1osMxb2s1kzfodoVODlhFn1mBGum//UfT09kNRrNe3lC/OpbKycbhjyY0p8lX0NR/AS0e6EfYDajQPFbVLcP8tkyxlLYQu4rU9B3FiMA++RAyFM27GA5+chUJcwYHtr+JoyFJ2yiJ8ZUUVFLTh5edOoXTlSs/9IWRvgyrIXtdTbecADZhXPTO5A9Khi9j/ymEc7wV8iCGaPw33rFqCykKH8vKAZJ9DFONRsd2bw0q+80tXqQXZp6+El3eBmvPy2VDCxWfS/RjcUVovAexKV+NdPf2OQRIA4qqKy1f67Ff3igJl1jTMQDfOtFM6gMQ5nOssQOWciVAKqlC3pg4fqxB1RDWLwsGnqXDwQsXYHb2wyV5XQvajgcZgeoexWDQejSGmJqfLqsiettnm9XLRM3j1aBQLH7gPf/6n9+KzC/048tYJDNn2yNxTq9cbtOS7KJ1jsk9xDpJ3V9F3Sk+rSOn6jyPL6hp+pDoUCuO+lXfhvvo7xLHyDtxbf6fh7//UnRgaCkNR9BEpojb5gXABbr1vFb702fvw1VUzED1+EAcuUzmrDuDIngN4f9ISfOnPVuNLf7YCd8wqQpECKEhARQEWrfw0vvrnn8ZXH/k0vlpfZS6DjvCibxjLaHrqkNKlAehtOXhDqU/hFSlfgQIkQkDFx/AXX1iNL37hfiwb34ndr7WJvm31JC9WzkinL0rPxRf1LfuksY1AdWN6eU3ltU9SOkxv3zvyLkrdeXlLs7K3q3luJ3lYvZLSi++SfSr1fec73/kOcoCmaYirCeT58vTOnOno7MbUKeVYdc9S3LxoPqAgyQMw/1wMoZQgr6sVx0PjsWhmsUg7exKvdY3HbYunoBDn8MoLh9E3ZTamFALAMM7v34edrx3FgaOtaOvJx+xZ4+G/dAzbGy6gpGYqxgPA+0ewveEsgvNnoEzRgP4T2LWzHYGa6ZiAcOo20I93dr2JS0Uajje8ir2n46haUIH8FHecsnfT0IUzOBmdiJurSi3pvWg9egUF1dWYXqSh//Ar2H0+D+qpt7Dz9XdxsPk8+guno7LML66JQh04sPs1vLS/GUfPXEEiEUW3ItqEGsLFYwfR8Md38MbhFhw9fQXBaTMwuaAfR3Y24e3uBHo7z+B4ywUkpszG1MJhnHvzNex67Sj2v3Mabb35mD1rAsRzA5G385V3sP/d8+iIqxjuzcPURTNg27m+izjSpqHq5uswAUBA68HJ9gRm1kxBobUcVPS1HMBvdx/Em++cwXsDKmK9EYybMwfTiwC1qw37XjmApgPv4uDR03hvcAKqZhQDp97E/90/iMq55fq4gO6De/Fiez6un+HH2T/+Ab957RgOHj2D93p8mDGrFJZfYoIy2Imj+95Aw5vH8NY7p3HikobZVeXIVwAtHsKZV1/B795oxpFjp3CspRXvHD+NnuBcVE5U0de8H7/fcxBvHDmBE+8NonBaBSbmK8mXqCnV6Q5VuOYT7RgKh3Gxs1scl7rRQd/rvqe3H/OrZ8sNA1Awfto0lOWL71EYw8Xj7yNaMQ9VpdJA3j+GvacK8fH6BSjL0wAliPGlRfqYOtB8cAhlt1Siwgf7JTp6ceboFeRXV+O6IkC5dBS/3t0KpWImygtSTHcEOhyJwu8TTx6u9PQDAMrLxgMAIpY8QK8QnIjrphWLOwJFQaD3At7tCWLe/MkIpuogW0rIPotQd7J3Vqc7VNmbKbLPnRJe3gJVl30ONOd3lGKK+kQVmnA6ni41ZG+qovgxe1oZopc70A1xd9Dxfg/yp05HuaJAUVREI1FEE6LOlbf2obGjFMsfegBf+twy1EaOoel4HMrUiSiNdqNDfy3zyvluhNUenD0nxhQ7dwkdheMxXYGtjS9b21ASiEaGcOzABVTcfS++eG8NSgEoin7Vos+FfCI0gMu9YSQs6SmVVsAlPR6NorPtDAYrl+KLX7gXn7+xCOfffAvNEQDoxYGGt3AyMA+f//wD+OKqagQjMb02AN8gOocm4e4HPoUvP3wv6ib34Y03TyKKYlz/iUWoLgSm1dyJP6lfgusnAN0HXkNjRynqHnoAX/n8MtQOH0PTu6K9K2+9hsazQSx+4F58+U8/iZvz44iIJTTGCgBK+TzcNKkHb+w5g+6IistnuoDrZmGStEaxU/vx4qFBzPzkffjKw/W4pxwIiwJQFCDRM4Tggtvx5w9/Gl95oBq+toM4cBHwV09HRd97OG68ltmDM++FMGnadCinDmFP9ySs/vyn8ZXP343Fs8ahRL4THehB35Sb8bnPfhpf+dxiTOs9gT+8K/5O6tC7+/GHoRn47J/eh//0p0tQHQhg/t2rcfdcBdGT+/G7d+JYcM99+Moj92H5pB784aUjuKgmn7tWVYcG0Nk7DFWfG53viuTz8wO4ZdECLF60wFHz84P6uZF8VSw0gdhAF06+0oJzRbOwcJZ+fsK82o5dGURo4mTMCnfizPEzaG7vQkwVQ9HPXnQePoCG3fvw2juXEU7oIxQdiO9DbXj5lUuY/vGPY8FEkQb9at82/UQEfT196I+IC4IUy2PT1vbzePPtY3on5vHm28dwuv28PZ06VBRAHUbvuePYe3oYM+fPxbikAUHMLYUfkYoVNTx90U5k40tuyeiTlO5EDU9j9PLJa0DeUcXJkexdVOwpedpj56cQSSqfSym6l31mat6h5jxQCvSwr1H4T/aHj57Evzz/O/zLL17E4aMncPjoCZsHNL28qZqmAdVTMSN8BWf7NGjaJZzpAGbMmgwYf4NQ3N1Cu4SW92KYf+uNmOzTgLxC1MwqQ2fHeWjaVFw3aRgXzw8A2gDOX/bjppoSXLzYCWga3u8aQvmUaQigU2/jJtGGr8jShgZARWn1bVg4yYc8n1hakS5p3yWcunAFXZ2X0NohXuwxVoK+IW+3zpTPxpLKIgA+FNXMQ3VhDy6cBXChDSdDk7BkWTWKfADyKzBnsvV14wrctLQa5fk+qKEQysaXAKEwBuBDoKQIQQD+gnEoGV+EgM+cf4Uf0PKKUDtbzB/oRPPZEGbefBvmlfgAXwBTZ5Wh0DIncw3yMaN6EvJ6WtH46x148f0JuOuWMtsaaYjhRGsXgnMW4/ZpedA0H0rmV4g7U31vg/MWYknVOPjiUQwlSlBaGEN4AEDeTNTMVHHm1CXRd+d7aFenorZKg+bPgy86gEuXo4CvEDNmTzLPJdLpC3BXbTmCvjiiQ8UoHweEBgcAaLjUNYCSiRUoytMAfwXKi0M4d64P0KI42dqF4OzrsXBSHoA8VNx+I+bjAo61wrrDdu3twMnz3ejq7EBrR9iWr6+GvbwlPbXKNezpfYdexf/Z9SZevRhA7Y3VqPCJdA1inzQA/UMRoL8Vv9t7Fj2IofvYm/i3nS0Y0AAtWozy6SWYNrsGS2+Yhnj7G/hVUxsi1o4TXXizoQW48S4smSI60PR8UnFE8P577+N85xWcf68DXVFrXooDwA0L5kDJU/DH1w8bk/rj64cRCARwQ80ccwy2Ds9i7/Ym/PqVNgxOrMbHqvU/AG/kC9WsHqa3rbDsHVWzefqSvduXXNap7WTvMFZ5jil8+iq+NTwdsnc5qLjss6F0LsveUDGDZG9TWr2xCpRJlwKSQsHNixbgyw8/gC994QHcvGiBzd+yaAEA6aqf1D8LsysGcaFtEEpXB95HBeZcR/nUvQJFCSEciaH19Zfw/Av6cawPwaAPiuJD1ZTx6O7qhBK9iPPxUkyrnIhxnRdxRbmMC90BTJsxDgqG9DZ24fkXXsJzv9plaUN0lp8fNPp00uHhuLEBajQq0nVPYyYkmyY++AD0D/YDQxFEC4tQbn+fDGD0paK7+U388j924TevHMORy3Ep38Rcw1147lf6cbQPwaAfwBDCkQJMKBNroVjqm17X9w/id+8EsGzNSvzZQ8uwfEofdu04hiv6non9GkR4GCgrK9M9nS+iYUVRgMH3cWBXA/79t6/hlSPn0StuyQBFwfSqCuD98+hQFHS3dSI6bSamQYEyZwnuXxRA895d+PlvXsfBcxEY5xZpvAfNe3fj33+5Fy+/dQoXY7QRCmbOmITw+23oVBUg1Iaz3QFMmToBUAYRGgbKyibqhRUAfgR9QE9fn2hbDJ5WEwqA4Yh5LsQj+lgs+VaNRGI4dPQkDh89kaSHdY1EYkbvinRVrChA6S134y8+dx++eu8M9L3ZhD1tqi2f6qJoFlY+sAS3Xl+DT9y7CJWD59F8BVCCFfjY8iW4fto4jJtWhWV3V6Oo4z2cjFJFFe1vHMCxoRhCoZjYstT/5AElAv30BxIxDMfkgUgKobctXojiogIcPnoSh46eQHFRIRbfJH5GWMuZd0Ozseyzq/HlR1bjnuL38OudxzFgyReq2L1YuRReKH1Rei6+qG/ZJ401yXurbkwvrbGjt54kkhd7K3sXpeZln0I9h5NS7ee+6RXRVirvoGMTKPWrdUfVD7qql71x1aSJOnbNw+wpZei+dBFd568gXF6BqUY+da9B0wLI8wVQvfRTePhBy3HXTGiahsDMSSi/0oX29m4MTJ6KieOmYqrWg/MXLqMb5ZgxSYOmBfU2Vom6D+l61yz9LojuhoQafUu+oKwYhT4Aig/F48Rrq9Z8q04sLQEG+nHJmt4fRgiFmDDRLGcnhqgKjB83XpwlkWEMykWgL3/bW/j9cR9u/8wqPLhqKe6cmW/Pt2Cu4So88pDl+MRMUUCJYWDQOmeqZ1+bS2c7kTdlFibnadD84zD7jkWYH7uI1k6xZ2K/fIAChIYGdE/ng2hY0/pxeO/b6Jx+Jx5ZswyrPjHfeM1M0zRo0+ZhXn4Hjp/px7lOFZWV9JQhD+W1H8dnP38vHlqUj9ZXX8eRPsu5pmlof/V1HPFfjy989h7ct3wxZhdAv8TUEJgxFdPRjT/+bif+/aV2BG68A3dN1wDNj/wAEBrqF4U1DYAKFUDZ+PGib+mc1wAUlJrnQsn4Yr2cPg9JxaPX+bh50YIkvVnX/PyA0bqmuei4OZhdrqLzYldS/sSyEiChQqX0vBKU5CegqfrILdPQCgIIIgHVuBmOIVJcgz+rr0K0+SCODlmWI0nHoWy8eLrhCxajtEiMAfRv16Jmx0IX31iD2TOnonLWNCy+cUFSPvSx230eKqonYVzfFZyLmPm0L+l5oeJcTvaG0pfsM/mS63r0LY8xeexOHuL8t3inNXRTea/kPbSrKJrsUys1K3t3FW0n+zQUdj82gVKPy+LSgZQuJYDJ5WU4evw0/uUXL+qPWk+KR7G6P3a8FZMnijsLs555F5A/rwIVPRfxZkcYM2bPMO4+zO4UQJmG6qkJtB4/g7D+mqWixhFNiHaUCZNR7h/Eu2cHUTF9ChRlImZOGcbF4z0YmDQF0xQFijJVb6MV4YS42lDUOGIJ0Ye4cjGV+pY9gqWorK5E7byZmFUmflDY8q1aWY6K8AUcPRHW0yM4e+g9dE+qQKXPLIdoFCH9jcOhk61oj0/CrFkAZpWjItGN5hbxy9mId+LMZfM1ysHeMNSicSj3A4CKzm4zpCqKH0E/EImK8tb5h+h3OOMxxBIAMAMzylWcP92KsCLauvxeD8KAfR8AFBcWYLCnU+wDACXcg+5IAPn5ljVUxqOyIoDus2dwOaFAUVSEWzvRLRqCovTjylAA48ryRfnBK+iOmP0oyngsqpqAiy0H0RqpwHx6ytB5AecGE1AUP0pmT8eUYAyhYesd5SD6BlUUTpgIHxQo6mV0D0A/hRX0Hz+D7mmL8dCa1fjzNXVYVjtBZCrjUX3dOHSfPYXOuOgrdOIUWiOTMLvSPGcNpX8T+WWomluF6+fPwuwy/VOsxCyMq1nS8eOKsaPhdexo2Oeo48cXJ9VTACihdvyh6ST6VN3HL+PKAFBYXAQFQLj1CP5wrEtcXc+djcqhCzjWqQrf+R7ORsZjSrmKy4cO4OBFVV8OFZcPv4fOojLM1JcBKMCCm6pQPPl6LJsTwduvtiJknb4Yku4VTJg2AzfUVKFmTjlKUiyTdbkOHT2Ji53dhr9t8ULctvgGw1/s7MahoydEJQXitdQDb+DVcxHDh7oGEfIXYFy+vjLGgEyfniqunr5kn80v6lP21KPT2JJ9suomyRtqrLHk01baW/3ntOFd1HbuWHwKHdnwLHegkPzV8HuUI0cRlxiI4d2XXsKBvgrUGb9TCQDtaPzFaUxYsUL8vqHajXca3sLBfiDoB1TVh+rb7sYds4MANJx9ZSeaLozHHZ+/C/PzNPF7mq93YvLie7FqQZ5YzfhlvNPwttTGMtwxewBv/eY19Nbej/oFKX5vUvY0etnrSmgAwmfewu/fvIhBXwA+NQa1cBqWr7wVs/U3Ina/2YDfXsxDeTyGASQQjeVj/p134s5Z4hNLQidfx/a3u6AGAvD5CjF/MnAsMkv8HmWoDY07juF9BOBTApg2owTdFwuw/DM3oRzA4JG9+OXxEILBACoX1+POWV048rK8hstwZ2XAaOucGkDQ50PpzDKorTHUyL9Hqfbj2N59ONjtQ2lxHnoHY6i44Q6sWijevUgoahf2//51HBvyIRjIQ3DSNJT1dmLCnfVYMlnF5QN78eLpCILBPASLp2E63kN4zgOon6+vY+I97P7lEVye9XF8fmkFFCWGrmMHsedYN6L+PEBNAJNq8eA9VSiybEDo5Ot44e0eIJgHBMowf0I/2gtuxedunwh0votf7WkVe6EAgA/jKuZg2Z3VmOCLoP2VV7DnfAzBIKDGi1B9x+24a1ahZWezpUSaXg2h9bV9ePX9CHz+PKjRGPIrarC6bj4m+GJofnknXg9X4aE/WYQJAELvvY1d+zoQ8udBjftQueSTWFZdgPj772Lna63oRgC+RAxR3yTcds/HsbDMl/x7lOoF7P6Pw1Bvvgf1CwrSGr0NqqBztLkVPb39GFdC78C1MzAYQtmE8Vh0Q7U57QvvYse+dnRrefAhgahahAV3LsVdMz3ehkvIPoco/HuUyd2OVAkXn0n3YxAor8K/RxkJYTCSh8LxBUjx0l16ZKONjNA/jcdfhJIie4/mBwjMQrQ/AhQXISgPSo0hPKQiaBmvCOAaFKiI9EeglBQjkJfi11SiYQzEgxhX7DfTh4cwFPGhYHw+/JaN07Q4YoNhoLAEQX+KX8C3+kgYg8MagiXFCPqcd1ANDSLiK0JRvjwpfR/U/KQ1EXTi1f84jODdK3Gb7YMZVET7QogXOLQJAPFhDIZ9KBlnvdBT0b53N5qnfBKr5yQwOKwC6MfbDW8juujTqF9gqTsEFE0osD2ySXWejlSpQU36bZKkgrLGYwgNDcNfNA7BgCU9NoxQIoCioHjkLdJVRHsjwLgiBPPs7aihAYTUAowrCVgGIEMVvHx6mkhouNx1BQlrExbyFAWTJ5UiL88HSIEiHupHWC1EyTi/nk6tUjknb5+e7LPK6JYnMyVknwG5HC4heztOI5D96DXngZL/HmXy703KfkRKvWgaruxvND5px7xiJKjktYQKqBB3nL0z8flV81GU8vT3UEsgEr4H+7e/ifDH6nH3TBFQtNhFvPKbI/AtXYW7rpMbyFRTRD7Zp66YAyUy9SbUmuyzoYTsbTgNQPa5UEL2WSTz6TjdocreTJF97pTw8haouuxzoDkPlPz3KHNP36E/4MXBKjzyiVly1qig9ZC9oUmBRN5GCuyyT1aqKHtDqWcvDw0YPIUXf9uC7vHXYfnyxZhZlDRyy6xk706o9QBePNCJRMkElPuGcWkQmL5wCZbXTpCLjhrP9R+FUoO0f1nvwEsJ2WcENejl01MKFLIfmVIr9gtX2eeU0S1HZkrI3oVcDo+QfWaYIxyjQPlB4bQVstfVKfLKXtfR3SFSNfkOUf+5rzO6jf5o4bF7yeoZyJ01dYOmqkMDCCvFKCnMS9GBZYTpNph1JWSfPtSa7LOhhOxdcRqQ7LOhhJfPIWbgtntzmE7eW6mG7NNXwsu7QM3JfiyUrOwddGwCpaIk95xCbXHJMpE0q7uqMTbJX8sY6214pztA2XsrbZzskwKH7B2V8PJXLzQb2Y9YneJy1jpwUUL2GUENyj47agYau0+tVMrL26cr+6xCHco+F0p4eRdyOVxC9u5kb0RjEyg/tHgstC2yJ9+aUOCQfTaUkD1j4rF7yWoJNK7eUlH2aSuNSI5sI24w20rI3oRKyz4XOiLGcoCyErLPIt7Tc7rjlH2yUgnZZ08J2btA1WWfCyV0n9NAGY3F4fP59D8LJAcSGlj6nqrThEbUnIMaY5f8tYyx3pI31s0I3LTO6Xpz42SfFChkn7YSmfoPDhq97LOhhM3nskNZCdlnhNOAZZ+eZnbHKSvVSu0J2WcVp+nLPhdKyD4DUjWbLSVk7w61kOxzFigTiQSisTgUKMbnnmYf54m5e11dI6+30g9+2QulZAdPvcp+VBv90cJj97yVtkv2loKyd1enBi0jzKzBUSiRqU8f6k322VBC9jZyOQAvJWSfQ8zAbffmsFIF7PSUWpB9+kp4eReoOS+fDSVcfCbd5yRQAkAikUAsHkc8nkAgGEju2StOUbLs01RC9tcyxnobXg/stE4pHvGmq7RRhqeVlwOH7B2VkP2HB5qN7LOh1KAcp5MK5koJ2dugCl4+O2oGGrtPrVTKywslZJ9VqEPZ50IJ2WdALodLyN6O0whkP3rNaaBUVRWxuPhcNb/fLxf5EOC08A7eNfJ73YFmqNSLEZgE5hWjmSJKXns47Vba6nQDaSko+9Fpisgn+9QVc6BEpt6EWpN9NpSQvQ2nAcg+F0rIPotkPh26UPDyZorsc6eEl7dA1WWfA81ZoNT0x6/xeBxqAgA0+AMB10BiVyom++Tqsk+zuqsa85D8tQSth+wNdQkkQimwyz5ZqaLsDaWevXzaSsj+6sFz/Ueh1CDtX9Y78FJC9hlBDXr59DSzO1IvpVbsF66yzymjW47MlJC9C7kcHiH7zDBHmLNACQCa/qYeVVWhJsQnsfvy8qDk0YdQ5xqnrZC9rk6RV/a60g962aenVE2+Q9R/7uuMbqM/WnjsXrJ6BnJnTd2grE4dWEaYWYNZVEL26UOtyT4bSsjeFacByT4bSnj5HGIGbrs3h+nkvZVqyD59Jby8C9Sc7MdCycreQXMaKAERCChYagAS+gc1kjIMwzDM1UzOAyX0YJlIJIxD08TfUGMYhmGYq50xCZTQgyV1xYGSYRiG+bAwZoHSygfQJcMwDMOMiA8kUDIMwzDMh4VcfWQOwzAMw3wk4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxgUOlAzDMAzjAgdKhmEYhnGBAyXDMAzDuMCBkmEYhmFc4EDJMAzDMC5woGQYhmEYFzhQMgzDMIwLHCgZhmEYxoX/H4+3KeAgdZe3AAAAAElFTkSuQmCC)" ] }, { "cell_type": "markdown", "metadata": { "id": "5mxWSYaAsLfg" }, "source": [ "# HuggingFace에서 데이터셋 가져오기" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 190, "referenced_widgets": [ "f3f714a45faa410aa8fe97afbfad4bc9", "81452c23ddc74b73b00683e6e0ffd62c", "5a457b732e6f41d6bdd60d3e319e16b3", "3ec60119a667432591ff84d689498d64", "ef3bfefab1894c639c1efa361e02ad1c", "93bc3ada38bb4544a20337de5b480c80", "3997e390204e4f93ad584bc43a763092", "e3d10614ae6840b8978dfe947d52794e", "25859ea43ecd4c36ababe00bd9d9a9ee", "ea835a895f704f68a5f2632a0ab69a5e", "fb9b3e0f77724dd49226198162394de9", "c3a5b2d6e7014e6485397b1cd800b858", "ea1fc5e0e5b445cfbe8f3546014975de", "3efe0e4cfa204ff5bef1bb1460149e2e", "322b8fffe4e5498188bc56ba62ecccf4", "cfd3b1e036dc40c4ad31e15e523236e7", "98ad3eaf942348e59aea1964abd2202c", "5d4799296ede4114a5bad2648def9ca1", "cbbb14ab13854610858fadba2e4ca5c2", "afc57fb7a5264dea966c34249a91bacf", "ef23cddf9b8748c0a7722500c2af0410", "720b980a7f644059ae593129c7e8e0ed" ] }, "id": "8h-5xlBJsAwb", "outputId": "3598fb76-3221-4ed2-9323-6945dcb5225d" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.11/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f3f714a45faa410aa8fe97afbfad4bc9", "version_major": 2, "version_minor": 0 }, "text/plain": [ "data.jsonl: 0%| | 0.00/12.0M [00:00 EEVE 계열 모델 구조에 특화\n", "- lora_dropout=0 // 드롭아웃 비활성화 —> 안정적 학습\n", "- bias=\"none\" // LoRA에서 bias 학습 안 함\n", "- use_rslora=False //Rank Stabilized LoRA 사용 안 함 (보통 False가 안정적)\n", "\n", "### 토큰 설정\n", "- 생성 결과 에서 발화자 구분이 필요할 때, 생성 멈춤 조건을 걸때, 생성 후 후처리 파싱할때 활용하기 위해 미리 확인차 설정\n", "- 각 항목에 대한 숫자는 해당 모델에 학습하며 이미 설정되어 있음\n", "\n", "### 학습 파라미터 결과\n", "- all params : 해당 모델의 전체 파라미터 수\n", "- trainable params : 이 중에서 LoRA로 학습 가능한 파라미터 수\n", "- trainable% : 전체 파라미터 중에서서 학습되는 비율" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EXfC3BpLtN2f", "outputId": "f4267ffc-bc6d-4465-cbd8-8f0bf80c51a8" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "trainable params: 62914560 || all params: 5633347584 || trainable%: 1.1168236836422412\n" ] } ], "source": [ "# Gradient checkpointing 활성화 -> 메모리 최적화 기법\n", "model.gradient_checkpointing_enable()\n", "model = prepare_model_for_kbit_training(model)\n", "\n", "# 전체 파라미터 대비 LoRA로 학습되는 파라미터 비율 확인\n", "# 보통 수백억 파라미터 중 수십만 개만 학습(약 0.1%~0.3%)\n", "def print_trainable_parameters(model):\n", " \"\"\"\n", " Prints the number of trainable parameters in the model.\n", " \"\"\"\n", " trainable_params = 0\n", " all_param = 0\n", " for _, param in model.named_parameters():\n", " all_param += param.numel()\n", " if param.requires_grad:\n", " trainable_params += param.numel()\n", " print(\n", " f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n", " )\n", "\n", "model.gradient_checkpointing_enable()\n", "# LoRA 설정\n", "config = LoraConfig(\n", " r=16,\n", " lora_alpha=32,\n", " target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n", " \"gate_proj\", \"up_proj\", \"down_proj\",],\n", " lora_dropout=0,\n", " bias=\"none\",\n", " use_rslora = False,\n", ")\n", "\n", "# 원본 모델에 LoRA 어댑터 붙이기(trainable 파라미터만 추가)\n", "model = get_peft_model(model, config)\n", "print_trainable_parameters(model)\n", "\n", "#역할 토큰 넘버 저장\n", "tokenNum_ai = 33626 # \"남자\"\n", "tokenNum_human = 33269 # \"여자\"\n", "tokenNum_com = 714 # \":\"" ] }, { "cell_type": "markdown", "metadata": { "id": "1If0pHV6OPFO" }, "source": [ "##" ] }, { "cell_type": "markdown", "metadata": { "id": "1bp452wGYAhZ" }, "source": [ "## 학습 손실 계산을 재정의\n", "\n", "- '남자' 뒤에 나오는 부분만 학습하도록 손실을 마스킹하는 것\n", "- 즉, 모델은 '여자' 뒤의 내용은 예측하지 않도록 -100 마스크를 씌움" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RY0ITNMitO_I" }, "outputs": [], "source": [ "class maskTrainer(Trainer):\n", " def __init__(self, *args, **kwargs):\n", " super().__init__(*args, **kwargs)\n", "\n", " def compute_loss(self, model, inputs, return_outputs=False):\n", " for x in range(len(inputs['labels'])):\n", " # print(tokenizer.decode(inputs['labels'][x]))\n", "\n", " maskindex1 = (inputs['labels'][x]==tokenNum_human).nonzero()[:, 0].cpu()\n", " temp = 0\n", " for i, index in enumerate(maskindex1):\n", " if (inputs['labels'][x][index+1] != tokenNum_com):\n", " maskindex1 = np.delete(maskindex1, i-temp)\n", " temp += 1\n", "\n", " maskindex2 = (inputs['labels'][x]==tokenNum_ai).nonzero()[:, 0].cpu()\n", " temp = 0\n", " for i, index in enumerate(maskindex2):\n", " if (inputs['labels'][x][index+1] != tokenNum_com):\n", " maskindex2 = np.delete(maskindex2, i-temp)\n", " temp += 1\n", "\n", " for i in range(len(maskindex1)):\n", " ai_index = -1\n", " for num in maskindex2:\n", " if (maskindex1[i] < num):\n", " ai_index = num\n", " break\n", " if (ai_index == -1):\n", " inputs['labels'][x][maskindex1[i]+2:] = -100\n", " else:\n", " inputs['labels'][x][maskindex1[i]+2:ai_index+2] = -100\n", " # print(inputs['labels'][x])\n", "\n", " outputs = model(**inputs)\n", " loss = outputs['loss']\n", " return (loss,outputs) if return_outputs else loss\n", "\n", "# tokenizer.pad_token = tokenizer.eos_token" ] }, { "cell_type": "markdown", "metadata": { "id": "svg6ImYiZE0V" }, "source": [ "# 모델 학습 및 파라미터 설정" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 553 }, "id": "iBWy6eGztQL-", "outputId": "d12504b6-3cf2-4dda-ba52-06c08cfaac09" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n" ] }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [7292/7292 1:49:40, Epoch 2/2]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining Loss
5002.231800
10002.214900
15002.218900
20002.181500
25002.195300
30002.144400
35002.145200
40001.834900
45001.688400
50001.673200
55001.657700
60001.630900
65001.614700
70001.612100

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "NUM_EPOCHS = 2\n", "\n", "trainer = Trainer(\n", " model=model,\n", " train_dataset=data[\"train\"],\n", " args=TrainingArguments(\n", " per_device_train_batch_size=1, # GPU 1개당 batch 1개\n", " gradient_accumulation_steps=1, # 1 step 마다 weight 업데이트\n", " fp16=True, # float16 혼합정밀도 사용 -> 메모리 절약\n", " output_dir=\"outputs\", # 체크포인트 저장 위치\n", " save_total_limit=2, # 최대 저장 모델 수 2개로 제한\n", " logging_steps=500, # 500 step마다 로그 출력\n", " report_to=[\"tensorboard\"], # 로그를 텐서보드로 기록\n", " num_train_epochs = NUM_EPOCHS, # 2회 학습\n", " learning_rate=2e-4, # 비교적 높은 학습률(LoRA에 적합한 수치)\n", " lr_scheduler_type= \"cosine\", # 학습률 점차 감소\n", " ),\n", " data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),\n", ")\n", "\n", "model.config.use_cache = False\n", "trainer.train()\n", "\n", "model.save_pretrained(f\"./saved/EVEE/10.8B/{NUM_EPOCHS}epoch\") # 학습이 완료된 모델을 로컬 디렉토리에 저장" ] }, { "cell_type": "markdown", "metadata": { "id": "eRgmQi4ev5f8" }, "source": [ "# HuggingFace에 push하기" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2m0NF7-_wF8y", "outputId": "06ca6b8a-e7e1-4d2e-8d72-76b66599de5b" }, "outputs": [ { "data": { "text/plain": [ "('./saved/EVEE/10.8B/2epoch/tokenizer_config.json',\n", " './saved/EVEE/10.8B/2epoch/special_tokens_map.json',\n", " './saved/EVEE/10.8B/2epoch/tokenizer.json')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output_path = f\"./saved/EVEE/10.8B/{NUM_EPOCHS}epoch\"\n", "\n", "model.save_pretrained(output_path)\n", "tokenizer.save_pretrained(output_path)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 103, "referenced_widgets": [ "88ddd9672305452fa558d592aee0f30c", "d3ab5d075be349cc8af47a3f6b422b11", "5d0655e6412c4c7599eedc23af8c2df5", "d338b7988f6e4b979610c1be46101b50", "98f540f7aa4f49c4859612a0db8c7bc6", "783c546fa7584065987402f4197c758e", "8fc2951ab45a46a294d10ad459f93ddd", "bfd06e21c7334d2bafa0261e1ce918dc", "3c455257a8e0404b9cc758baaae76d86", "e17b538116f74b0ab5b1ce65ebf85312", "122cfefc839b43c29da85d76407ccf79" ] }, "id": "sF5h09M3tRS9", "outputId": "02e5d435-0861-47a4-92ff-f9cd6d13e6bd" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "88ddd9672305452fa558d592aee0f30c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "adapter_model.safetensors: 0%| | 0.00/252M [00:00