ns committed on
Commit
04eac40
·
1 Parent(s): 0e231b3
volumes/.DS_Store ADDED
Binary file (6.15 kB). View file
 
volumes/notebooks/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .ipynb_checkpoints
volumes/notebooks/etl.ipynb ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "d12005fa",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "ename": "",
11
+ "evalue": "",
12
+ "output_type": "error",
13
+ "traceback": [
14
+ "\u001b[1;31mRunning cells with 'blip' requires ipykernel package.\n",
15
+ "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n",
16
+ "\u001b[1;31mCommand: 'conda install -n blip ipykernel --update-deps --force-reinstall'"
17
+ ]
18
+ }
19
+ ],
20
+ "source": [
21
+ "import pandas as pd "
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 8,
27
+ "id": "e7eebe96",
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "data": {
32
+ "text/plain": [
33
+ "part p10\n",
34
+ "patient p10000032\n",
35
+ "scan [s50414267.txt, s53189527.txt, s53911762.txt, ...\n",
36
+ "Name: 0, dtype: object"
37
+ ]
38
+ },
39
+ "execution_count": 8,
40
+ "metadata": {},
41
+ "output_type": "execute_result"
42
+ }
43
+ ],
44
+ "source": [
45
+ "# one row in the control dictionary\n",
46
+ "pd.read_json(\"/opt/physionet/control.jsonl\",lines=True).iloc[0]"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 10,
52
+ "id": "1d191897",
53
+ "metadata": {},
54
+ "outputs": [
55
+ {
56
+ "data": {
57
+ "text/plain": [
58
+ "fold p10\n",
59
+ "image /opt/physionet/physionet.org/files/mimic-cxr-j...\n",
60
+ "original FINAL REPORT\\...\n",
61
+ "report /opt/physionet/physionet.org/files/mimic-cxr/2...\n",
62
+ "patient p10000764\n",
63
+ "text findings: pa and lateral views of the chest pr...\n",
64
+ "indication indication: unknown year old male with hypoxia...\n",
65
+ "Name: 0, dtype: object"
66
+ ]
67
+ },
68
+ "execution_count": 10,
69
+ "metadata": {},
70
+ "output_type": "execute_result"
71
+ }
72
+ ],
73
+ "source": [
74
+ "# each row contains the labels and metadata needed to train the transformer\n",
75
+ "example = pd.read_json(\"/opt/physionet/dataset.jsonl\",lines=True).iloc[0]\n",
76
+ "example"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 11,
82
+ "id": "dd14ab31",
83
+ "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stdout",
87
+ "output_type": "stream",
88
+ "text": [
89
+ " FINAL REPORT\n",
90
+ " EXAMINATION: CHEST (PA AND LAT)\n",
91
+ " \n",
92
+ " INDICATION: ___M with hypoxia // ?pna, aspiration.\n",
93
+ " \n",
94
+ " COMPARISON: None\n",
95
+ " \n",
96
+ " FINDINGS: \n",
97
+ " \n",
98
+ " PA and lateral views of the chest provided. The lungs are adequately\n",
99
+ " aerated.\n",
100
+ " \n",
101
+ " There is a focal consolidation at the left lung base adjacent to the lateral\n",
102
+ " hemidiaphragm. There is mild vascular engorgement. There is bilateral apical\n",
103
+ " pleural thickening.\n",
104
+ " \n",
105
+ " The cardiomediastinal silhouette is remarkable for aortic arch calcifications.\n",
106
+ " The heart is top normal in size.\n",
107
+ " \n",
108
+ " IMPRESSION: \n",
109
+ " \n",
110
+ " Focal consolidation at the left lung base, possibly representing aspiration or\n",
111
+ " pneumonia.\n",
112
+ " \n",
113
+ " Central vascular engorgement.\n",
114
+ "\n"
115
+ ]
116
+ }
117
+ ],
118
+ "source": [
119
+ "# original text\n",
120
+ "print(example[\"original\"])"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 12,
126
+ "id": "ea5792df",
127
+ "metadata": {},
128
+ "outputs": [
129
+ {
130
+ "name": "stdout",
131
+ "output_type": "stream",
132
+ "text": [
133
+ "findings: pa and lateral views of the chest provided. the lungs are adequately aerated. there is a focal consolidation at the left lung base adjacent to the lateral hemidiaphragm. there is mild vascular engorgement. there is bilateral apical pleural thickening. the cardiomediastinal silhouette is remarkable for aortic arch calcifications. the heart is top normal in size. impression: focal consolidation at the left lung base, possibly representing aspiration or pneumonia. central vascular engorgement.\n"
134
+ ]
135
+ }
136
+ ],
137
+ "source": [
138
+ "# text used as a label for the model\n",
139
+ "# no indication\n",
140
+ "# no technique\n",
141
+ "# no comparison\n",
142
+ "print(example[\"text\"])"
143
+ ]
144
+ },
145
+ {
146
+ "cell_type": "code",
147
+ "execution_count": 13,
148
+ "id": "67e5bb00",
149
+ "metadata": {},
150
+ "outputs": [
151
+ {
152
+ "name": "stdout",
153
+ "output_type": "stream",
154
+ "text": [
155
+ "indication: unknown year old male with hypoxia // question pna, aspiration.\n"
156
+ ]
157
+ }
158
+ ],
159
+ "source": [
160
+ "# used as an INPUT to the model along with the image\n",
161
+ "print(example[\"indication\"])"
162
+ ]
163
+ }
164
+ ],
165
+ "metadata": {
166
+ "kernelspec": {
167
+ "display_name": "blip",
168
+ "language": "python",
169
+ "name": "python3"
170
+ },
171
+ "language_info": {
172
+ "codemirror_mode": {
173
+ "name": "ipython",
174
+ "version": 3
175
+ },
176
+ "file_extension": ".py",
177
+ "mimetype": "text/x-python",
178
+ "name": "python",
179
+ "nbconvert_exporter": "python",
180
+ "pygments_lexer": "ipython3",
181
+ "version": "3.10.8 (main, Nov 24 2022, 08:08:27) [Clang 14.0.6 ]"
182
+ },
183
+ "vscode": {
184
+ "interpreter": {
185
+ "hash": "d2929fa862ca5c20be7df7418b9bcb368752100a819a60622976f7f091b1ba7c"
186
+ }
187
+ }
188
+ },
189
+ "nbformat": 4,
190
+ "nbformat_minor": 5
191
+ }
volumes/physionet/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitignore