drjieliu committed on
Commit
a61d226
·
1 Parent(s): 68d3c63

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +56 -0
  2. func_gradio.py +165 -0
  3. requirements.txt +11 -0
  4. util.py +318 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import pyBigWig
4
+ from func_gradio import predict_func,make_plots
5
+
6
# Inputs of the "Run model" tab, in the positional order predict_func expects:
# chromosome, contact-map type, region start, region end, ATAC-seq file.
inputs = [
    # `value` sets the initial selection; the former `default=` keyword is not
    # a Dropdown parameter in gradio 3.x and was ignored.
    gr.Dropdown([str(i) for i in range(1, 23)], label='Chromosome', value='1'),
    gr.Dropdown(['Micro-C', 'Hi-C (ChIA-PET)'],
                label='Chromatin contact map',
                info='One type of contact map is predicted for each time'),
    gr.Number(label='Region of interest (500kb for Micro-C and 1Mb for Hi-C)', info='From'),
    gr.Number(info='To', show_label=False),
    gr.File(label='Processed ATAC-seq file (in .pickle format)'),
]

# predict_func returns a list of file paths offered for download.
outputs = [
    gr.Files(label='Download the results'),
]

app1 = gr.Interface(
    fn=predict_func,
    inputs=inputs,
    outputs=outputs,
    title='A computational tool to use ATAC-seq to impute epigenome, transcriptome, and high-resolution chromatin contact maps',
    description='<a href="https://github.com/zzh24zzh/EPCOT_gradio" class="built-with svelte-1lyswbr" target="_blank" '
                'style="font-size: 15px; font-color: black; font-weight:bold" rel="noreferrer">'
                'View Documentation </a>',
    # examples=[["11","Micro-C","10500000","11000000","./examples/atac_GM12878.pickle"],
    #           ["11","Hi-C (ChIA-PET)","7750000","8750000","./examples/atac_GM12878.pickle"]]
)

# Names of the epigenomic features, one per line; they populate the
# multi-select of the visualization tab.
with open(os.path.abspath('data/epigenomes.txt'), 'r') as f:
    epis = f.read().splitlines()

# Inputs of the "Visualize prediction results" tab, in the positional order
# make_plots expects (the Markdown heading is passed through as well).
inputs1 = [
    gr.File(label="Prediction file (in .npz format)"),
    gr.Markdown(value='### Visualization options'),
    gr.Dropdown(epis, label='Epigenome features', multiselect=True, max_choices=10,
                value=['CTCF', 'H3K4me3']),
    # NOTE: make_plots compares against this exact choice string, so it must
    # not be edited here alone.
    gr.Radio(choices=['Signal p-values (archsinh)', 'Binding probability'],
             label='Type of epigenomic feature data',
             value='Signal p-values (archsinh)'),
    gr.Slider(maximum=16, label='Range of values displayed on the plots',
              info="Choose between 0 and 16 (contact maps)", value=4),
    gr.Slider(minimum=2, maximum=12, info="Choose between 2 and 12 (epigenomic feature signals)",
              value=4, show_label=False),
    gr.Slider(minimum=2, maximum=12, info="Choose between 2 and 12 (CAGE-seq)",
              value=8, show_label=False),
]
outputs1 = gr.Plot(label='Plots')
app2 = gr.Interface(
    fn=make_plots,
    inputs=inputs1,
    outputs=outputs1,
    live=True
)

demo = gr.TabbedInterface([app1, app2], ["Run model", "Visualize prediction results"],
                          theme=gr.themes.Soft())

demo.launch(debug=True)
func_gradio.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio,psutil
2
+ import numpy as np
3
+ import torch,os,pickle,uuid
4
+ from util import check_region,predict_microc,predict_cage,predict_epis,filetobrowser,predict_hic,predict_epb
5
+ from scipy.sparse import load_npz
6
+ import matplotlib.pyplot as plt
7
+ from matplotlib.gridspec import GridSpec
8
+ import matplotlib
9
def predict_func(input_chrom, cop_type, region_start, region_end, atac_seq):
    """Run all prediction models on one genomic region and save the results.

    Args:
        input_chrom: Chromosome number as a string (e.g. ``'11'``).
        cop_type: ``'Micro-C'`` selects the 500kb Micro-C model; any other
            non-empty value selects the 1Mb Hi-C/ChIA-PET model.
        region_start: Start coordinate of the region of interest.
        region_end: End coordinate of the region of interest.
        atac_seq: Uploaded file object; ``atac_seq.name`` must be the path of
            a pickle that can be indexed by ``int(input_chrom)`` and whose
            entries provide ``.toarray()``.

    Returns:
        A two-element list: the path of the compressed ``.npz`` prediction
        file and the path of the zip archive produced by ``filetobrowser``.

    Raises:
        gradio.Error: If an option is missing, the reference genome file is
            absent, the ATAC-seq file cannot be read, or the region is
            rejected by ``check_region``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(device)
    # Test truthiness rather than == '': an unset option may arrive as None,
    # which previously slipped through and silently selected the Hi-C branch.
    if not input_chrom or not cop_type:
        raise gradio.Error("The prediction options cannot be empty")
    if atac_seq is None:
        raise gradio.Error("Must provide an ATAC-seq file!")
    ref_path = 'refSeq/hg38/chr%s.npz' % input_chrom
    if not os.path.exists(ref_path):
        raise gradio.Error("The reference genome must be downloaded!")

    ref_genome = load_npz(ref_path).toarray()
    try:
        # SECURITY NOTE(review): pickle.load can execute arbitrary code and the
        # file comes from the user; consider a safer exchange format.
        with open(atac_seq.name, 'rb') as f:
            tmp_atac = pickle.load(f)
        atac_seq = tmp_atac[int(input_chrom)].toarray()
    except Exception as err:
        raise gradio.Error('The ATAC-seq file cannot be read!') from err

    is_microc = cop_type == 'Micro-C'
    # Expected region length, and the margin trimmed from each side of the
    # region when reporting the coordinates of the predictions.
    region_len, margin = (500000, 10000) if is_microc else (1000000, 20000)
    _, start, end = check_region(input_chrom, region_start, region_end, ref_genome, region_len)

    out_epi_binding = predict_epb(os.path.abspath('models/epi_bind.pt'), [start, end], ref_genome, atac_seq, device,
                                  cop_type)
    out_cage = predict_cage(os.path.abspath('models/cage.pt'), [start, end], ref_genome, atac_seq, device, cop_type)
    out_epi = predict_epis(os.path.abspath('models/epi_track.pt'), [start, end], ref_genome, atac_seq, device, cop_type)

    file_id = str(uuid.uuid4())

    # Keep only the latest run on disk: every file already in results/ is
    # removed before the new outputs are written.
    if not os.path.exists('results'):
        os.mkdir('results')
    else:
        for stale in os.listdir('results/'):
            os.remove(os.path.join('results/', stale))

    if is_microc:
        out_cop = predict_microc(os.path.abspath('models/microc.pt'), [start, end], ref_genome, atac_seq, device)
    else:
        out_cop = predict_hic(os.path.abspath('models/hic.pt'), [start, end], ref_genome, atac_seq, device)

    npz_path = 'results/prediction_%s.npz' % file_id
    np.savez_compressed(npz_path,
                        chrom=input_chrom, start=start + margin, end=end - margin,
                        epi=out_epi, epb=out_epi_binding, cage=out_cage, cop=out_cop)
    return [npz_path,
            filetobrowser(out_epi, out_cage, out_cop, input_chrom, start + margin, end - margin, file_id)]
64
+
65
+
66
def make_plots(in_file, md, epis, epi_type, maxv1, maxv2, maxv3):
    """Plot the contact map(s), selected epigenomic tracks and the CAGE track.

    Args:
        in_file: Uploaded file object; ``in_file.name`` is the path of an
            ``.npz`` archive with the keys ``cop``, ``epi``/``epb``, ``cage``,
            ``chrom``, ``start`` and ``end``.
        md: Value of the Markdown heading component; unused.
        epis: Names of the epigenomic features to draw, each of which must be
            listed in ``data/epigenomes.txt``.
        epi_type: ``'Signal p-values (archsinh)'`` draws the ``epi`` array
            with ``maxv2`` as upper limit; any other value draws ``epb`` on a
            0-1 axis.
        maxv1: Upper colour limit of the contact maps.
        maxv2: Upper y-limit of the epigenomic signal tracks.
        maxv3: Upper y-limit of the CAGE track.

    Returns:
        The matplotlib ``Figure`` holding all panels.

    Raises:
        gradio.Error: If no file is uploaded, the file cannot be read, or no
            epigenomic feature is selected.
    """
    matplotlib.use("Agg")
    if in_file is None:
        raise gradio.Error('Must upload a prediction file!')
    use_signal = epi_type == 'Signal p-values (archsinh)'
    try:
        # Each key access on an npz archive re-reads the array, so pull every
        # array out once and let the context manager close the file.
        with np.load(in_file.name) as prediction:
            cop = prediction['cop']
            track = prediction['epi'] if use_signal else prediction['epb']
            cage = prediction['cage']
            start = int(prediction['start'])
            chrom = int(prediction['chrom'])
            end = int(prediction['end'])
    except Exception as err:
        raise gradio.Error('The prediction file cannot be read!') from err
    maxv1, maxv2, maxv3 = float(maxv1), float(maxv2), float(maxv3)
    with open(os.path.abspath('data/epigenomes.txt'), 'r') as f:
        epigenomes = f.read().splitlines()

    bins = cop.shape[-1]
    # The multi-select delivers a list, so an empty selection is [] (never '').
    if not epis:
        raise gradio.Error("No epigenomic feature is selected")
    num_mod = len(epis) + 1  # one row per selected feature plus the CAGE row
    epi_idx = [epigenomes.index(name) for name in epis]

    plt.rcParams['font.size'] = 14

    # 480 bins means a single map at 1000 bp per bin; otherwise the archive
    # holds three maps at 5000 bp per bin.
    single_map = bins == 480
    n_maps = 1 if single_map else 3
    # Build the Figure directly instead of through pyplot: pyplot keeps every
    # figure it creates alive until it is closed, which leaks memory when this
    # callback is re-run on each widget change.
    fig = matplotlib.figure.Figure(figsize=(9, num_mod + 4 * n_maps))
    gs = GridSpec(num_mod + 4 * n_maps, 9)
    ax_map = [fig.add_subplot(gs[4 * i:4 * i + 4, :8]) for i in range(n_maps)]
    if single_map:
        axc = fig.add_subplot(gs[:4, 8:])
        axc.axis('off')
    else:
        axc = fig.add_subplot(gs[:8, 8:])
        axc.axis('off')
        axc1 = fig.add_subplot(gs[8:12, 8:])
        axc1.axis('off')
    axs = [fig.add_subplot(gs[4 * n_maps + i, :8]) for i in range(num_mod)]

    # Rotate the bin grid by 45 degrees so the matrix diagonal lies along the
    # x axis; only the upper half is kept through the y-limits set below.
    bin_coords = np.true_divide(np.arange(bins), np.sqrt(2))
    x, y = np.meshgrid(bin_coords, bin_coords)
    sin45 = np.sin(np.radians(45))
    x, y = x * sin45 + y * sin45, x * sin45 - y * sin45

    if single_map:
        mesh = ax_map[0].pcolormesh(x, y, cop, cmap='RdBu_r', vmin=0, vmax=maxv1)
        fig.colorbar(mesh, ax=axc, aspect=20, fraction=1)
    else:
        meshes = [ax_map[i].pcolormesh(x, y, cop[i], cmap='RdBu_r', vmin=0, vmax=maxv1)
                  for i in range(3)]
        cbar = fig.colorbar(meshes[0], ax=axc, aspect=30, fraction=1, shrink=0.85)
        cbar.set_label('log2(x)+1')
        fig.colorbar(meshes[2], ax=axc1, aspect=15, fraction=1, shrink=0.85)
        types = ['CTCF ChIA-PET', 'POLR2 ChIA-PET', 'Hi-C']
        for i in range(3):
            ax_map[i].text(2, bins // 2.5, types[i], va='top', fontsize=18, color='r')

    for ax in ax_map:
        ax.set_yticks([])
        ax.set_ylim(0, bins // 2)
        ax.spines['left'].set_visible(False)

    for ax in axs + ax_map:
        ax.set_xticks([])
        ax.margins(x=0)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(False)

    # One track per selected feature; the last entry of axs is left for CAGE.
    track_max = maxv2 if use_signal else 1
    positions = np.arange(track.shape[0])
    for ax, name, idx in zip(axs, epis, epi_idx):
        ax.fill_between(positions, 0, track[:, idx])
        ax.set_ylim(0, track_max)
        ax.text(2, track_max, name, va='top')

    seq_inter = 1000 if single_map else 5000
    n_cage = cage.shape[0]
    cage_ax = axs[-1]
    cage_ax.fill_between(np.arange(n_cage), 0, cage)
    cage_ax.set_ylim(0, maxv3)
    cage_ax.text(2, maxv3, 'CAGE', va='top')
    cage_ax.set_xticks([i * n_cage // 4 for i in range(5)])
    cage_ax.set_xticklabels([start + i * bins * seq_inter // 4 for i in range(5)])

    cage_ax.set_xlabel('chr%s:%s-%s' % (chrom, start, end))
    return fig
163
+
164
+
165
+
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy
2
+ psutil
3
+ pyBigWig==0.3.17
4
+ scipy==1.9.1
5
+ torch==1.10.1
6
+ einops==0.3.2
7
+ gradio==3.24.1
8
+ gdown==4.7.1
9
+ deepTools==3.5.1
10
+ torchvision==0.11.2
11
+ matplotlib==3.5.3
util.py ADDED
@@ -0,0 +1,318 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import numpy as np
3
+ import pyBigWig,os
4
+ from zipfile import ZipFile
5
+ import zipfile
6
+ import shutil
7
+ import torch
8
+ from pretrain.model import build_epd_model
9
+ from pretrain.track.model import build_track_model
10
+ from cage.model import build_cage_model
11
+ from cop.micro_model import build_microc_model
12
+ from cop.hic_model import build_hic_model
13
+ from einops import rearrange
14
+ import gradio
15
+
16
+
17
+
18
+
19
def parser_args():
    """
    Hyperparameters for the pre-training model.

    Returns a ``(namespace, parser)`` pair: the namespace holds the values
    parsed from the command line (unrecognised arguments are ignored) and the
    parser can be handed to the ``parser_args_*`` functions as their parent.
    """
    base = argparse.ArgumentParser(add_help=False)
    base.add_argument('--num_class', default=245, type=int,
                      help='the number of epigenomic features to be predicted')
    base.add_argument('--seq_length', default=1600, type=int,
                      help='the length of input sequences')
    # Integer options that carry no help text.
    int_options = (
        ('--nheads', 4),
        ('--hidden_dim', 512),
        ('--dim_feedforward', 1024),
        ('--enc_layers', 1),
        ('--dec_layers', 2),
    )
    for flag, value in int_options:
        base.add_argument(flag, default=value, type=int)
    base.add_argument('--dropout', default=0.2, type=float)
    namespace = base.parse_known_args()[0]
    return namespace, base
35
def get_args():
    """Return the ``(namespace, parser)`` pair produced by ``parser_args``."""
    return parser_args()
38
+
39
def parser_args_epi(parent_parser):
    """
    Hyperparameters for the downstream track model (the namespace handed to
    ``build_track_model`` in ``predict_epis``).

    Options of ``parent_parser`` are inherited; unrecognised command-line
    arguments are ignored.
    """
    track_parser = argparse.ArgumentParser(add_help=False, parents=[parent_parser])
    for flag, value in (('--bins', 500), ('--crop', 10)):
        track_parser.add_argument(flag, type=int, default=value)
    track_parser.add_argument('--embed_dim', default=768, type=int)
    track_parser.add_argument('--return_embed', default=False, action='store_true')
    return track_parser.parse_known_args()[0]
50
+
51
def parser_args_cage(parent_parser):
    """
    Hyperparameters for the downstream model to predict 1kb-resolution CAGE-seq.

    Options of ``parent_parser`` are inherited; unrecognised command-line
    arguments are ignored. ``return_embed`` is True unless the flag is given.
    """
    cage_parser = argparse.ArgumentParser(add_help=False, parents=[parent_parser])
    cage_parser.add_argument('--bins', default=500, type=int)
    cage_parser.add_argument('--crop', default=10, type=int)
    cage_parser.add_argument('--embed_dim', type=int, default=768)
    cage_parser.add_argument('--return_embed', action='store_false', default=True)
    known, _ = cage_parser.parse_known_args()
    return known
62
+
63
def parser_args_hic(parent_parser):
    """
    Hyperparameters for the downstream model to predict 5kb-resolution Hi-C and ChIA-PET.

    Options of ``parent_parser`` are inherited; unrecognised command-line
    arguments are ignored.
    """
    hic_parser = argparse.ArgumentParser(add_help=False, parents=[parent_parser])
    int_options = {'--bins': 200, '--crop': 4, '--embed_dim': 256}
    for flag, value in int_options.items():
        hic_parser.add_argument(flag, type=int, default=value)
    return hic_parser.parse_known_args()[0]
73
+
74
def parser_args_microc(parent_parser):
    """
    Hyperparameters for the downstream model to predict 1kb-resolution Micro-C.

    Options of ``parent_parser`` are inherited; unrecognised command-line
    arguments are ignored. ``return_embed`` is True unless the flag is given.
    """
    microc_parser = argparse.ArgumentParser(parents=[parent_parser], add_help=False)
    for flag, value in (('--bins', 500), ('--crop', 10), ('--embed_dim', 768)):
        microc_parser.add_argument(flag, default=value, type=int)
    microc_parser.add_argument('--return_embed', default=True, action='store_false')
    parsed, _unused = microc_parser.parse_known_args()
    return parsed
85
+
86
+
87
+
88
+
89
def check_region(chrom, start, end, ref_genome, region_len):
    """Validate the requested region and return it as integers.

    Args:
        chrom: Chromosome number (anything ``int()`` accepts).
        start: Start coordinate of the region.
        end: End coordinate of the region.
        ref_genome: Array whose second dimension is the chromosome length.
        region_len: Required region length (``end - start``).

    Returns:
        ``(int(chrom), start, end)`` with integer coordinates.

    Raises:
        gradio.Error: If a coordinate is missing or not a finite number, the
            region does not have the required length, or it lies within 300
            positions of either chromosome end.
    """
    try:
        start, end = int(start), int(end)
    except (TypeError, ValueError, OverflowError) as err:
        # A blank field (None) or a non-finite number would otherwise escape
        # as a bare conversion error instead of a readable message.
        raise gradio.Error("Please enter the start and the end of the region!") from err
    if end - start != region_len:
        if region_len == 500000:
            raise gradio.Error("Please enter a 500kb region!")
        else:
            raise gradio.Error("Please enter a 1Mb region!")
    # 300 positions are needed on both sides as padding for generate_input.
    upper = ref_genome.shape[1] - 300
    if start < 300 or end > upper:
        raise gradio.Error("The start of input region should be greater than 300 and "
                           "the end of the region should be less than %s!" % upper)
    return int(chrom), start, end
100
+
101
def generate_input(start, end, ref_genome, atac_seq):
    """Cut ``[start, end)`` into 1000-position bins with 300 positions of context.

    The rows of ``ref_genome`` and ``atac_seq`` are stacked, the region is
    split into consecutive bins of 1000 positions, and every bin is extended
    on both sides by the neighbouring 300 positions (taken from the adjacent
    bin, or from just outside the region for the first and last bin).

    Returns an array of shape ``(n_bins, n_rows, 1600)``.
    """
    flank = 300

    def stacked(lo, hi):
        # Reference rows on top of the ATAC rows for positions [lo, hi).
        return np.vstack((ref_genome[:, lo:hi], atac_seq[:, lo:hi]))

    before_region = stacked(start - flank, start)[np.newaxis]
    after_region = stacked(end, end + flank)[np.newaxis]
    bins = rearrange(stacked(start, end), 'n (b l)-> b n l', l=1000)
    # Left context of bin i is the tail of bin i-1 (or the outside pad for bin 0).
    left_context = np.concatenate((before_region, bins[:, :, -flank:]), axis=0)[:-1]
    # Right context of bin i is the head of bin i+1 (or the outside pad for the last bin).
    right_context = np.concatenate((bins[:, :, :flank], after_region), axis=0)[1:]
    return np.concatenate((left_context, bins, right_context), axis=2)
110
+
111
+
112
def search_tf(tf):
    """Return the zero-based line index of ``tf`` in ``data/epigenomes.txt``.

    Raises ``ValueError`` when the name is not listed in the file.
    """
    with open('data/epigenomes.txt', 'r') as handle:
        names = handle.read().splitlines()
    return names.index(tf)
117
+
118
+
119
+
120
def predict_epb(model_path, region, ref_genome, atac_seq, device, cop_type):
    """Predict sigmoid-activated outputs of the pre-training model for a region.

    The weights at ``model_path`` are loaded into the model built by
    ``build_epd_model``, the model is run on the bins produced by
    ``generate_input`` and the first and last rows are trimmed: 10 on each
    side for ``'Micro-C'``, 20 on each side otherwise.
    """
    hyperparams, _ = get_args()

    model = build_epd_model(hyperparams)
    state = torch.load(model_path, map_location=torch.device(device))
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    region_start, region_end = region
    batch = generate_input(region_start, region_end, ref_genome, atac_seq)
    batch = torch.tensor(batch).float().to(device)
    with torch.no_grad():
        logits = model(batch)
        pred_epi = torch.sigmoid(logits).detach().cpu().numpy()

    trim = 10 if cop_type == 'Micro-C' else 20
    return pred_epi[trim:-trim, :]
141
+
142
+
143
def predict_epis(model_path, region, ref_genome, atac_seq, device, cop_type):
    """Run the track model on a region and return its outputs as one 2-D array.

    For ``'Micro-C'`` the region is processed as a single window. Otherwise
    windows of 500000 positions are taken every 480000 positions, the first
    one starting 10000 positions after the region start. The per-window
    outputs are stacked along the first axis.
    """
    _, base_parser = get_args()
    track_args = parser_args_epi(base_parser)
    model = build_track_model(track_args)
    model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
    model.eval()
    model.to(device)

    region_start, region_end = region
    if cop_type == 'Micro-C':
        windows = [generate_input(region_start, region_end, ref_genome, atac_seq)]
    else:
        windows = [
            generate_input(loc - 10000, loc + 490000, ref_genome, atac_seq)
            for loc in range(region_start + 20000, region_end - 20000, 480000)
        ]
    batch = torch.tensor(np.stack(windows)).float().to(device)

    # Windows go through the model one at a time.
    with torch.no_grad():
        per_window = [
            model(batch[k:k + 1]).detach().cpu().numpy()
            for k in range(batch.shape[0])
        ]

    return rearrange(np.vstack(per_window), 'i j k -> (i j) k')
171
+
172
def predict_cage(model_path, region, ref_genome, atac_seq, device, cop_type):
    """Run the CAGE model on a region and return the concatenated predictions.

    For ``'Micro-C'`` the region is processed as a single window. Otherwise
    windows of 500000 positions are taken every 480000 positions, the first
    one starting 10000 positions after the region start. Each window's output
    is squeezed and the results are concatenated in order.
    """
    _, base_parser = get_args()
    cage_args = parser_args_cage(base_parser)
    model = build_cage_model(cage_args)
    model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
    model.eval()
    model.to(device)

    region_start, region_end = region
    if cop_type == 'Micro-C':
        windows = [generate_input(region_start, region_end, ref_genome, atac_seq)]
    else:
        windows = [
            generate_input(loc - 10000, loc + 490000, ref_genome, atac_seq)
            for loc in range(region_start + 20000, region_end - 20000, 480000)
        ]
    batch = torch.tensor(np.stack(windows)).float().to(device)

    # Windows go through the model one at a time.
    with torch.no_grad():
        per_window = [
            model(batch[k:k + 1]).detach().cpu().numpy().squeeze()
            for k in range(batch.shape[0])
        ]
    return np.concatenate(per_window)
198
+
199
def arraytouptri(arrays, args):
    """Place a flat vector into the upper triangle of a square matrix.

    The matrix side is ``args.bins - 2 * args.crop``. ``arrays`` fills the
    upper triangle (diagonal included) in row-major order; everything below
    the diagonal is zero.
    """
    side = args.bins - 2 * args.crop
    upper = np.zeros((side, side))
    rows, cols = np.triu_indices(side)
    upper[rows, cols] = arrays
    return upper
205
def complete_mat(mat):
    """Return ``mat`` plus its transpose, counting the diagonal only once.

    For an upper-triangular input this yields the full symmetric matrix.
    The input array is not modified.
    """
    mirrored = mat.T.copy()
    # Zero the diagonal of the mirrored copy so it is not added twice.
    np.fill_diagonal(mirrored, 0)
    return mat + mirrored
210
+
211
+
212
def predict_hic(model_path, region, ref_genome, atac_seq, device):
    """Predict the Hi-C/ChIA-PET contact maps of a region.

    The model output is squeezed; each slice along its last axis is treated
    as a flattened upper triangle, expanded to a symmetric matrix, and the
    matrices are stacked along a new first axis.
    """
    _, base_parser = get_args()
    hic_args = parser_args_hic(base_parser)
    model = build_hic_model(hic_args)
    model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
    model.eval()
    model.to(device)

    region_start, region_end = region
    window = generate_input(region_start, region_end, ref_genome, atac_seq)
    batch = torch.tensor(np.stack([window])).float().to(device)
    with torch.no_grad():
        flat = model(batch).detach().cpu().numpy().squeeze()

    maps = []
    for k in range(flat.shape[-1]):
        maps.append(complete_mat(arraytouptri(flat[:, k], hic_args)))
    return np.stack(maps)
229
+
230
+
231
def predict_microc(model_path, region, ref_genome, atac_seq, device):
    """Predict the Micro-C contact map of a region.

    The squeezed model output is treated as a flattened upper triangle and
    expanded into a symmetric matrix.
    """
    _, base_parser = get_args()
    microc_args = parser_args_microc(base_parser)
    model = build_microc_model(microc_args)
    model.load_state_dict(torch.load(model_path, map_location=torch.device(device)))
    model.eval()
    model.to(device)

    region_start, region_end = region
    window = generate_input(region_start, region_end, ref_genome, atac_seq)
    batch = torch.tensor(np.stack([window])).float().to(device)
    with torch.no_grad():
        flat = model(batch).detach().cpu().numpy().squeeze()
    upper = arraytouptri(flat, microc_args)
    return complete_mat(upper)
248
+
249
+
250
def _write_bigwig(path, header, chrom_name, starts, ends, values):
    """Write one bigWig file holding ``values`` over the given intervals."""
    bwfile = pyBigWig.open(path, 'w')
    try:
        bwfile.addHeader(header)
        bwfile.addEntries([chrom_name] * len(values), starts, ends=ends, values=values)
    finally:
        # Close even when writing fails so the handle is not leaked.
        bwfile.close()


def _bedpe_lines(matrix, chrom_name, start, interval):
    """Return one tab-separated line per upper-triangle cell of ``matrix``."""
    n_bins = matrix.shape[-1]
    lines = []
    for bin1 in range(n_bins):
        for bin2 in range(bin1, n_bins):
            fields = ['0', chrom_name, str(start + bin1 * interval), '0', '0', chrom_name,
                      str(start + bin2 * interval), '1', str(np.around(matrix[bin1, bin2], 2))]
            lines.append('\t'.join(fields) + '\n')
    return lines


def filetobrowser(out_epis, out_cages, out_cop, chrom, start, end, file_id):
    """Export predictions as bigWig/bedpe files bundled into one zip archive.

    Args:
        out_epis: 2-D array; column ``i`` is written to a bigWig named after
            line ``i`` of ``data/epigenomes.txt``.
        out_cages: 1-D array written to ``cage.bigWig``.
        out_cop: Contact map(s). A last dimension of 480 is written as a
            single ``microc.bedpe`` at 1000 bp per bin; otherwise the first
            three matrices are written as CTCF ChIA-PET, POLR2 ChIA-PET and
            Hi-C ``.bedpe`` files at 5000 bp per bin.
        chrom: Chromosome number (without the ``chr`` prefix).
        start: Coordinate of the first bin.
        end: End coordinate; track bins are 1000 bp wide.
        file_id: Unique id used for the scratch directory and the zip name.

    Returns:
        Path of the zip archive, ``results/formatted_<file_id>.zip``.
    """
    with open('data/epigenomes.txt', 'r') as f:
        epigenomes = f.read().splitlines()

    # bigWig header: (chromosome name, chromosome size) pairs.
    hdr = []
    with open('data/chrom_size_hg38.txt', 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            hdr.append((fields[0], int(fields[1])))

    files_to_zip = file_id
    if os.path.exists(files_to_zip):
        shutil.rmtree(files_to_zip)
    os.mkdir(files_to_zip)

    zip_path = "results/formatted_%s.zip" % file_id
    try:
        chrom_name = 'chr' + str(chrom)
        bin_starts = list(range(start, end, 1000))
        bin_ends = [loc + 1000 for loc in bin_starts]

        for i in range(out_epis.shape[1]):
            _write_bigwig(os.path.join(files_to_zip, "%s.bigWig" % epigenomes[i]),
                          hdr, chrom_name, bin_starts, bin_ends, out_epis[:, i].tolist())
        _write_bigwig(os.path.join(files_to_zip, "cage.bigWig"),
                      hdr, chrom_name, bin_starts, bin_ends, out_cages.tolist())

        if out_cop.shape[-1] == 480:
            with open(os.path.join(files_to_zip, "microc.bedpe"), 'w') as f:
                f.writelines(_bedpe_lines(out_cop, chrom_name, start, 1000))
        else:
            types = ['CTCF_ChIA-PET', 'POLR2_ChIA-PET', 'Hi-C']
            for i, map_type in enumerate(types):
                # Lines are built afresh for every map type; sharing one list
                # made each later file also contain the earlier types' rows.
                with open(os.path.join(files_to_zip, "%s.bedpe" % map_type), 'w') as f:
                    f.writelines(_bedpe_lines(out_cop[i], chrom_name, start, 5000))

        with ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as out_zipfile:
            for file_to_zip in os.listdir(files_to_zip):
                out_zipfile.write(filename=os.path.join(files_to_zip, file_to_zip),
                                  arcname=file_to_zip)
    finally:
        # Remove the scratch directory whether or not the export succeeded.
        shutil.rmtree(files_to_zip, ignore_errors=True)
    return zip_path
310
+
311
+
312
+
313
+
314
+
315
+
316
+
317
+
318
+