# Persist the input table before deriving the ranking table from it.
ht = ht.persist()

# Select the two fields used for ranking: an indel flag computed from the
# first two alleles, and the CNN score.
rank_ht = ht.select(
    is_indel=hl.is_indel(ht.alleles[0], ht.alleles[1]),
    score=ht.CNN_1D_Score,
)

# Order rows by the indel flag first and the score second, number them with a
# running `idx`, and persist the result since it is used twice below.
rank_ht = rank_ht.order_by(rank_ht.is_indel, rank_ht.score).add_index()
rank_ht = rank_ht.persist()

# Count the rows that are not indels.
n_snvs = rank_ht.aggregate(hl.agg.count_where(~rank_ht.is_indel))

# Indel rows get the non-indel count subtracted from `idx`; other rows keep it.
# NOTE(review): presumably this restarts the indel index at 0, which relies on
# all non-indel rows sorting ahead of the indel rows -- confirm.
rank_ht = rank_ht.annotate(
    idx=hl.cond(rank_ht.is_indel, rank_ht.idx - n_snvs, rank_ht.idx)
)
gsutil du -s -h gs://path/to
# Import the CNN score TSV (treated as block-gzipped, column types imputed,
# at least 1000 partitions) and write it out as a Hail table.
ht = hl.import_table(
    'gs://gnomad/variant_qc/temp/friedman_cnn_scores.tsv.gz',
    impute=True,
    min_partitions=1000,
    force_bgz=True,
)
ht.write(
    'gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht',
    overwrite=True,
)
Laurent:hail2 laurent$ gsutil ls gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht/rows/parts/
gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht/rows/parts/
gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht/rows/parts/part-0-2-0-0-4c1e0ba4-7fc3-be60-93ab-7160eaea2afa
def main():
    """Import CNN scores, normalise the variants, rank them and write the result.

    Steps, in order:

    1. Import the CNN score TSV, write it as a Hail table and read that
       table back.
    2. Split the comma-separated ``Alt`` column and explode it so each row
       carries a single alternate allele.
    3. Build a locus from ``Contig``/``Pos`` and apply ``hl.min_rep`` to the
       (locus, [Ref, alt]) pair, keeping both the resulting locus and alleles.
    4. Annotate ``variant_type`` and ``n_alt_alleles`` using
       ``add_variant_type``.
    5. Add ranks with ``add_rank`` and write the table keyed by
       ``locus`` and ``alleles``.
    """
    hl.init(log='/variantqc.log')

    ht = hl.import_table(
        'gs://gnomad/variant_qc/temp/friedman_cnn_scores.tsv.gz',
        force_bgz=True,
        min_partitions=1000,
        impute=True,
    )
    ht.write('gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht', overwrite=True)
    ht = hl.read_table('gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht')

    # One row per alternate allele: split() yields an array, explode() fans it out.
    ht = ht.annotate(alt_alleles=ht.Alt.split(','))
    ht = ht.explode('alt_alleles')
    ht = ht.annotate(locus=hl.locus(hl.str(ht.Contig), ht.Pos))

    # Apply minrep. min_rep returns the (possibly shifted) locus together with
    # the trimmed alleles; keeping only the alleles would pair them with the
    # un-normalised position, so both elements are stored.
    min_repped = hl.min_rep(ht.locus, [ht.Ref, ht.alt_alleles])
    ht = ht.annotate(locus=min_repped[0], alleles=min_repped[1])

    # Add variant_type
    ht = ht.annotate(vartype=add_variant_type(ht.alleles))
    ht = ht.transmute(
        variant_type=ht.vartype.variant_type,
        n_alt_alleles=ht.vartype.n_alt_alleles,
    )

    # Add rank
    print('Adding rank...')
    ht = add_rank(ht)

    ht.key_by('locus', 'alleles').write(
        'gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ranked.ht',
        overwrite=True,
    )