---------------------------------------------------------------------------
FatalError Traceback (most recent call last)
<ipython-input-5-3c081f84aeeb> in <module>()
30 # ))
31 snv_variants.describe()
---> 32 snv_variants.persist()
33 snv_variants.count()
/home/hail/hail.zip/hail/typecheck/check.py in wrapper(*args, **kwargs)
545 def wrapper(*args, **kwargs):
546 args_, kwargs_ = check_all(f, args, kwargs, checkers, is_method=is_method)
--> 547 return f(*args_, **kwargs_)
548
549 update_wrapper(wrapper, f)
/home/hail/hail.zip/hail/table.py in persist(self, storage_level)
1518 Persisted table.
1519 """
-> 1520 return Table(self._jt.persist(storage_level))
1521
1522 def unpersist(self):
/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py in __call__(self, *args)
1131 answer = self.gateway_client.send_command(command)
1132 return_value = get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name)
1134
1135 for temp_arg in temp_args:
/home/hail/hail.zip/hail/utils/java.py in deco(*args, **kwargs)
194 raise FatalError('%s\n\nJava stack trace:\n%s\n'
195 'Hail version: %s\n'
--> 196 'Error summary: %s' % (deepest, full, hail.__version__, deepest)) from None
197 except pyspark.sql.utils.CapturedException as e:
198 raise FatalError('%s\n\nJava stack trace:\n%s\n'
FatalError: AssertionError: assertion failed
Java stack trace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 203 in stage 3.0 failed 20 times, most recent failure: Lost task 203.19 in stage 3.0 (TID 9608, lfrani-sw-d884.c.broad-mpg-gnomad.internal, executor 27): java.lang.AssertionError: assertion failed
at scala.Predef$.assert(Predef.scala:156)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:938)
at is.hail.rvd.OrderedRVD$$anonfun$apply$16$$anon$3.next(OrderedRVD.scala:920)
at scala.collection.Iterator$$anon$12.next(Iterator.scala:444)
at scala.collection.Iterator$$anon$1.next(Iterator.scala:1010)
at scala.collection.Iterator$$anon$1.head(Iterator.scala:997)
at is.hail.utils.richUtils.RichIterator$$anon$5.value(RichIterator.scala:20)
at is.hail.utils.StagingIterator.value(FlipbookIterator.scala:47)
at is.hail.utils.FlipbookIterator$$anon$1.calculateValidity(FlipbookIterator.scala:178)
at is.hail.utils.FlipbookIterator$ValidityCachingStateMachine$class.refreshValidity(FlipbookIterator.scala:167)
# Repro: pull the row tables out of the v2 and v3 QC MatrixTables, join their
# `info` structs onto the SNV concordance table, then persist and count.
# (The adj_concordance join is intentionally left disabled for this run.)
v2 = (
    hl.read_matrix_table(f'{root_dir}/gnomad_v2_b38_chrom20.qc_regions.v2_qc_samples.mt')
    .rows()
)
v3 = (
    hl.read_matrix_table(f'{root_dir}/gnomad_v3_chrom20.hardcalls.split.qc_regions.v2_qc_samples.mt')
    .rows()
)
snv_variants = hl.read_table(f'{root_dir}/gnomad_v2_v3_qc_snv_variant_concordance.annotated.ht')
key = snv_variants.key
snv_variants = snv_variants.annotate(
    v2_info=v2[key].info,
    v3_info=v3[key].info,
    # adj_concordance=adj_variants[key].concordance
)
snv_variants.persist()
snv_variants.count()
Check the table's metadata at ....ht/rows/metadata.json.gz —
does it say OrderedRVDSpec or UnpartitionedRVDSpec?
----------------------------------------
Global fields:
None
----------------------------------------
Row fields:
'locus': locus<GRCh38>
'alleles': array<str>
'n_discordant': int64
'concordance': array<array<int64>>
'dataset': str
'v2_callstats': struct {
AC: array<int32>,
AF: array<float64>,
AN: int32,
homozygote_count: array<int32>
}
'v2_was_split': bool
'v3_callstats': struct {
AC: array<int32>,
AF: array<float64>,
AN: int32,
homozygote_count: array<int32>
}
'v3_was_split': bool
----------------------------------------
Key: ['locus', 'alleles']
----------------------------------------
# Rank variants by CNN score; SNVs and indels are ranked separately
# (indels sort after SNVs because False < True in the order_by).
ht = ht.persist()
# Keep only what ranking needs: an is_indel flag and the score.
rank_ht = ht.select(is_indel=hl.is_indel(ht.alleles[0], ht.alleles[1]), score=ht.CNN_1D_Score)
rank_ht = rank_ht.order_by(rank_ht.is_indel, rank_ht.score)
# Global 0-based index over the sorted table; persist before aggregating
# so the aggregation and the annotate below reuse the same computation.
rank_ht = rank_ht.add_index().persist()
n_snvs = rank_ht.aggregate(hl.agg.count_where(~rank_ht.is_indel))
# Shift indel indices down by the SNV count so each class is ranked from 0.
rank_ht = rank_ht.annotate(idx=hl.cond(rank_ht.is_indel, rank_ht.idx - n_snvs, rank_ht.idx))
gsutil du -s -h gs://path/to
# Import the CNN scores TSV (block-gzipped, hence force_bgz so it can be
# split into many partitions) with type imputation, then write it out as a
# Hail Table.
ht = hl.import_table(
    'gs://gnomad/variant_qc/temp/friedman_cnn_scores.tsv.gz',
    force_bgz=True,
    min_partitions=1000,
    impute=True,
)
ht.write(
    'gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht',
    overwrite=True,
)
Laurent:hail2 laurent$ gsutil ls gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht/rows/parts/
gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht/rows/parts/
gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht/rows/parts/part-0-2-0-0-4c1e0ba4-7fc3-be60-93ab-7160eaea2afa
def main():
    """Import CNN scores from TSV, split multi-allelics, min-rep, rank, and write.

    Reads friedman_cnn_scores.tsv.gz (block-gzipped TSV), writes an
    intermediate Hail Table, explodes comma-delimited alt alleles into one
    row each, normalizes alleles with min_rep, annotates variant type, ranks
    via the project helper, and writes the result keyed by (locus, alleles).

    Relies on project helpers `add_variant_type` and `add_rank`.
    """
    hl.init(log='/variantqc.log')

    # force_bgz: the .gz input is actually block-gzipped, so it can be read
    # in parallel across min_partitions instead of as one stream.
    ht = hl.import_table('gs://gnomad/variant_qc/temp/friedman_cnn_scores.tsv.gz', force_bgz=True, min_partitions=1000, impute=True)
    ht.write('gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht', overwrite=True)
    ht = hl.read_table('gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ht')

    # One row per alt allele: split the comma-delimited Alt column, explode.
    ht = ht.annotate(alt_alleles=ht.Alt.split(','))  # This transforms to a list
    ht = ht.explode('alt_alleles')
    ht = ht.annotate(locus=hl.locus(hl.str(ht.Contig), ht.Pos))

    # Apply minrep to normalize the allele representation.
    # NOTE(review): only the alleles ([1]) are kept from min_rep; the locus
    # is not updated, but min_rep can shift the position — confirm intended.
    ht = ht.annotate(alleles=hl.min_rep(ht.locus, [ht.Ref, ht.alt_alleles])[1])

    # Add variant_type / n_alt_alleles from the project helper, then flatten.
    ht = ht.annotate(vartype=add_variant_type(ht.alleles))
    ht = ht.transmute(variant_type=ht.vartype.variant_type, n_alt_alleles=ht.vartype.n_alt_alleles)

    # Add rank
    print('Adding rank...')
    ht = add_rank(ht)
    ht.key_by('locus', 'alleles').write('gs://gnomad/variant_qc/temp/friedman_cnn_scores.no_chr17.ranked.ht', overwrite=True)