In [1]: from pyrap.tables import table
In [2]: tt = table('AF0236_spw01.ms')
Successful readonly open of default-locked table AF0236_spw01.ms: 26 columns, 683354 rows
In [3]: subtab = tt.query(query='ANTENNA1==5 || ANTENNA2==5',columns='DATA')
In [4]: subtab.colnames()
Out[4]: ['DATA']
In [5]: subtab.getcol('DATA').shape
Out[5]: (50496, 1, 4)
import argparse
from daskms import xds_from_ms
import dask
import dask.array as da
def create_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: parser accepting a positional ``ms``
        argument and an optional ``-rc``/``--row-chunks`` integer
        (default 1000).
    """
    p = argparse.ArgumentParser(
        description="Lazily reduce the DATA column per antenna with dask.",
    )
    p.add_argument("ms", help="Path of the Measurement Set to read.")
    p.add_argument(
        "-rc",
        "--row-chunks",
        default=1000,
        type=int,
        help="Number of rows per dask chunk (default: %(default)s).",
    )
    return p
def script():
    """Lazily reduce the DATA column per antenna and compute the results.

    Parses the command line, opens the Measurement Set named there as a
    list of datasets and, for every dataset and every antenna number that
    occurs in ANTENNA1 or ANTENNA2, builds a lazy expression over the DATA
    rows involving that antenna. All expressions are then evaluated in a
    single ``dask.compute`` call.

    Returns:
        list[dict]: one dictionary per dataset, mapping each antenna
        number to the computed (placeholder) ``nanmean`` of the DATA rows
        in which that antenna takes part.
    """
    args = create_parser().parse_args()
    datasets = xds_from_ms(args.ms, chunks={"row": args.row_chunks})

    ds_data = []

    # NOTE(review): presumably one dataset per unique
    # (FIELD_ID, DATA_DESC_ID) pair, the dask-ms default grouping -- confirm.
    for ds in datasets:
        ant1 = ds.ANTENNA1.data
        ant2 = ds.ANTENNA2.data

        # At this point, compute the unique antenna values:
        # we need to know them in order to loop through them.
        uant1, uant2 = dask.compute(da.unique(ant1), da.unique(ant2))

        # An antenna generally shows up in both columns; take the union
        # so that each antenna is handled exactly once.
        uants = sorted(set(uant1.tolist()) | set(uant2.tolist()))

        ant_data = {}

        # Continue to construct a lazy expression for each antenna's data
        for a in uants:
            # Select rows where ANTENNA1 == a or ANTENNA2 == a.
            # Compare the antenna columns themselves: the inverse returned
            # by da.unique(..., return_inverse=True) holds positions in the
            # per-column unique array, which only coincide with antenna
            # numbers when that column contains exactly 0..N-1.
            sel = da.logical_or(ant1 == a, ant2 == a)

            # Select data at the relevant rows
            data = ds.DATA.data[sel]

            # TODO
            # This only exists to prevent memory explosions by
            # reducing to a single value. Remove it in actual code
            data = da.nanmean(data)

            # Stash the lazy expression in a dictionary with the
            # antenna number as the key
            ant_data[a] = data

        # Add lazy antenna expression data for this dataset
        ds_data.append(ant_data)

    # Now actually compute all the lazy expressions (dask traverses
    # native python structures looking for dask objects). dask.compute
    # returns a tuple with one entry per argument, hence the [0].
    return dask.compute(ds_data)[0]
if __name__ == "__main__":
    # Run the command-line workflow only when this file is executed
    # directly, not when it is imported as a module.
    script()