{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7619b911-e058-4c11-93d0-8aa1d73305b9",
   "metadata": {},
   "source": [
    "# GMRT conversion guide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6304f081-3151-4813-a383-bc1e0d17983c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "XRADIO version 1.1.3 already installed.\n"
     ]
    }
   ],
   "source": [
    "# Make sure XRADIO is importable; if it is not, install it into the\n",
    "# environment of the interpreter running this kernel and import it again.\n",
    "import importlib\n",
    "from importlib.metadata import version\n",
    "import os  # not used in this cell; kept in case later cells rely on it\n",
    "import subprocess\n",
    "import sys\n",
    "\n",
    "try:\n",
    "    import xradio\n",
    "\n",
    "    print(\"XRADIO version\", version(\"xradio\"), \"already installed.\")\n",
    "except ImportError as exc:\n",
    "    print(f\"Could not import XRADIO: {exc}\")\n",
    "    print(\"Installing XRADIO\")\n",
    "\n",
    "    # Invoke pip through the kernel's own interpreter so the package is\n",
    "    # installed where this notebook imports from (a bare `pip` on PATH may\n",
    "    # belong to a different environment). check_call raises\n",
    "    # CalledProcessError on a non-zero exit instead of failing silently.\n",
    "    subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"xradio\"])\n",
    "\n",
    "    # The package appeared while the interpreter was running, so the import\n",
    "    # system's finder caches must be refreshed before importing it.\n",
    "    importlib.invalidate_caches()\n",
    "\n",
    "    import xradio\n",
    "\n",
    "    print(\"xradio version\", version(\"xradio\"), \"installed.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "611036ee-e72f-4b15-865d-94be871e15bf",
   "metadata": {},
   "source": [
    "## Download dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1fb89a45-a951-41c9-bbaf-75f50a6e8fea",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[\u001b[38;2;128;05;128m2026-04-20 15:18:03,597\u001b[0m] \u001b[38;2;50;50;205m INFO\u001b[0m\u001b[38;2;112;128;144m toolviper: \u001b[0m Initializing download... 
\n",
      "[\u001b[38;2;128;05;128m2026-04-20 15:18:03,598\u001b[0m] \u001b[38;2;50;50;205m INFO\u001b[0m\u001b[38;2;112;128;144m toolviper: \u001b[0m File already exists: /Users/vdesouza/work/xradio/docs/source/measurement_set/guides/gmrt.ms \n"
     ]
    }
   ],
   "source": [
    "import toolviper\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "# Name of the example GMRT MeasurementSet. It is defined once here and\n",
    "# reused by the conversion cell so the two steps cannot drift apart.\n",
    "file_path_os = \"gmrt.ms\"\n",
    "\n",
    "# Fetch the dataset; per the recorded log, an existing copy is reported as\n",
    "# \"File already exists\" rather than downloaded again.\n",
    "toolviper.utils.data.download(file=file_path_os)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57ddd4f9-af5d-4b6b-9355-b60bf9b61d21",
   "metadata": {},
   "source": [
    "## Convert to Processing Set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "87431f1b-b94d-442a-88b4-936f74be530c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[\u001b[38;2;128;05;128m2026-04-20 15:18:03,897\u001b[0m] \u001b[38;2;50;50;205m INFO\u001b[0m\u001b[38;2;112;128;144m toolviper: \u001b[0m Updated partition scheme used: ['DATA_DESC_ID', 'OBSERVATION_ID'] \n",
      "[\u001b[38;2;128;05;128m2026-04-20 15:18:03,898\u001b[0m] \u001b[38;2;50;50;205m INFO\u001b[0m\u001b[38;2;112;128;144m toolviper: \u001b[0m Number of partitions: 1 \n",
      "[\u001b[38;2;128;05;128m2026-04-20 15:18:03,898\u001b[0m] \u001b[38;2;50;50;205m INFO\u001b[0m\u001b[38;2;112;128;144m toolviper: \u001b[0m OBSERVATION_ID [0], DDI [0], STATE [None], FIELD [3], SCAN [7], EPHEMERIS [None] \n"
     ]
    }
   ],
   "source": [
    "from xradio.measurement_set import convert_msv2_to_processing_set\n",
    "\n",
    "# Convert the MeasurementSet fetched in the download cell above.\n",
    "ms_file = file_path_os\n",
    "\n",
    "# Chunk sizes are given for frequency and time only. The remaining main\n",
    "# dimensions (baseline, polarization) are not listed here; presumably they\n",
    "# are left unchunked - confirm against the XRADIO documentation.\n",
    "main_chunksize = {\"frequency\": 1, \"time\": 20}\n",
    "\n",
    "outfile = \"gmrt.ps.zarr\"\n",
    "convert_msv2_to_processing_set(\n",
    "    in_file=ms_file,\n",
    "    out_file=outfile,\n",
    "    parallel_mode=\"none\",\n",
    "    persistence_mode=\"w\",\n",
    "    main_chunksize=main_chunksize,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b2f1eab6-f0d1-48a0-b10a-3a4290d41a2b",
   "metadata": {},
   "source": [
    "## Processing Set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "bb4686bf-f333-48ff-b509-ef0f58f0a3f7",
   "metadata": {},
"outputs": [ { "data": { "text/html": [ "
| \n", " | name | \n", "scan_intents | \n", "shape | \n", "execution_block_UID | \n", "polarization | \n", "scan_name | \n", "spw_name | \n", "spw_intents | \n", "field_name | \n", "source_name | \n", "line_name | \n", "field_coords | \n", "session_reference_UID | \n", "scheduling_block_UID | \n", "project_UID | \n", "start_frequency | \n", "end_frequency | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "gmrt_0 | \n", "[None] | \n", "(2, 435, 64, 4) | \n", "--- | \n", "[RR, RL, LR, LL] | \n", "[7] | \n", "spw_0 | \n", "[UNSPECIFIED] | \n", "[3C286_3] | \n", "[3C286_3] | \n", "[] | \n", "[fk5, 13h31m08.28s, 30d30m32.90s] | \n", "--- | \n", "--- | \n", "\n", " | 551562500.0 | \n", "748437500.0 | \n", "
<xarray.DataTree 'gmrt_0'>\n",
"Group: /gmrt_0\n",
"│ Dimensions: (time: 2, baseline_id: 435, frequency: 64,\n",
"│ polarization: 4, uvw_label: 3)\n",
"│ Coordinates:\n",
"│ * time (time) float64 16B 1.72e+09 1.72e+09\n",
"│ field_name (time) <U27 216B dask.array<chunksize=(2,), meta=np.ndarray>\n",
"│ scan_name (time) <U21 168B dask.array<chunksize=(2,), meta=np.ndarray>\n",
"│ * baseline_id (baseline_id) int64 3kB 0 1 2 3 ... 432 433 434\n",
"│ baseline_antenna1_name (baseline_id) <U3 5kB dask.array<chunksize=(435,), meta=np.ndarray>\n",
"│ baseline_antenna2_name (baseline_id) <U3 5kB dask.array<chunksize=(435,), meta=np.ndarray>\n",
"│ * frequency (frequency) float64 512B 5.516e+08 ... 7.484e+08\n",
"│ * polarization (polarization) <U2 32B 'RR' 'RL' 'LR' 'LL'\n",
"│ * uvw_label (uvw_label) <U1 12B 'u' 'v' 'w'\n",
"│ Data variables:\n",
"│ EFFECTIVE_INTEGRATION_TIME (time, baseline_id) float64 7kB dask.array<chunksize=(2, 435), meta=np.ndarray>\n",
"│ FLAG (time, baseline_id, frequency, polarization) bool 223kB dask.array<chunksize=(2, 435, 1, 4), meta=np.ndarray>\n",
"│ TIME_CENTROID (time, baseline_id) float64 7kB dask.array<chunksize=(2, 435), meta=np.ndarray>\n",
"│ UVW (time, baseline_id, uvw_label) float64 21kB dask.array<chunksize=(2, 435, 3), meta=np.ndarray>\n",
"│ VISIBILITY (time, baseline_id, frequency, polarization) complex64 2MB dask.array<chunksize=(2, 435, 1, 4), meta=np.ndarray>\n",
"│ WEIGHT (time, baseline_id, frequency, polarization) float32 891kB dask.array<chunksize=(2, 435, 1, 4), meta=np.ndarray>\n",
"│ Attributes:\n",
"│ creation_date: 2026-04-20T21:18:03.911368+00:00\n",
"│ creator: {'software_name': 'xradio', 'version': '1.1.3'}\n",
"│ data_groups: {'base': {'correlated_data': 'VISIBILITY', 'date': '20...\n",
"│ observation_info: {'observer': ['TEST_PM'], 'observing_log': '[]', 'proj...\n",
"│ processor_info: {'sub_type': '', 'type': ''}\n",
"│ schema_version: 4.0.0\n",
"│ type: visibility\n",
"├── Group: /gmrt_0/antenna_xds\n",
"│ Dimensions: (antenna_name: 30, cartesian_pos_label: 3,\n",
"│ receptor_label: 2)\n",
"│ Coordinates:\n",
"│ * antenna_name (antenna_name) <U3 360B 'C00' 'C01' ... 'W05' 'W06'\n",
"│ mount (antenna_name) <U6 720B dask.array<chunksize=(30,), meta=np.ndarray>\n",
"│ station_name (antenna_name) <U6 720B dask.array<chunksize=(30,), meta=np.ndarray>\n",
"│ telescope_name (antenna_name) <U4 480B dask.array<chunksize=(30,), meta=np.ndarray>\n",
"│ * cartesian_pos_label (cartesian_pos_label) <U1 12B 'x' 'y' 'z'\n",
"│ * receptor_label (receptor_label) <U5 40B 'pol_0' 'pol_1'\n",
"│ polarization_type (antenna_name, receptor_label) <U1 240B dask.array<chunksize=(30, 2), meta=np.ndarray>\n",
"│ Data variables:\n",
"│ ANTENNA_DISH_DIAMETER (antenna_name) float64 240B dask.array<chunksize=(30,), meta=np.ndarray>\n",
"│ ANTENNA_POSITION (antenna_name, cartesian_pos_label) float64 720B dask.array<chunksize=(30, 3), meta=np.ndarray>\n",
"│ ANTENNA_RECEPTOR_ANGLE (antenna_name, receptor_label) float64 480B dask.array<chunksize=(30, 2), meta=np.ndarray>\n",
"│ Attributes:\n",
"│ overall_telescope_name: GMRT\n",
"│ relocatable_antennas: False\n",
"│ type: antenna\n",
"└── Group: /gmrt_0/field_and_source_base_xds\n",
" Dimensions: (field_name: 1, sky_dir_label: 2,\n",
" line_label: 1)\n",
" Coordinates:\n",
" * field_name (field_name) <U27 108B '3C286_3'\n",
" source_name (field_name) <U27 108B dask.array<chunksize=(1,), meta=np.ndarray>\n",
" * sky_dir_label (sky_dir_label) <U3 24B 'ra' 'dec'\n",
" * line_label (line_label) <U21 84B '0'\n",
" line_name (field_name, line_label) <U1 4B dask.array<chunksize=(1, 1), meta=np.ndarray>\n",
" Data variables:\n",
" FIELD_PHASE_CENTER_DIRECTION (field_name, sky_dir_label) float64 16B dask.array<chunksize=(1, 2), meta=np.ndarray>\n",
" LINE_REST_FREQUENCY (field_name, line_label) float64 8B dask.array<chunksize=(1, 1), meta=np.ndarray>\n",
" LINE_SYSTEMIC_VELOCITY (field_name, line_label) float64 8B dask.array<chunksize=(1, 1), meta=np.ndarray>\n",
" SOURCE_DIRECTION (field_name, sky_dir_label) float64 16B dask.array<chunksize=(1, 2), meta=np.ndarray>\n",
" Attributes:\n",
" type: field_and_source<xarray.DataArray 'frequency' (frequency: 64)> Size: 512B\n",
"array([5.515625e+08, 5.546875e+08, 5.578125e+08, 5.609375e+08, 5.640625e+08,\n",
" 5.671875e+08, 5.703125e+08, 5.734375e+08, 5.765625e+08, 5.796875e+08,\n",
" 5.828125e+08, 5.859375e+08, 5.890625e+08, 5.921875e+08, 5.953125e+08,\n",
" 5.984375e+08, 6.015625e+08, 6.046875e+08, 6.078125e+08, 6.109375e+08,\n",
" 6.140625e+08, 6.171875e+08, 6.203125e+08, 6.234375e+08, 6.265625e+08,\n",
" 6.296875e+08, 6.328125e+08, 6.359375e+08, 6.390625e+08, 6.421875e+08,\n",
" 6.453125e+08, 6.484375e+08, 6.515625e+08, 6.546875e+08, 6.578125e+08,\n",
" 6.609375e+08, 6.640625e+08, 6.671875e+08, 6.703125e+08, 6.734375e+08,\n",
" 6.765625e+08, 6.796875e+08, 6.828125e+08, 6.859375e+08, 6.890625e+08,\n",
" 6.921875e+08, 6.953125e+08, 6.984375e+08, 7.015625e+08, 7.046875e+08,\n",
" 7.078125e+08, 7.109375e+08, 7.140625e+08, 7.171875e+08, 7.203125e+08,\n",
" 7.234375e+08, 7.265625e+08, 7.296875e+08, 7.328125e+08, 7.359375e+08,\n",
" 7.390625e+08, 7.421875e+08, 7.453125e+08, 7.484375e+08])\n",
"Coordinates:\n",
" * frequency (frequency) float64 512B 5.516e+08 5.547e+08 ... 7.484e+08\n",
"Attributes:\n",
" channel_width: {'attrs': {'type': 'quantity', 'units': 'Hz'}, ...\n",
" observer: TOPO\n",
" reference_frequency: {'attrs': {'observer': 'TOPO', 'type': 'spectra...\n",
" spectral_window_intents: ['UNSPECIFIED']\n",
" spectral_window_name: spw_0\n",
" type: spectral_coord\n",
" units: Hz<xarray.DataTree 'antenna_xds'>\n",
"Group: /gmrt_0/antenna_xds\n",
" Dimensions: (time: 2, baseline_id: 435, frequency: 64,\n",
" polarization: 4, uvw_label: 3, antenna_name: 30,\n",
" cartesian_pos_label: 3, receptor_label: 2)\n",
" Coordinates:\n",
" * antenna_name (antenna_name) <U3 360B 'C00' 'C01' ... 'W05' 'W06'\n",
" mount (antenna_name) <U6 720B dask.array<chunksize=(30,), meta=np.ndarray>\n",
" station_name (antenna_name) <U6 720B dask.array<chunksize=(30,), meta=np.ndarray>\n",
" telescope_name (antenna_name) <U4 480B dask.array<chunksize=(30,), meta=np.ndarray>\n",
" * cartesian_pos_label (cartesian_pos_label) <U1 12B 'x' 'y' 'z'\n",
" * receptor_label (receptor_label) <U5 40B 'pol_0' 'pol_1'\n",
" polarization_type (antenna_name, receptor_label) <U1 240B dask.array<chunksize=(30, 2), meta=np.ndarray>\n",
" Inherited coordinates:\n",
" * baseline_id (baseline_id) int64 3kB 0 1 2 3 ... 431 432 433 434\n",
" * frequency (frequency) float64 512B 5.516e+08 ... 7.484e+08\n",
" * polarization (polarization) <U2 32B 'RR' 'RL' 'LR' 'LL'\n",
" * time (time) float64 16B 1.72e+09 1.72e+09\n",
" * uvw_label (uvw_label) <U1 12B 'u' 'v' 'w'\n",
" Data variables:\n",
" ANTENNA_DISH_DIAMETER (antenna_name) float64 240B dask.array<chunksize=(30,), meta=np.ndarray>\n",
" ANTENNA_POSITION (antenna_name, cartesian_pos_label) float64 720B dask.array<chunksize=(30, 3), meta=np.ndarray>\n",
" ANTENNA_RECEPTOR_ANGLE (antenna_name, receptor_label) float64 480B dask.array<chunksize=(30, 2), meta=np.ndarray>\n",
" Attributes:\n",
" overall_telescope_name: GMRT\n",
" relocatable_antennas: False\n",
" type: antenna<xarray.DatasetView> Size: 4kB\n",
"Dimensions: (field_name: 1, sky_dir_label: 2,\n",
" line_label: 1, baseline_id: 435,\n",
" frequency: 64, polarization: 4, time: 2,\n",
" uvw_label: 3)\n",
"Coordinates:\n",
" * field_name (field_name) <U27 108B '3C286_3'\n",
" source_name (field_name) <U27 108B dask.array<chunksize=(1,), meta=np.ndarray>\n",
" * sky_dir_label (sky_dir_label) <U3 24B 'ra' 'dec'\n",
" * line_label (line_label) <U21 84B '0'\n",
" line_name (field_name, line_label) <U1 4B dask.array<chunksize=(1, 1), meta=np.ndarray>\n",
" * baseline_id (baseline_id) int64 3kB 0 1 2 ... 432 433 434\n",
" * frequency (frequency) float64 512B 5.516e+08 ... 7.48...\n",
" * polarization (polarization) <U2 32B 'RR' 'RL' 'LR' 'LL'\n",
" * time (time) float64 16B 1.72e+09 1.72e+09\n",
" * uvw_label (uvw_label) <U1 12B 'u' 'v' 'w'\n",
"Data variables:\n",
" FIELD_PHASE_CENTER_DIRECTION (field_name, sky_dir_label) float64 16B dask.array<chunksize=(1, 2), meta=np.ndarray>\n",
" LINE_REST_FREQUENCY (field_name, line_label) float64 8B dask.array<chunksize=(1, 1), meta=np.ndarray>\n",
" LINE_SYSTEMIC_VELOCITY (field_name, line_label) float64 8B dask.array<chunksize=(1, 1), meta=np.ndarray>\n",
" SOURCE_DIRECTION (field_name, sky_dir_label) float64 16B dask.array<chunksize=(1, 2), meta=np.ndarray>\n",
"Attributes:\n",
" type: field_and_source