Format In-Situ Sequencing Data

The following script converts In-Situ Sequencing (ISS) data to SpaceTx Format. It is a good example of cropping a region out of single-plane TIFF files during conversion.

The data consists of multiple fields of view. There are 4 rounds, each with 4 primary image channels and 1 DAPI stain. There is a 5th round “DO” that contains the “dots” image with all RNA labeled and a DAPI image.

input data structure:

└── parent
    ├── slideA_1_1st_Cy3.5.TIF
    ├── slideA_1_1st_Cy3.TIF
    ├── slideA_1_1st_Cy5.TIF
    ├── slideA_1_1st_DAPI.TIF
    ├── slideA_1_1st_FITC.TIF
    ├── slideA_1_2nd_Cy3.5.TIF
    ├── slideA_1_2nd_Cy3.TIF
    ├── ...

The locations of the data files for use with this script can be found in the s3_bucket variable.

import argparse
import json
import os
from typing import Mapping, Union

import numpy as np
from import imread
from slicedimage import ImageFormat

from starfish.core.util.argparse import FsExistsType
from starfish.experiment.builder import FetchedTile, TileFetcher, write_experiment_json
from starfish.types import Axes, Coordinates, CoordinateValue

class IssCroppedBreastTile(FetchedTile):
    """One cropped 2D image tile, read from a single image file on disk.

    The file is only read when ``tile_data`` is called; constructing the
    tile just records where the image lives and its physical coordinates.
    """

    def __init__(
            self,
            file_path: str,
            coordinates: Mapping[Union[str, Coordinates], CoordinateValue],
    ) -> None:
        """Store the image location and the coordinates reported for the tile.

        Parameters
        ----------
        file_path : str
            Path of the image file holding the raw (uncropped) plane.
        coordinates : Mapping[Union[str, Coordinates], CoordinateValue]
            Coordinate values returned unchanged by ``coordinates``.
        """
        self.file_path = file_path
        self._coordinates = coordinates

    @property
    def shape(self) -> Mapping[Axes, int]:
        """Size of the cropped tile: 1044 rows (Y) by 1390 columns (X)."""
        # Must agree with the window cut out in ``crop``:
        # 1084 - 40 = 1044 rows, 1410 - 20 = 1390 columns.
        return {Axes.Y: 1044, Axes.X: 1390}

    @property
    def coordinates(self) -> Mapping[Union[str, Coordinates], CoordinateValue]:
        """Coordinates supplied at construction time."""
        return self._coordinates

    @staticmethod
    def crop(img):
        """Return the fixed window ``img[40:1084, 20:1410]`` of a raw image."""
        return img[40:1084, 20:1410]

    def tile_data(self) -> np.ndarray:
        """Read the image at ``file_path`` and return its cropped window."""
        # ``imread`` is expected to be provided by the file-level imports.
        return self.crop(imread(self.file_path))

class ISSCroppedBreastPrimaryTileFetcher(TileFetcher):
    """Locates the primary image file for each (fov, round, channel).

    File names follow ``slideA_<fov>_<round>_<channel>.TIF`` inside
    ``input_dir``. Per-fov coordinates are loaded once from
    ``fabricated_test_coordinates.json`` in the same directory.
    """

    def __init__(self, input_dir):
        """Remember the input directory and load the coordinates file.

        Parameters
        ----------
        input_dir : str
            Directory containing the raw images and
            ``fabricated_test_coordinates.json``.
        """
        self.input_dir = input_dir
        coordinates = os.path.join(input_dir, "fabricated_test_coordinates.json")
        with open(coordinates) as f:
            self.coordinates_dict = json.load(f)

    @property
    def ch_dict(self):
        """Map a channel index to the channel token used in file names."""
        # NOTE(review): index 2 uses 'Cy3 5' (with a space) -- confirm this
        # matches the channel token in the file names on disk.
        ch_dict = {0: 'FITC', 1: 'Cy3', 2: 'Cy3 5', 3: 'Cy5'}
        return ch_dict

    @property
    def round_dict(self):
        """Map a round index (0-3) to the round token used in file names."""
        round_str = ['1st', '2nd', '3rd', '4th']
        round_dict = dict(enumerate(round_str))
        return round_dict

    def get_tile(
            self, fov_id: int, round_label: int, ch_label: int, zplane_label: int) -> FetchedTile:
        """Build the tile for one field of view, round and channel.

        ``zplane_label`` is accepted for interface compatibility but is not
        used: the file name does not depend on it.

        Raises
        ------
        KeyError
            If ``round_label`` or ``ch_label`` has no file-name token, or if
            the coordinates file has no entry for this field of view.
        """
        # get filepath; file names number fields of view from 1, fov_id from 0
        fov_ = str(fov_id + 1)
        round_ = self.round_dict[round_label]
        ch_ = self.ch_dict[ch_label]
        filename = f"slideA_{fov_}_{round_}_{ch_}.TIF"
        file_path = os.path.join(self.input_dir, filename)

        # get coordinates; the JSON is keyed by zero-based, zero-padded fov id
        fov_c_id = f"fov_{fov_id:03d}"
        coordinates = {
            Coordinates.X: self.coordinates_dict[fov_c_id]["xc"],
            Coordinates.Y: self.coordinates_dict[fov_c_id]["yc"],
        }

        return IssCroppedBreastTile(file_path, coordinates)

class ISSCroppedBreastAuxTileFetcher(TileFetcher):
    """Locates the auxiliary ('nuclei' or 'dots') image for each field of view.

    Both auxiliary images come from the "DO" round: ``slideA_<fov>_DO_DAPI.TIF``
    for nuclei and ``slideA_<fov>_DO_Cy3.TIF`` for dots. Per-fov coordinates
    are loaded once from ``fabricated_test_coordinates.json`` in ``input_dir``.
    """

    def __init__(self, input_dir, aux_type):
        """Remember the input directory and aux type; load the coordinates.

        Parameters
        ----------
        input_dir : str
            Directory containing the raw images and
            ``fabricated_test_coordinates.json``.
        aux_type : str
            Either ``'nuclei'`` or ``'dots'``. The value is validated lazily,
            on the first ``get_tile`` call.
        """
        self.input_dir = input_dir
        self.aux_type = aux_type
        coordinates = os.path.join(input_dir, "fabricated_test_coordinates.json")
        with open(coordinates) as f:
            self.coordinates_dict = json.load(f)

    def get_tile(
            self, fov_id: int, round_label: int, ch_label: int, zplane_label: int) -> FetchedTile:
        """Build the auxiliary tile for one field of view.

        ``round_label``, ``ch_label`` and ``zplane_label`` are accepted for
        interface compatibility but are not used: each aux type has exactly
        one image per field of view.

        Raises
        ------
        ValueError
            If ``aux_type`` is neither ``'nuclei'`` nor ``'dots'``.
        KeyError
            If the coordinates file has no entry for this field of view.
        """
        # File names number fields of view from 1, while fov_id starts at 0.
        if self.aux_type == 'nuclei':
            filename = 'slideA_{}_DO_DAPI.TIF'.format(str(fov_id + 1))
        elif self.aux_type == 'dots':
            filename = 'slideA_{}_DO_Cy3.TIF'.format(str(fov_id + 1))
        else:
            # Only reached for an unrecognised aux type; without this branch
            # every 'dots' request would raise.
            msg = 'invalid aux type: {}'.format(self.aux_type)
            msg += ' expected either nuclei or dots'
            raise ValueError(msg)

        file_path = os.path.join(self.input_dir, filename)

        # get coordinates; the JSON is keyed by zero-based, zero-padded fov id
        fov_c_id = f"fov_{fov_id:03d}"
        coordinates = {
            Coordinates.X: self.coordinates_dict[fov_c_id]["xc"],
            Coordinates.Y: self.coordinates_dict[fov_c_id]["yc"],
        }

        return IssCroppedBreastTile(file_path, coordinates=coordinates)

def format_data(input_dir, output_dir, num_fov):
    """Write the raw images in ``input_dir`` out as a SpaceTx experiment.

    Parameters
    ----------
    input_dir : str
        Directory holding the raw image files and the
        ``fabricated_test_coordinates.json`` file read by the tile fetchers.
    output_dir : str
        Directory that receives the experiment JSON and the formatted images.
    num_fov : int
        Number of fields of view to convert.
    """
    # Primary images: 4 rounds x 4 channels, one z-plane each.
    primary_image_dimensions = {
        Axes.ROUND: 4,
        Axes.CH: 4,
        Axes.ZPLANE: 1,
    }

    # Each auxiliary image type is a single plane per field of view.
    aux_name_to_dimensions = {
        'nuclei': {
            Axes.ROUND: 1,
            Axes.CH: 1,
            Axes.ZPLANE: 1,
        },
        'dots': {
            Axes.ROUND: 1,
            Axes.CH: 1,
            Axes.ZPLANE: 1,
        },
    }

    write_experiment_json(
        path=output_dir,
        fov_count=num_fov,
        tile_format=ImageFormat.TIFF,
        primary_image_dimensions=primary_image_dimensions,
        aux_name_to_dimensions=aux_name_to_dimensions,
        primary_tile_fetcher=ISSCroppedBreastPrimaryTileFetcher(input_dir),
        aux_tile_fetcher={
            'nuclei': ISSCroppedBreastAuxTileFetcher(input_dir, 'nuclei'),
            'dots': ISSCroppedBreastAuxTileFetcher(input_dir, 'dots'),
        },
    )

if __name__ == "__main__":
    # This TileFetcher should be run on the raw data whose location is given
    # by ``s3_bucket`` below.
    #
    # The data produced by this TileFetcher have been uploaded.
    # NOTE(review): the destination prefix is missing from this copy of the
    # script, and ``s3_bucket`` holds only the URL scheme -- fill both in.

    s3_bucket = "s3://"
    input_help_msg = "Path to raw data. Raw data can be downloaded from: {}".format(s3_bucket)
    output_help_msg = "Path to output experiment.json and all formatted images it references"
    fov_help_msg = "The number of fovs that should be extracted from the directory"
    parser = argparse.ArgumentParser()
    # FsExistsType is applied to both directories, so output_dir is expected
    # to exist before the script runs.
    parser.add_argument("input_dir", type=FsExistsType(), help=input_help_msg)
    parser.add_argument("output_dir", type=FsExistsType(), help=output_help_msg)
    parser.add_argument("num_fov", type=int, help=fov_help_msg)

    args = parser.parse_args()
    format_data(args.input_dir, args.output_dir, args.num_fov)

Gallery generated by Sphinx-Gallery