{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\nHigh-throughput single-cell gene-expression profiling with multiplexed error-robust fluorescence in situ hybridization\n======================================================================================================================\n\nJeffrey R. Moffitt, Junjie Hao, Guiping Wang, Kok Hao Chen, Hazen P. Babcock, Xiaowei Zhuang\n\nThis publication can be found at https://www.pnas.org/content/113/39/11046 and the\ndata referenced below can be downloaded from\ns3://starfish.data.published/MERFISH/20181005/starfish_results/published_MERFISH_decoded_results.csv\n\nChecklist:\n- [x] point locations\n- [ ] cell locations\n- [ ] cell x gene expression matrix (derivable)\n\nThis file converts point locations constructed with a starfish pipeline that has 99.7%\ncorrespondence to Jeff Moffitt's original MATLAB processing of these same data. Minor deviations\nare the result of numerical differences in deconvolution algorithms between MATLAB and Python.\n\nLoad the data\n-------------\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from io import BytesIO\n",
        "\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import requests\n",
        "\n",
        "import starspace\n",
        "from starspace.constants import (\n",
        "    ASSAYS,\n",
        "    OPTIONAL_ATTRIBUTES,\n",
        "    REQUIRED_ATTRIBUTES,\n",
        "    SPOTS_OPTIONAL_VARIABLES,\n",
        "    SPOTS_REQUIRED_VARIABLES,\n",
        ")\n",
        "\n",
        "# download the published csv of decoded MERFISH results\n",
        "response = requests.get(\n",
        "    \"https://d24h2xsgaj29mf.cloudfront.net/raw/merfish_moffit_2016_pnas_u2-os/\"\n",
        "    \"published_MERFISH_decoded_results.csv\"\n",
        ")\n",
        "# fail loudly on an HTTP error instead of handing an error page to the csv parser\n",
        "response.raise_for_status()\n",
        "\n",
        "data = pd.read_csv(BytesIO(response.content), index_col=0)\n",
        "\n",
        "# convert distance to quality; the column itself is renamed to the quality field by\n",
        "# column_map below\n",
        "data['distance'] = 1 - data['distance']\n",
        "\n",
        "# drop the passes_thresholds column, this data has been conditioned on that previously\n",
        "assert np.all(data['passes_thresholds'])\n",
        "data = data.drop('passes_thresholds', axis=1)\n",
        "\n",
        "# drop zc: the assertion confirms it is (approximately) 0.0005 for every spot, so it\n",
        "# carries no information\n",
        "assert np.allclose(data['zc'], 0.0005)\n",
        "data = data.drop('zc', axis=1)\n",
        "\n",
        "# map the remaining csv column names onto the starspace spot field names\n",
        "column_map = {\n",
        "    'radius': SPOTS_OPTIONAL_VARIABLES.RADIUS.value,\n",
        "    'target': SPOTS_REQUIRED_VARIABLES.GENE_NAME.value,\n",
        "    'distance': SPOTS_OPTIONAL_VARIABLES.QUALITY.value,\n",
        "    'xc': SPOTS_REQUIRED_VARIABLES.X_SPOT.value,\n",
        "    'yc': SPOTS_REQUIRED_VARIABLES.Y_SPOT.value\n",
        "}\n",
        "\n",
        "# a column that is missing from column_map raises KeyError here, so an unexpected column\n",
        "# is never carried through silently\n",
        "columns = [column_map[c] for c in data.columns]\n",
        "data.columns = columns\n",
        "\n",
        "# dataset-level metadata stored alongside the spots\n",
        "attributes = {\n",
        "    REQUIRED_ATTRIBUTES.ORGANISM: \"human\",\n",
        "    REQUIRED_ATTRIBUTES.ASSAY: ASSAYS.MERFISH.value,\n",
        "    REQUIRED_ATTRIBUTES.YEAR: 2016,\n",
        "    REQUIRED_ATTRIBUTES.AUTHORS: [\n",
        "        \"Jeffrey R. Moffitt\", \"Junjie Hao\", \"Guiping Wang\", \"Kok Hao Chen\", \"Hazen P. Babcock\",\n",
        "        \"Xiaowei Zhuang\"\n",
        "    ],\n",
        "    REQUIRED_ATTRIBUTES.SAMPLE_TYPE: \"osteosarcoma (bone, epithelial) cell line\",\n",
        "    OPTIONAL_ATTRIBUTES.PUBLICATION_NAME: (\n",
        "        \"High-throughput single-cell gene-expression profiling with multiplexed error-robust \"\n",
        "        \"fluorescence in situ hybridization\"\n",
        "    ),\n",
        "    OPTIONAL_ATTRIBUTES.PUBLICATION_URL: \"https://www.pnas.org/content/113/39/11046\"\n",
        "}\n",
        "\n",
        "spots = starspace.Spots.from_spot_data(data, attributes)\n",
        "# NOTE(review): s3_url is never used in this cell; presumably it records where the saved\n",
        "# store is uploaded afterwards -- confirm. Only `url` is passed to save_zarr.\n",
        "s3_url = \"s3://starfish.data.output-warehouse/merfish-moffit-2016-pnas-u2os/\"\n",
        "url = \"merfish-moffit-2016-pnas-u2os/\"\n",
        "spots.save_zarr(url)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}