{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\nVisualization and analysis of gene expression in tissue sections by spatial transcriptomics\n===========================================================================================\n\nPatrik L. St\u00e5hl, Fredrik Salm\u00e9n, Sanja Vickovic, Anna Lundmark, Jos\u00e9 Fern\u00e1ndez Navarro, Jens Magnusson,\nStefania Giacomello, Michaela Asp, Jakub O. Westholm, Mikael Huss, Annelie Mollbrink,\nSten Linnarsson, Simone Codeluppi, \u00c5ke Borg, Fredrik Pont\u00e9n, Paul Igor Costea, Pelin Sahl\u00e9n,\nJan Mulder, Olaf Bergmann, Joakim Lundeberg, Jonas Fris\u00e9n\n\nThis publication can be found at https://science.sciencemag.org/content/353/6294/78.long and the\ndata referenced below can be downloaded from\nhttps://www.spatialresearch.org/resources-published-datasets/doi-10-1126science-aaf2403/\n\nchecklist:\n- [x] point locations\n- [x] cell locations (NA)\n- [x] cell x gene expression matrix (NA)\n\nload the data\n-------------\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "from io import BytesIO\n",
        "\n",
        "import dask.array as da\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import requests\n",
        "from skimage.transform import matrix_transform\n",
        "\n",
        "import starspace\n",
        "# Explicit names instead of `import *`, so every constant used below is traceable.\n",
        "from starspace.constants import (\n",
        "    ASSAYS,\n",
        "    MATRIX_AXES,\n",
        "    MATRIX_CHUNK_SIZE,\n",
        "    MATRIX_REQUIRED_FEATURES,\n",
        "    MATRIX_REQUIRED_REGIONS,\n",
        "    OPTIONAL_ATTRIBUTES,\n",
        "    REQUIRED_ATTRIBUTES,\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Base URL of the raw input files.\n",
        "DATA_URL = \"https://d24h2xsgaj29mf.cloudfront.net/raw/spatial_transcriptomics_stahl_2016/\"\n",
        "# Seconds to wait on the server before failing, so a stalled download cannot hang the kernel.\n",
        "REQUEST_TIMEOUT = 60\n",
        "# Where the converted matrix is written.\n",
        "# NOTE(review): the slug says \"prostate-cancer\" although the input is the Layer1_BC section;\n",
        "# it is kept unchanged so the output path stays stable -- rename together with its consumers.\n",
        "OUTPUT_URL = \"spatial-transcriptomics-stahl-2016-science-prostate-cancer\"\n",
        "\n",
        "\n",
        "def fetch(filename):\n",
        "    \"\"\"Return the raw bytes of ``filename`` downloaded from ``DATA_URL``.\n",
        "\n",
        "    Raises ``requests.HTTPError`` for a 4xx/5xx response, so an error page is never\n",
        "    parsed as if it were data.\n",
        "    \"\"\"\n",
        "    response = requests.get(DATA_URL + filename, timeout=REQUEST_TIMEOUT)\n",
        "    response.raise_for_status()\n",
        "    return response.content"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Count table: one row per spot (row label \"<x>x<y>\"), one column per gene.\n",
        "counts_df = pd.read_csv(\n",
        "    BytesIO(fetch(\"Layer1_BC_count_matrix-1.tsv\")), sep=\"\\t\", index_col=0\n",
        ")\n",
        "\n",
        "# The file holds nine whitespace-separated numbers; they are laid out as a 3x3 matrix\n",
        "# and transposed before being applied to the spot coordinates.\n",
        "transform = np.array(\n",
        "    [float(v) for v in fetch(\"Layer1_BC_transformation.txt\").decode().split()]\n",
        ").reshape(3, 3).T"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\ntransform the spot coordinates\n------------------------------\n\nThe row labels of the count matrix have the form `<x>x<y>`. They are parsed into numeric\ncoordinates and mapped through the 3x3 matrix loaded from `Layer1_BC_transformation.txt`.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# One numeric (x, y) pair per spot, in the same order as the rows of `counts_df`.\n",
        "spot_xy = np.array(\n",
        "    [[float(part) for part in label.split(\"x\")] for label in counts_df.index]\n",
        ")\n",
        "assert spot_xy.shape == (counts_df.shape[0], 2), \"spot labels must look like '<x>x<y>'\"\n",
        "\n",
        "spot_xy_transformed = matrix_transform(spot_xy, transform)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\nbuild and save the matrix\n-------------------------\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "attributes = {\n",
        "    REQUIRED_ATTRIBUTES.AUTHORS: (\n",
        "        \"Patrik L. St\u00e5hl\", \"Fredrik Salm\u00e9n\", \"Sanja Vickovic\", \"Anna Lundmark\",\n",
        "        \"Jos\u00e9 Fern\u00e1ndez Navarro\", \"Jens Magnusson\", \"Stefania Giacomello\", \"Michaela Asp\",\n",
        "        \"Jakub O. Westholm\", \"Mikael Huss\", \"Annelie Mollbrink\", \"Sten Linnarsson\",\n",
        "        \"Simone Codeluppi\", \"\u00c5ke Borg\", \"Fredrik Pont\u00e9n\", \"Paul Igor Costea\", \"Pelin Sahl\u00e9n\",\n",
        "        \"Jan Mulder\", \"Olaf Bergmann\", \"Joakim Lundeberg\", \"Jonas Fris\u00e9n\"\n",
        "    ),\n",
        "    REQUIRED_ATTRIBUTES.YEAR: 2016,\n",
        "    # The human sample of this publication is a breast cancer biopsy (the \"BC\" in the\n",
        "    # input file names), not prostate cancer.\n",
        "    REQUIRED_ATTRIBUTES.SAMPLE_TYPE: \"breast cancer\",\n",
        "    REQUIRED_ATTRIBUTES.ORGANISM: \"human\",\n",
        "    REQUIRED_ATTRIBUTES.ASSAY: ASSAYS.SPATIAL_TRANSCRIPTOMICS.value,\n",
        "    OPTIONAL_ATTRIBUTES.PUBLICATION_NAME: (\n",
        "        \"Visualization and analysis of gene expression in tissue sections by spatial \"\n",
        "        \"transcriptomics\"\n",
        "    ),\n",
        "    OPTIONAL_ATTRIBUTES.PUBLICATION_URL: \"https://science.sciencemag.org/content/353/6294/78.long\"\n",
        "}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# TODO: open question carried over from the original conversion -- the spots (regions)\n",
        "# may also need a radius.\n",
        "dims = (MATRIX_AXES.REGIONS.value, MATRIX_AXES.FEATURES.value)\n",
        "coords = {\n",
        "    MATRIX_REQUIRED_REGIONS.REGION_ID: (MATRIX_AXES.REGIONS, np.arange(counts_df.shape[0])),\n",
        "    MATRIX_REQUIRED_REGIONS.X_REGION: (MATRIX_AXES.REGIONS, spot_xy_transformed[:, 0]),\n",
        "    MATRIX_REQUIRED_REGIONS.Y_REGION: (MATRIX_AXES.REGIONS, spot_xy_transformed[:, 1]),\n",
        "    MATRIX_REQUIRED_FEATURES.GENE_NAME: (MATRIX_AXES.FEATURES, counts_df.columns)\n",
        "}\n",
        "# Kept under a new name so `counts_df` stays available if this cell is re-run.\n",
        "counts = da.from_array(counts_df.values, chunks=MATRIX_CHUNK_SIZE)\n",
        "\n",
        "matrix = starspace.Matrix.from_expression_data(\n",
        "    data=counts, coords=coords, dims=dims, name=\"matrix\", attrs=attributes\n",
        ")\n",
        "matrix.save_zarr(url=OUTPUT_URL)"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}