Newer
Older
notebooks / xicorr.ipynb
Morteza Ansarinia on 21 Feb 2022 4 KB Created using Colaboratory
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "xicorr.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyPUlsT6KuGfIwhEKgliVO+J",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/morteza/notebooks/blob/master/xicorr.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "\n",
        "import seaborn as sns; sns.set()"
      ],
      "metadata": {
        "id": "-4vX69aFhGlb"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "-1bLlqB6hDjQ"
      },
      "outputs": [],
      "source": [
        "\n",
        "def xicorr(X,Y):\n",
        "    n = X.size\n",
        "    xi = np.argsort(X,kind='quicksort')\n",
        "    Y = Y[xi]\n",
        "    _,b,c = np.unique(Y,return_counts=True,return_inverse=True)\n",
        "    r = np.cumsum(c)[b]\n",
        "    _,b,c = np.unique(-Y,return_counts=True,return_inverse=True)\n",
        "    l = np.cumsum(c)[b]\n",
        "    return 1 - n*np.abs(np.diff(r)).sum() / (2*(l*(n-l)).sum())\n",
        "\n",
        "\n",
        "def xicorr2(x: 'ArrayLike', y: 'ArrayLike') -> float:\n",
        "    \"\"\"xi correlation coefficient (alternative implementation)\n",
        "\n",
        "    Written by github/atarashansky, modified by github/escherba\n",
        "    https://github.com/czbiohub/xicor/issues/17#issue-965635013\n",
        "    \"\"\"\n",
        "    x = np.asarray(x)\n",
        "    y = np.asarray(y)\n",
        "    n = x.size\n",
        "    assert y.size == n, \"arrays must be of the same size\"\n",
        "    y = y[np.argsort(x, kind='quicksort')]\n",
        "    _, inverse, counts = np.unique(y, return_inverse=True, return_counts=True)\n",
        "    right = np.cumsum(counts)[inverse]\n",
        "    left = np.cumsum(np.flip(counts))[(counts.size - 1) - inverse]\n",
        "    return 1. - 0.5 * float(np.abs(np.diff(right)).sum() / np.mean(left * (n - left)))\n",
        "\n",
        "from scipy.stats import rankdata\n",
        "\n",
        "def xicorr_orig(x: 'ArrayLike', y: 'ArrayLike') -> float:\n",
        "    \"\"\"Original R implementation of Xi translated into Python\n",
        "\n",
        "    R implementation:\n",
        "    https://github.com/cran/XICOR/blob/master/R/calculateXI.R\n",
        "    \"\"\"\n",
        "    x = np.asarray(x)\n",
        "    y = np.asarray(y)\n",
        "    n = x.size\n",
        "    assert y.size == n, \"arrays must be of the same size\"\n",
        "    PI = rankdata(x, method=\"average\")\n",
        "    fr = rankdata(y, method=\"average\")\n",
        "    CU = np.mean((float(n + 1) - fr) * (fr - 1.))\n",
        "    A1 = np.abs(np.diff(fr[np.argsort(PI, kind=\"quicksort\")])).sum()\n",
        "    return 1. - 0.5 * float(A1 / CU)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "x = np.random.rand(125) + 2\n",
        "y = x**2 + x + np.sqrt(x) + 12 + np.random.randn(125)\n",
        "\n",
        "from scipy.stats import pearsonr\n",
        "\n",
        "xicorr(x, y), xicorr2(x, y), xicorr_orig(x, y), pearsonr(x, y)[0]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_lUdB0FxhSGw",
        "outputId": "3cc8fd00-b880-48ba-82c5-6bd40e03f85c"
      },
      "execution_count": 84,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(0.5468509984639016,\n",
              " 0.5468509984639016,\n",
              " 0.5468509984639016,\n",
              " 0.8923135014900104)"
            ]
          },
          "metadata": {},
          "execution_count": 84
        }
      ]
    }
  ]
}