diff --git a/xicorr.ipynb b/xicorr.ipynb new file mode 100644 index 0000000..d55dd24 --- /dev/null +++ b/xicorr.ipynb @@ -0,0 +1,134 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "xicorr.ipynb", + "provenance": [], + "collapsed_sections": [], + "authorship_tag": "ABX9TyPUlsT6KuGfIwhEKgliVO+J", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "\n", + "import seaborn as sns; sns.set()" + ], + "metadata": { + "id": "-4vX69aFhGlb" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "-1bLlqB6hDjQ" + }, + "outputs": [], + "source": [ + "\n", + "def xicorr(X,Y):\n", + " n = X.size\n", + " xi = np.argsort(X,kind='quicksort')\n", + " Y = Y[xi]\n", + " _,b,c = np.unique(Y,return_counts=True,return_inverse=True)\n", + " r = np.cumsum(c)[b]\n", + " _,b,c = np.unique(-Y,return_counts=True,return_inverse=True)\n", + " l = np.cumsum(c)[b]\n", + " return 1 - n*np.abs(np.diff(r)).sum() / (2*(l*(n-l)).sum())\n", + "\n", + "\n", + "def xicorr2(x: 'ArrayLike', y: 'ArrayLike') -> float:\n", + " \"\"\"xi correlation coefficient (alternative implementation)\n", + "\n", + " Written by github/atarashansky, modified by github/escherba\n", + " https://github.com/czbiohub/xicor/issues/17#issue-965635013\n", + " \"\"\"\n", + " x = np.asarray(x)\n", + " y = np.asarray(y)\n", + " n = x.size\n", + " assert y.size == n, \"arrays must be of the same size\"\n", + " y = y[np.argsort(x, kind='quicksort')]\n", + " _, inverse, counts = np.unique(y, return_inverse=True, return_counts=True)\n", + " right = np.cumsum(counts)[inverse]\n", + " left = np.cumsum(np.flip(counts))[(counts.size - 1) - inverse]\n", + " return 1. - 0.5 * float(np.abs(np.diff(right)).sum() / np.mean(left * (n - left)))\n", + "\n", + "from scipy.stats import rankdata\n", + "\n", + "def xicorr_orig(x: 'ArrayLike', y: 'ArrayLike') -> float:\n", + " \"\"\"Original R implementation of Xi translated into Python\n", + "\n", + " R implementation:\n", + " https://github.com/cran/XICOR/blob/master/R/calculateXI.R\n", + " \"\"\"\n", + " x = np.asarray(x)\n", + " y = np.asarray(y)\n", + " n = x.size\n", + " assert y.size == n, \"arrays must be of the same size\"\n", + " PI = rankdata(x, method=\"average\")\n", + " fr = rankdata(y, method=\"average\")\n", + " CU = np.mean((float(n + 1) - fr) * (fr - 1.))\n", + " A1 = np.abs(np.diff(fr[np.argsort(PI, kind=\"quicksort\")])).sum()\n", + " return 1. - 0.5 * float(A1 / CU)" + ] + }, + { + "cell_type": "code", + "source": [ + "x = np.random.rand(125) + 2\n", + "y = x**2 + x + np.sqrt(x) + 12 + np.random.randn(125)\n", + "\n", + "from scipy.stats import pearsonr\n", + "\n", + "xicorr(x, y), xicorr2(x, y), xicorr_orig(x, y), pearsonr(x, y)[0]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_lUdB0FxhSGw", + "outputId": "3cc8fd00-b880-48ba-82c5-6bd40e03f85c" + }, + "execution_count": 84, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(0.5468509984639016,\n", + " 0.5468509984639016,\n", + " 0.5468509984639016,\n", + " 0.8923135014900104)" + ] + }, + "metadata": {}, + "execution_count": 84 + } + ] + } + ] +} \ No newline at end of file