{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "# requirements:\n",
    "\n",
    "%pip install -q scikit-dimension pandas numba matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import skdim\n",
    "import numpy as np\n",
    "\n",
    "# Seed every random draw so that Restart & Run All reproduces the same numbers.\n",
    "SEED = 0\n",
    "# Global NumPy RNG: used by np.random.rand below. skdim estimators left at their\n",
    "# default random_state presumably draw from it as well -- TODO confirm for DANCo.\n",
    "np.random.seed(SEED)\n",
    "\n",
    "# Generate data: np.array (n_points x n_dim). Here a uniformly sampled\n",
    "# n_components-ball (5-ball) embedded in n_features (100) dimensions.\n",
    "n_rows = 32\n",
    "n_features = 100\n",
    "n_components = 5\n",
    "\n",
    "data = np.zeros((n_rows, n_features))\n",
    "\n",
    "# Only the first n_components columns carry the ball; every other column starts at zero.\n",
    "data[:, :n_components] = skdim.datasets.hyperBall(\n",
    "    n=n_rows, d=n_components, radius=2, random_state=SEED\n",
    ")\n",
    "\n",
    "# Add some correlation: noisy copies of ball coordinates written into otherwise-empty columns.\n",
    "data[:, 8] = data[:, 1] + np.random.rand(n_rows)\n",
    "data[:, 12] = data[:, 1] + np.random.rand(n_rows)\n",
    "data[:, 20] = data[:, 2] + np.random.rand(n_rows)\n",
    "\n",
    "print('data shape:', data.shape)\n",
    "\n",
    "# Estimate global intrinsic dimension.\n",
    "danco = skdim.id.DANCo().fit(data)\n",
    "\n",
    "# Estimate local intrinsic dimension (dimension in k-nearest-neighborhoods around each point).\n",
    "# n_neighbors = n_rows - 1 makes each neighborhood span all the other points.\n",
    "lpca = skdim.id.lPCA().fit_pw(data, n_neighbors=n_rows - 1, n_jobs=1)\n",
    "\n",
    "# Get estimated intrinsic dimension (for lPCA: the mean of the point-wise estimates).\n",
    "print('estimated IDs (DANCo, lPCA):', danco.dimension_, np.mean(lpca.dimension_pw_))"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "4d4c55ad0dd25f9ca95e4d49a929aa3f71bfb37020ae570a9996c3e164818202"
  },
  "kernelspec": {
   "display_name": "Python 3.9.9 ('py3')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}