Newer
Older
notebooks / metapath2vec.ipynb
Morteza Ansarinia on 3 Nov 2021 16 KB add spektral general_gnn (SOTA)
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %conda activate py38\n",
    "# %pip install stellargraph chardet spektral -Uq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2021-11-03 03:47:25.594278: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
     ]
    }
   ],
   "source": [
    "%reload_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import stellargraph as sg\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "StellarGraph: Undirected multigraph\n",
      " Nodes: 6, Edges: 5\n",
      "\n",
      " Node types:\n",
      "  task: [3]\n",
      "    Features: float32 vector, length 1\n",
      "    Edge types: task-default->construct, task-default->task\n",
      "  construct: [3]\n",
      "    Features: float32 vector, length 1\n",
      "    Edge types: construct-default->construct, construct-default->task\n",
      "\n",
      " Edge types:\n",
      "    task-default->task: [2]\n",
      "        Weights: range=[1, 2], mean=1.5, std=0.707107\n",
      "        Features: none\n",
      "    construct-default->construct: [2]\n",
      "        Weights: range=[10, 12], mean=11, std=1.41421\n",
      "        Features: none\n",
      "    construct-default->task: [1]\n",
      "        Weights: all 10\n",
      "        Features: none\n"
     ]
    }
   ],
   "source": [
    "# Node tables: a single numeric feature per node, indexed by node id.\n",
    "tasks = pd.DataFrame([10, 20, 30], index=['t1', 't2', 't3'])\n",
    "constructs = pd.DataFrame([50, 60, 70], index=['c1', 'c2', 'c3'])\n",
    "\n",
    "# Weighted edge list: one (source, target, weight) record per edge.\n",
    "edges = pd.DataFrame(\n",
    "    [\n",
    "        ('t1', 't2', 1),\n",
    "        ('t2', 't3', 2),\n",
    "        ('c1', 'c2', 10),\n",
    "        ('c2', 'c1', 12),\n",
    "        ('t1', 'c1', 10),\n",
    "    ],\n",
    "    columns=['source', 'target', 'weight'],\n",
    ")\n",
    "\n",
    "# The dict keys ('task', 'construct') become the node types of the graph.\n",
    "node_tables = {'task': tasks, 'construct': constructs}\n",
    "G = sg.StellarGraph(node_tables, edges=edges)\n",
    "print(G.info())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of random walks: {len(walks)}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[['c1', 'c2', 'c1', 'c2', 'c1'],\n",
       " ['c2', 'c1', 'c2', 'c1', 'c2'],\n",
       " ['c3'],\n",
       " ['t1', 'c1', 't1', 'c1', 't1'],\n",
       " ['t1', 'c1', 'c2'],\n",
       " ['t2'],\n",
       " ['t2'],\n",
       " ['t3'],\n",
       " ['t3']]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# metapath2vec: sample random walks that follow the metapath schemas below\n",
    "\n",
    "from stellargraph.data import UniformRandomMetaPathWalk\n",
    "\n",
    "rnd_walk_length = 5  # maximum length of a random walk\n",
    "\n",
    "# metapath schemas as a list of lists of node types.\n",
    "metapaths = [\n",
    "    ['task', 'construct', 'task'],\n",
    "    ['construct', 'construct'],\n",
    "    ['task', 'construct', 'construct', 'task'],\n",
    "]\n",
    "\n",
    "rw = UniformRandomMetaPathWalk(G)\n",
    "\n",
    "walks = rw.run(\n",
    "    nodes=list(G.nodes()),  # root nodes\n",
    "    length=rnd_walk_length,  # maximum length of a random walk\n",
    "    n=1,  # number of random walks per root node\n",
    "    metapaths=metapaths,  # the metapaths\n",
    ")\n",
    "\n",
    "# f-string, so the walk count is interpolated instead of printed as a literal placeholder\n",
    "print(f'Number of random walks: {len(walks)}')\n",
    "walks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "shape: (6, 128)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[('c1', 0.0630912259221077),\n",
       " ('t2', 0.02670864760875702),\n",
       " ('t3', -0.016970207914710045),\n",
       " ('c2', -0.07985257357358932),\n",
       " ('c3', -0.15879441797733307)]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from gensim.models import Word2Vec\n",
    "\n",
    "# Fit Word2Vec on the random walks, treating each walk as a sentence of node ids.\n",
    "# sg=1 selects the skip-gram variant; min_count=0 keeps every node id in the vocabulary.\n",
    "model = Word2Vec(\n",
    "    walks,\n",
    "    vector_size=128,\n",
    "    window=5,\n",
    "    min_count=0,\n",
    "    sg=1,\n",
    "    workers=2,\n",
    "    epochs=1,\n",
    ")\n",
    "\n",
    "print('shape:', model.wv.vectors.shape)\n",
    "\n",
    "# Nodes ranked by embedding similarity to task 't1'.\n",
    "model.wv.most_similar('t1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/Caskroom/miniforge/base/envs/py38/lib/python3.8/site-packages/sklearn/manifold/_t_sne.py:819: FutureWarning: 'square_distances' has been introduced in 0.24 to help phase out legacy squaring behavior. The 'legacy' setting will be removed in 1.1 (renaming of 0.26), and the default setting will be changed to True. In 1.3, 'square_distances' will be removed altogether, and distances will be squared by default. Set 'square_distances'=True to silence this warning.\n",
      "  warnings.warn(\n",
      "/usr/local/Caskroom/miniforge/base/envs/py38/lib/python3.8/site-packages/sklearn/manifold/_t_sne.py:982: FutureWarning: The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "data": {
      "image/png": "",
      "text/plain": [
       "<Figure size 360x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# viz: project the node embeddings to 2-D with t-SNE and colour points by node type\n",
    "\n",
    "from sklearn.manifold import TSNE\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "node_embeddings = model.wv.vectors\n",
    "node_types = [G.node_type(node_id) for node_id in model.wv.index_to_key]\n",
    "\n",
    "# Newer scikit-learn releases raise ValueError when perplexity >= n_samples, and this\n",
    "# toy graph has fewer nodes than the default perplexity of 30, so cap it at n_samples - 1.\n",
    "tsne_perplexity = min(30, len(node_embeddings) - 1)\n",
    "\n",
    "node_embeddings_2d = TSNE(\n",
    "    n_components=2,\n",
    "    perplexity=tsne_perplexity,\n",
    "    learning_rate='auto',\n",
    "    init='pca',\n",
    "    metric='cosine',\n",
    ").fit_transform(node_embeddings)\n",
    "\n",
    "# Map each node type to an integer so it can be used as a scatter colour.\n",
    "label_map = {label: i for i, label in enumerate(np.unique(node_types))}\n",
    "node_colors = [label_map[t] for t in node_types]\n",
    "\n",
    "plt.figure(figsize=(5, 5))\n",
    "plt.axes().set(aspect=\"equal\")\n",
    "plt.scatter(node_embeddings_2d[:, 0], node_embeddings_2d[:, 1], c=node_colors, alpha=0.3)\n",
    "plt.title('2d projection of the node embeddings')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "4bc0b60e31b0a7441a2e25ec194980ab62b84e91a65eae3f6b114824d9f7dec0"
  },
  "kernelspec": {
   "display_name": "Python 3.8.12 64-bit ('py38': conda)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}