# Patch overview (free text before the first "diff --git" header; patch tools skip it).
#
# 1. .vscode/settings.json
#    Changes "python.pythonPath" from the miniconda3 env interpreter to the
#    miniforge (Caskroom) env interpreter. "python.condaPath" is left untouched.
#    NOTE(review): condaPath still points at ~/miniconda3 -- confirm this is intended.
#
# 2. sentence_transformer_pg.py (new file, 43 lines, "# %%" notebook-style cells)
#    - Cell 1 installs/upgrades sentence-transformers and hdbscan via "!pip".
#    - Cell 2 encodes seven short sentences with the 'all-MiniLM-L6-v2' model on CPU,
#      fits hdbscan.HDBSCAN(2, prediction_data=True) on the embeddings, projects the
#      embeddings with UMAP() defaults, scatter-plots the projection with one palette
#      colour per cluster label desaturated by the membership probability, and finally
#      calls hdbscan.prediction.all_points_membership_vectors on the fitted clusterer.
#    NOTE(review): "from hdbscan.hdbscan_ import hdbscan" is rebound by the later
#      "import hdbscan" before the name is ever used, so the first import has no effect.
#    NOTE(review): numpy is imported as np but never referenced in this file.
#    NOTE(review): the "!pip ..." line is IPython shell-escape syntax; the file runs as
#      interactive cells, not with a plain "python sentence_transformer_pg.py".
#    NOTE(review): the pip line does not list the packages behind the umap, seaborn and
#      matplotlib imports -- presumably already installed in the env; confirm.
#    NOTE(review): the positional 2 passed to HDBSCAN is presumably min_cluster_size --
#      confirm against the hdbscan API.
#
# NOTE(review): leading whitespace inside the hunks is assumed to be a 4-space indent;
#   if the settings.json hunk is rejected, retry with "git apply --ignore-whitespace".
# The settings.json section is listed once: a second identical copy cannot apply after
#   the first one has already replaced the "python.pythonPath" line.
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 6e379b8..8566722 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,4 +1,4 @@
 {
-    "python.pythonPath": "/Users/morteza/miniconda3/envs/py3/bin/python",
+    "python.pythonPath": "/usr/local/Caskroom/miniforge/base/envs/py3/bin/python",
     "python.condaPath": "~/miniconda3/bin/conda"
 }
\ No newline at end of file
diff --git a/sentence_transformer_pg.py b/sentence_transformer_pg.py
new file mode 100644
index 0000000..99d5fa5
--- /dev/null
+++ b/sentence_transformer_pg.py
@@ -0,0 +1,43 @@
+# %%
+!pip install -Uq sentence-transformers hdbscan
+
+# %%
+from hdbscan.hdbscan_ import hdbscan
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import seaborn as sns
+import matplotlib.pyplot as plt
+from umap import UMAP
+
+import hdbscan
+
+
+model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+
+embeddings = model.encode([
+    'this is about a cat',
+    'but this one is not about animals',
+    'this is about a dog',
+    'but this one is about animals',
+    'this one is about humans',
+    'this one is about queen',
+    'this one is about king'
+])
+
+
+clusterer = hdbscan.HDBSCAN(2, prediction_data=True).fit(embeddings)
+
+clusterer.labels_, clusterer.probabilities_
+
+embeddings_2d = UMAP().fit_transform(embeddings)
+
+palette = sns.color_palette('deep', 8)
+
+colors = [sns.desaturate(palette[col], sat)
+          for col, sat in zip(clusterer.labels_,
+                              clusterer.probabilities_)]
+
+plt.scatter(embeddings_2d.T[0], embeddings_2d.T[1], c=colors)
+
+
+hdbscan.prediction.all_points_membership_vectors(clusterer)
\ No newline at end of file