From 7b5cf999bfc5ee0dc6a966194879e47db6319063 Mon Sep 17 00:00:00 2001
From: Alexander Dunkel <alexander.dunkel@tu-dresden.de>
Date: Fri, 12 Jan 2024 09:54:11 +0100
Subject: [PATCH] Update artifacts

---
 notebooks/04_topic_classification.ipynb | 159 +++++++++++++++++-------
 py/_04_topic_classification.py          |  34 +++--
 2 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/notebooks/04_topic_classification.ipynb b/notebooks/04_topic_classification.ipynb
index 17098b1..147bb4e 100644
--- a/notebooks/04_topic_classification.ipynb
+++ b/notebooks/04_topic_classification.ipynb
@@ -7,14 +7,19 @@
     "<div style=\"width: 100%;display: flex; align-items: top;\">\n",
     "    <div style=\"float:left;width: 80%;text-align:left;position:relative\">\n",
     "        <h1>Part 4: Topic Classification of Social Media</h1>\n",
-    "        <p><strong>Workshop: Social Media, Data Analysis, &amp; Cartograpy, WS 2022/23</strong><p>\n",
-    "            <p><em>Madalina Gugulica, <a href=\"mailto:alexander.dunkel@tu-dresden.de\">Alexander Dunkel</a>, Institute of Cartography, TU Dresden</em><br><img src=\"https://kartographie.geo.tu-dresden.de/python_datascience_course/version.svg\" style=\"float:left\"></p></div>\n",
-    "    <div style=\"float:right\">\n",
-    "    <img src=\"https://kartographie.geo.tu-dresden.de/python_datascience_course/TU_Dresden_Logo_blau_HKS41.svg\" style=\"position:relative;width:256px;margin-top:0px;margin-right:10px\"/>\n",
+    "        <p><strong>Workshop: Social Media, Data Analysis, &amp; Cartography, WS 2023/24</strong></p>\n",
+    "            <p><em><a href=\"mailto:madalina.gugulica@tu-dresden.de\">Madalina Gugulica</a>, Institute of Cartography, TU Dresden</em>\n",
+    "    <p><em><a href=\"mailto:alexander.dunkel@tu-dresden.de\">Alexander Dunkel</a>\n",
+    "            <br> Leibniz Institute of Ecological Urban and Regional Development, \n",
+    "        Transformative Capacities & Research Data Centre & TU Dresden, \n",
+    "        Institute of Cartography</em></p><br><img src=\"https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/version.svg\" style=\"float:left\"></div>\n",
+    "    <div style=\"float: right;\">\n",
+    "    <div style=\"width:300px\">\n",
+    "    <img src=\"https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/FDZ-Logo_DE_RGB-blk_bg-tra_mgn-full_h200px_web.svg\" style=\"position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;\"/>\n",
+    "    <img  src=\"https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/TU_Dresden_Logo_blau_HKS41.svg\" style=\"position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;\"/>\n",
     "    </div>\n",
-    "</div>\n",
-    "\n",
-    "<img src=\"https://ad.vgiscience.org/mobile_cart_workshop2020/img_topics.png\" style=\"width:500px;text-align:left;position:relative;float:left\">"
+    "    </div>\n",
+    "</div>"
    ]
   },
   {
@@ -46,9 +51,18 @@
    "source": [
     "<div class=\"alert alert-warning\" role=\"alert\" style=\"color: black;\">\n",
     "    <ul>\n",
-    "        <li>Please make sure that <strong>\"04_topics_env\"</strong> is shown on the \n",
+    "        <li>For this notebook, please make sure that <code>04_topics_env</code> is shown on the \n",
     "            <strong>top-right corner</strong>. If not, click & select.</li>\n",
     "    </ul>\n",
+    "        <details style=\"margin-left: 1em;\"><summary style=\"cursor: pointer;\"><strong>Link the environment for this notebook, if not already done.</strong></summary>Use this command in a notebook cell:\n",
+    "<pre><code>\n",
+    "!/projects/p_lv_mobicart_2324/topics_env/bin/python \\\n",
+    "    -m ipykernel install \\\n",
+    "    --user \\\n",
+    "    --name topics_env \\\n",
+    "    --display-name=\"04_topics_env\"\n",
+    "</code></pre>\n",
+    "</details>\n",
     "</div>"
    ]
   },
@@ -122,7 +136,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "import pandas as pd\n",
@@ -147,7 +163,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
@@ -178,7 +196,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "%load_ext autoreload\n",
@@ -195,7 +215,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "import sys\n",
@@ -211,7 +233,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "source = \"topic_data.zip\""
@@ -227,7 +251,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "%%time\n",
@@ -250,7 +276,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "from gensim import utils\n",
@@ -269,7 +297,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "#idf-scores dictionary deserialization\n",
@@ -289,7 +319,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "def avg_topic_vector(lang_model, tokens_list):\n",
@@ -349,7 +381,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "filename = \"DD_Neustadt_NormalizedInstagramPosts.pickle\"\n",
@@ -360,7 +394,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "df.head()"
@@ -410,7 +446,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "topic_list = ['event','music','festival','concert']"
@@ -435,7 +473,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "enhanced_list = []\n",
@@ -455,7 +495,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "topic_list = topic_list + enhanced_list\n",
@@ -479,7 +521,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "topic_embedding = avg_topic_vector(model_w2v, topic_list)"
@@ -500,7 +544,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "import matplotlib.pyplot as plt\n",
@@ -529,7 +575,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "%%time\n",
@@ -568,7 +616,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "df_classified = df[df['classification'] == 1]\n",
@@ -592,7 +642,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "import geopandas as gp\n",
@@ -619,7 +671,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "df_classified.reset_index()"
@@ -628,7 +682,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "gdf = gp.GeoDataFrame(\n",
@@ -638,7 +694,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "CRS_PROJ = \"epsg:3857\" # Web Mercator\n",
@@ -657,7 +715,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "gdf.head()"
@@ -666,7 +726,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "x = gdf.loc[gdf.first_valid_index()].geometry.x\n",
@@ -692,7 +754,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "posts_layer = gv.Points(\n",
@@ -705,7 +769,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "from bokeh.models import HoverTool\n",
@@ -726,7 +792,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "def set_active_tool(plot, element):\n",
@@ -759,7 +827,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "gv_layers.opts(\n",
@@ -798,7 +868,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "gv_layers.opts(\n",
@@ -824,7 +896,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "!jupyter nbconvert --to html \\\n",
@@ -842,7 +916,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "tools.clean_folders(\n",
@@ -869,9 +945,6 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "jupyter": {
-     "source_hidden": true
-    },
     "tags": []
    },
    "outputs": [],
@@ -898,7 +971,7 @@
   "kernelspec": {
    "display_name": "04_topics_env",
    "language": "python",
-   "name": "topics_env"
+   "name": "topics_env"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/py/_04_topic_classification.py b/py/_04_topic_classification.py
index 816edc9..3597e17 100644
--- a/py/_04_topic_classification.py
+++ b/py/_04_topic_classification.py
@@ -6,24 +6,29 @@
 #       extension: .py
 #       format_name: light
 #       format_version: '1.5'
-#       jupytext_version: 1.14.4
+#       jupytext_version: 1.14.5
 #   kernelspec:
 #     display_name: 04_topics_env
 #     language: python
-#     name: topics_env
+#     name: topics_env
 # ---
 
 # <div style="width: 100%;display: flex; align-items: top;">
 #     <div style="float:left;width: 80%;text-align:left;position:relative">
 #         <h1>Part 4: Topic Classification of Social Media</h1>
-#         <p><strong>Workshop: Social Media, Data Analysis, &amp; Cartograpy, WS 2022/23</strong><p>
-#             <p><em>Madalina Gugulica, <a href="mailto:alexander.dunkel@tu-dresden.de">Alexander Dunkel</a>, Institute of Cartography, TU Dresden</em><br><img src="https://kartographie.geo.tu-dresden.de/python_datascience_course/version.svg" style="float:left"></p></div>
-#     <div style="float:right">
-#     <img src="https://kartographie.geo.tu-dresden.de/python_datascience_course/TU_Dresden_Logo_blau_HKS41.svg" style="position:relative;width:256px;margin-top:0px;margin-right:10px"/>
+#         <p><strong>Workshop: Social Media, Data Analysis, &amp; Cartography, WS 2023/24</strong></p>
+#             <p><em><a href="mailto:madalina.gugulica@tu-dresden.de">Madalina Gugulica</a>, Institute of Cartography, TU Dresden</em>
+#     <p><em><a href="mailto:alexander.dunkel@tu-dresden.de">Alexander Dunkel</a>
+#             <br> Leibniz Institute of Ecological Urban and Regional Development, 
+#         Transformative Capacities & Research Data Centre & TU Dresden, 
+#         Institute of Cartography</em></p><br><img src="https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/version.svg" style="float:left"></div>
+#     <div style="float: right;">
+#     <div style="width:300px">
+#     <img src="https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/FDZ-Logo_DE_RGB-blk_bg-tra_mgn-full_h200px_web.svg" style="position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;"/>
+#     <img  src="https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/TU_Dresden_Logo_blau_HKS41.svg" style="position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;"/>
+#     </div>
 #     </div>
 # </div>
-#
-# <img src="https://ad.vgiscience.org/mobile_cart_workshop2020/img_topics.png" style="width:500px;text-align:left;position:relative;float:left">
 
 # This is the fourth notebook in a series of four notebooks:
 #     
@@ -40,9 +45,18 @@
 
 # <div class="alert alert-warning" role="alert" style="color: black;">
 #     <ul>
-#         <li>Please make sure that <strong>"04_topics_env"</strong> is shown on the 
+#         <li>For this notebook, please make sure that <code>04_topics_env</code> is shown on the 
 #             <strong>top-right corner</strong>. If not, click & select.</li>
 #     </ul>
+#         <details style="margin-left: 1em;"><summary style="cursor: pointer;"><strong>Link the environment for this notebook, if not already done.</strong></summary>Use this command in a notebook cell:
+# <pre><code>
+# # !/projects/p_lv_mobicart_2324/topics_env/bin/python \
+# #     -m ipykernel install \
+# #     --user \
+# #     --name topics_env \
+# #     --display-name="04_topics_env"
+# </code></pre>
+# </details>
 # </div>
 
 # <div class="alert alert-success" role="alert" style="color: black;">
@@ -508,12 +522,10 @@ tools.clean_folders(
 #         
 # </div>
 
-# + jupyter={"source_hidden": true} tags=[]
 root_packages = [
     'python', 'geoviews', 'holoviews', 'ipywidgets', 'geopandas', 
     'shapely',
     'matplotlib', 'sklearn', 'numpy', 'pandas', 'bokeh', 'gensim', 'wordcloud']
 tools.package_report(root_packages)
-# -
 
 
-- 
GitLab