From 7b5cf999bfc5ee0dc6a966194879e47db6319063 Mon Sep 17 00:00:00 2001 From: Alexander Dunkel <alexander.dunkel@tu-dresden.de> Date: Fri, 12 Jan 2024 09:54:11 +0100 Subject: [PATCH] Update artifacts --- notebooks/04_topic_classification.ipynb | 159 +++++++++++++++++------- py/_04_topic_classification.py | 34 +++-- 2 files changed, 139 insertions(+), 54 deletions(-) diff --git a/notebooks/04_topic_classification.ipynb b/notebooks/04_topic_classification.ipynb index 17098b1..147bb4e 100644 --- a/notebooks/04_topic_classification.ipynb +++ b/notebooks/04_topic_classification.ipynb @@ -7,14 +7,19 @@ "<div style=\"width: 100%;display: flex; align-items: top;\">\n", " <div style=\"float:left;width: 80%;text-align:left;position:relative\">\n", " <h1>Part 4: Topic Classification of Social Media</h1>\n", - " <p><strong>Workshop: Social Media, Data Analysis, & Cartograpy, WS 2022/23</strong><p>\n", - " <p><em>Madalina Gugulica, <a href=\"mailto:alexander.dunkel@tu-dresden.de\">Alexander Dunkel</a>, Institute of Cartography, TU Dresden</em><br><img src=\"https://kartographie.geo.tu-dresden.de/python_datascience_course/version.svg\" style=\"float:left\"></p></div>\n", - " <div style=\"float:right\">\n", - " <img src=\"https://kartographie.geo.tu-dresden.de/python_datascience_course/TU_Dresden_Logo_blau_HKS41.svg\" style=\"position:relative;width:256px;margin-top:0px;margin-right:10px\"/>\n", + " <p><strong>Workshop: Social Media, Data Analysis, & Cartography, WS 2023/24</strong></p>\n", + " <p><em><a href=\"mailto:madalina.gugulica@tu-dresden.de\">Madalina Gugulica</a>, Institute of Cartography, TU Dresden</em>\n", + " <p><em><a href=\"mailto:alexander.dunkel@tu-dresden.de\">Alexander Dunkel</a>\n", + " <br> Leibniz Institute of Ecological Urban and Regional Development, \n", + " Transformative Capacities & Research Data Centre & TU Dresden, \n", + " Institute of Cartography</em></p><br><img src=\"https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/version.svg\" 
style=\"float:left\"></p></div>\n", + " <div style=\"float: right;\">\n", + " <div style=\"width:300px\">\n", + " <img src=\"https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/FDZ-Logo_DE_RGB-blk_bg-tra_mgn-full_h200px_web.svg\" style=\"position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;\"/>\n", + " <img src=\"https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/TU_Dresden_Logo_blau_HKS41.svg\" style=\"position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;\"/>\n", " </div>\n", - "</div>\n", - "\n", - "<img src=\"https://ad.vgiscience.org/mobile_cart_workshop2020/img_topics.png\" style=\"width:500px;text-align:left;position:relative;float:left\">" + " </div>\n", + "</div>" ] }, { @@ -46,9 +51,18 @@ "source": [ "<div class=\"alert alert-warning\" role=\"alert\" style=\"color: black;\">\n", " <ul>\n", - " <li>Please make sure that <strong>\"04_topics_env\"</strong> is shown on the \n", + " <li>For this notebook, please make sure that <code>04_topics_env</code> is shown on the \n", " <strong>top-right corner</strong>. 
If not, click & select.</li>\n", " </ul>\n", + " <details style=\"margin-left: 1em;\"><summary style=\"cursor: pointer;\"><strong>Link the environment for this notebook, if not already done.</strong></summary>Use this command in a notebook cell:\n", + "<pre><code>\n", + "!/projects/p_lv_mobicart_2324/topics_env/bin/python \\\n", + " -m ipykernel install \\\n", + " --user \\\n", + " --name topics_env \\\n", + " --display-name=\"04_topics_env\"\n", + "</code></pre>\n", + "</details>\n", "</div>" ] }, @@ -122,7 +136,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import pandas as pd\n", @@ -147,7 +163,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -178,7 +196,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%load_ext autoreload\n", @@ -195,7 +215,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import sys\n", @@ -211,7 +233,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "source = \"topic_data.zip\"" @@ -227,7 +251,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%time\n", @@ -250,7 +276,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from gensim import utils\n", @@ -269,7 +297,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "#idf-scores dictionary deserialization\n", @@ -289,7 +319,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + 
"tags": [] + }, "outputs": [], "source": [ "def avg_topic_vector(lang_model, tokens_list):\n", @@ -349,7 +381,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "filename = \"DD_Neustadt_NormalizedInstagramPosts.pickle\"\n", @@ -360,7 +394,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df.head()" @@ -410,7 +446,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "topic_list = ['event','music','festival','concert']" @@ -435,7 +473,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "enhanced_list = []\n", @@ -455,7 +495,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "topic_list = topic_list + enhanced_list\n", @@ -479,7 +521,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "topic_embedding = avg_topic_vector(model_w2v, topic_list)" @@ -500,7 +544,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", @@ -529,7 +575,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "%%time\n", @@ -568,7 +616,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df_classified = df[df['classification'] == 1]\n", @@ -592,7 +642,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "import geopandas as gp\n", @@ -619,7 +671,9 @@ { "cell_type": "code", 
"execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "df_classified.reset_index()" @@ -628,7 +682,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "gdf = gp.GeoDataFrame(\n", @@ -638,7 +694,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "CRS_PROJ = \"epsg:3857\" # Web Mercator\n", @@ -657,7 +715,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "gdf.head()" @@ -666,7 +726,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "x = gdf.loc[gdf.first_valid_index()].geometry.x\n", @@ -692,7 +754,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "posts_layer = gv.Points(\n", @@ -705,7 +769,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "from bokeh.models import HoverTool\n", @@ -726,7 +792,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def set_active_tool(plot, element):\n", @@ -759,7 +827,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "gv_layers.opts(\n", @@ -798,7 +868,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "gv_layers.opts(\n", @@ -824,7 +896,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "!jupyter nbconvert --to html \\\n", @@ -842,7 +916,9 @@ { "cell_type": "code", "execution_count": 
null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "tools.clean_folders(\n", @@ -869,9 +945,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "jupyter": { - "source_hidden": true - }, "tags": [] }, "outputs": [], @@ -898,7 +971,7 @@ "kernelspec": { "display_name": "04_topics_env", "language": "python", - "name": "topics_env" + "name": "topics_env" }, "language_info": { "codemirror_mode": { diff --git a/py/_04_topic_classification.py b/py/_04_topic_classification.py index 816edc9..3597e17 100644 --- a/py/_04_topic_classification.py +++ b/py/_04_topic_classification.py @@ -6,24 +6,29 @@ # extension: .py # format_name: light # format_version: '1.5' -# jupytext_version: 1.14.4 +# jupytext_version: 1.14.5 # kernelspec: # display_name: 04_topics_env # language: python -# name: topics_env +# name: topics_env # --- # <div style="width: 100%;display: flex; align-items: top;"> # <div style="float:left;width: 80%;text-align:left;position:relative"> # <h1>Part 4: Topic Classification of Social Media</h1> -# <p><strong>Workshop: Social Media, Data Analysis, & Cartograpy, WS 2022/23</strong><p> -# <p><em>Madalina Gugulica, <a href="mailto:alexander.dunkel@tu-dresden.de">Alexander Dunkel</a>, Institute of Cartography, TU Dresden</em><br><img src="https://kartographie.geo.tu-dresden.de/python_datascience_course/version.svg" style="float:left"></p></div> -# <div style="float:right"> -# <img src="https://kartographie.geo.tu-dresden.de/python_datascience_course/TU_Dresden_Logo_blau_HKS41.svg" style="position:relative;width:256px;margin-top:0px;margin-right:10px"/> +# <p><strong>Workshop: Social Media, Data Analysis, & Cartography, WS 2023/24</strong></p> +# <p><em><a href="mailto:madalina.gugulica@tu-dresden.de">Madalina Gugulica</a>, Institute of Cartography, TU Dresden</em> +# <p><em><a href="mailto:alexander.dunkel@tu-dresden.de">Alexander Dunkel</a> +# <br> Leibniz Institute of Ecological Urban and Regional Development, +# 
Transformative Capacities & Research Data Centre & TU Dresden, +# Institute of Cartography</em></p><br><img src="https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/version.svg" style="float:left"></p></div> +# <div style="float: right;"> +# <div style="width:300px"> +# <img src="https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/FDZ-Logo_DE_RGB-blk_bg-tra_mgn-full_h200px_web.svg" style="position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;"/> +# <img src="https://kartographie.geo.tu-dresden.de/ad/jupyter_python_datascience/TU_Dresden_Logo_blau_HKS41.svg" style="position:relative;width:256px;margin-top:0px;margin-right:10px;clear: both;"/> +# </div> # </div> # </div> -# -# <img src="https://ad.vgiscience.org/mobile_cart_workshop2020/img_topics.png" style="width:500px;text-align:left;position:relative;float:left"> # This is the fourth notebook in a series of four notebooks: # @@ -40,9 +45,18 @@ # <div class="alert alert-warning" role="alert" style="color: black;"> # <ul> -# <li>Please make sure that <strong>"04_topics_env"</strong> is shown on the +# <li>For this notebook, please make sure that <code>04_topics_env</code> is shown on the # <strong>top-right corner</strong>. 
If not, click & select.</li> # </ul> +# <details style="margin-left: 1em;"><summary style="cursor: pointer;"><strong>Link the environment for this notebook, if not already done.</strong></summary>Use this command in a notebook cell: +# <pre><code> +# # !/projects/p_lv_mobicart_2324/topics_env/bin/python \ +# # -m ipykernel install \ +# # --user \ +# # --name topics_env \ +# # --display-name="04_topics_env" +# </code></pre> +# </details> # </div> # <div class="alert alert-success" role="alert" style="color: black;"> @@ -508,12 +522,10 @@ tools.clean_folders( # # </div> -# + jupyter={"source_hidden": true} tags=[] root_packages = [ 'python', 'geoviews', 'holoviews', 'ipywidgets', 'geopandas', 'shapely', 'matplotlib', 'sklearn', 'numpy', 'pandas', 'bokeh', 'gensim', 'wordcloud'] tools.package_report(root_packages) -# - -- GitLab