{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "echo $SPARK_HOME\n",
    "echo $JAVA_HOME\n",
    "hostname\n",
    "if [ ! -d $HOME/jupyter-spark-conf ]\n",
    "then\n",
    "cp -r $SPARK_HOME/conf $HOME/jupyter-spark-conf\n",
    "chmod -R u+w $HOME/jupyter-spark-conf\n",
    "echo \"ml `ml -t list Spark` 2>/dev/null\" >> $HOME/jupyter-spark-conf/spark-env.sh\n",
    "fi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "os.environ['PYSPARK_PYTHON'] = sys.executable\n",
    "os.environ['SPARK_CONF_DIR'] = os.environ['HOME'] + '/cluster-conf-' + os.environ['SLURM_JOBID'] + '/spark'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!SHELL=/bin/bash bash framework-configure.sh spark $HOME/jupyter-spark-conf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!start-all.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import platform\n",
    "import pyspark\n",
    "from pyspark import SparkContext"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc = SparkContext(master=\"spark://\"+platform.node()+\":7077\", appName=\"RDD basic functions App\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#your Spark workflow code here, the following is just an example:\n",
    "datafile = sc.textFile(\"SparkExample.ipynb\")\n",
    "firstTenItems = datafile.take(10)\n",
    "for item in firstTenItems:\n",
    "    print(item)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!stop-all.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!ps -ef | grep -i java"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pkill -f \"pyspark-shell\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "haswell-py3.7-spark",
   "language": "python",
   "name": "haswell-py3.7-spark"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}