Run PySpark on Jupyter notebook
$ which python
$ whereis python
# install EPEL repository first
$ sudo yum install epel-release
# install python-pip
$ sudo yum -y install python-pip
sudo pip install --upgrade setuptools
wget https://repo.anaconda.com/archive/Anaconda2-5.0.1-Linux-x86_64.sh
sudo sh Anaconda2-5.0.1-Linux-x86_64.sh
1) Install PySpark
pip install pyspark
2) Install Java
3) Install Jupyter notebook
pip install jupyter
4) Install findspark
pip install findspark
%env SPARK_HOME=c:\spark
# Word-count example: counts how often each space-separated token occurs in
# OneSentence.txt using a local SparkContext, and prints "word: count" lines.

# Locate the Spark installation and make the pyspark package importable.
# NOTE(review): findspark.init() with no argument is expected to rely on
# SPARK_HOME (set above) -- confirm for your environment.
import findspark
findspark.init()

# Creating Spark Context
from pyspark import SparkContext
sc = SparkContext("local", "first app")
try:
    # Calculating words count: split every line on single spaces, pair each
    # token with 1, then sum the 1s per token.
    text_file = sc.textFile("OneSentence.txt")
    counts = (
        text_file.flatMap(lambda line: line.split(" "))
        .map(lambda word: (word, 1))
        .reduceByKey(lambda a, b: a + b)
    )
    # Printing each word with its respective count
    for word, count in counts.collect():
        print("{}: {}".format(word, count))
finally:
    # Stopping Spark Context -- done in `finally` so the context is released
    # even when the job fails (e.g. the input file is missing) and the cell
    # can be re-run.
    sc.stop()
sudo yum info rh-python35
sudo yum install rh-python35
sudo scl enable rh-python35 bash
https://www.2daygeek.com/3-methods-to-install-latest-python3-package-on-centos-6-system/
$ whereis python
# install EPEL repository first
$ sudo yum install epel-release
# install python-pip
$ sudo yum -y install python-pip
sudo pip install --upgrade setuptools
wget https://repo.anaconda.com/archive/Anaconda2-5.0.1-Linux-x86_64.sh
sudo sh Anaconda2-5.0.1-Linux-x86_64.sh
1) Install PySpark
pip install pyspark
2) Install Java
3) Install Jupyter notebook
pip install jupyter
4) Install findspark
pip install findspark
%env SPARK_HOME=c:\spark
# Word-count example: counts how often each space-separated token occurs in
# OneSentence.txt using a local SparkContext, and prints "word: count" lines.

# Locate the Spark installation and make the pyspark package importable.
# NOTE(review): findspark.init() with no argument is expected to rely on
# SPARK_HOME (set above) -- confirm for your environment.
import findspark
findspark.init()

# Creating Spark Context
from pyspark import SparkContext
sc = SparkContext("local", "first app")
try:
    # Calculating words count: split every line on single spaces, pair each
    # token with 1, then sum the 1s per token.
    text_file = sc.textFile("OneSentence.txt")
    counts = (
        text_file.flatMap(lambda line: line.split(" "))
        .map(lambda word: (word, 1))
        .reduceByKey(lambda a, b: a + b)
    )
    # Printing each word with its respective count
    for word, count in counts.collect():
        print("{}: {}".format(word, count))
finally:
    # Stopping Spark Context -- done in `finally` so the context is released
    # even when the job fails (e.g. the input file is missing) and the cell
    # can be re-run.
    sc.stop()
Install Python 3.5 on Centos 6
sudo yum install centos-release-scl
sudo yum info rh-python35
sudo yum install rh-python35
sudo scl enable rh-python35 bash
https://www.2daygeek.com/3-methods-to-install-latest-python3-package-on-centos-6-system/
Comments
Post a Comment