- Download Anaconda Python and install it (https://www.continuum.io/downloads)
- Open a command prompt and run the command "ipython notebook" or "jupyter notebook"
- Create a new Python notebook and copy and paste the commands below
import os
import sys

# Point SPARK_HOME at your Spark installation directory
os.environ['SPARK_HOME'] = "C:/Spark1.6.1/spark-1.6.1-bin-hadoop2.6"
# Spark needs a JDK; set JAVA_HOME rather than appending the JDK path to sys.path
os.environ['JAVA_HOME'] = "C:/Program Files/Java/jdk1.8.0_73"

# Make the PySpark sources and the bundled Py4J library importable
sys.path.append("C:/Spark1.6.1/spark-1.6.1-bin-hadoop2.6/python")
sys.path.append("C:/Spark1.6.1/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip")
sys.path.append("C:/Spark1.6.1/spark-1.6.1-bin-hadoop2.6/python/lib/py4j-0.9-src.zip")

from pyspark import SparkConf
from pyspark import SparkContext

# Start a local SparkContext with the application name "test"
sc = SparkContext("local", "test")
Replace the SPARK_HOME and JAVA_HOME paths above with the locations of your own Spark and JDK installations, and update the sys.path entries to match.
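
Alternatively, the findspark package can locate Spark and set these paths for you; a minimal sketch, assuming findspark has been installed with "pip install findspark":

import findspark
# The path argument is optional if SPARK_HOME is already set in the environment
findspark.init("C:/Spark1.6.1/spark-1.6.1-bin-hadoop2.6")
from pyspark import SparkContext
sc = SparkContext("local", "test")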
Testing
# Read a text file into an RDD; README.md must exist in the notebook's working directory
textFile = sc.textFile("README.md")
# Count the lines; a number printing without errors means the setup works
textFile.count()
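
If the count succeeds, a simple transformation makes a further check; a small sketch reusing the same RDD:

# Keep only the lines that mention Spark, then count them
linesWithSpark = textFile.filter(lambda line: "Spark" in line)
linesWithSpark.count()
# Inspect the first matching line
linesWithSpark.first()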