Cannot find org.apache.spark.sql.cassandra when I call sqlContext.read in a Jupyter notebook.
What am I doing wrong?
My notebook code:

    import os
    import findspark
    from pyspark import SparkContext
    from cassandra.cluster import Cluster
    from cassandra.auth import PlainTextAuthProvider

    os.environ['SPARK_HOME'] = '/home/.../spark-2.4.5-bin-hadoop2.7/'
    findspark.init()

    sc = SparkContext(appName="myAppName")

    os.environ['PYSPARK_SUBMIT_ARGS'] = (
        '--conf spark.cassandra.connection.host=anyip '
        '--conf spark.executor.cores=2 '
        '--conf spark.cassandra.auth.username=cassandra '
        '--conf spark.cassandra.auth.password=pass '
        '--properties spark:spark.jars.packages=datastax:spark-cassandra-connector:2.3.0-s_2.11 '
        '--master spark://anyip:7077'
    )

    # Direct connection via the Cassandra Python driver
    auth_provider = PlainTextAuthProvider(username='cassandra', password='pass')
    cluster = Cluster(['anyip'], port=9042, connect_timeout=3600,
                      auth_provider=auth_provider)
    session = cluster.connect('dbsvc')
    session.default_timeout = 600

    from pyspark.sql import SQLContext
    sqlContext = SQLContext(sc)

    def load_and_get_table_df(keys_space_name, table_name):
        table_df = (sqlContext.read
                    .format("org.apache.spark.sql.cassandra")
                    .options(table=table_name, keyspace=keys_space_name)
                    .load())
        return table_df

    uc = load_and_get_table_df('dbsvc', 'usercounters')
The load() call fails with:

    Py4JJavaError: An error occurred while calling o27.load.
    : java.lang.ClassNotFoundException: Failed to find data source: org.apache.spark.sql.cassandra.
      Please find packages at http://spark.apache.org/third-party-projects.html
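For context, this ClassNotFoundException usually means the Spark Cassandra Connector jar never reached the driver/executor classpath. Two things in the setup above point that way: PYSPARK_SUBMIT_ARGS is assigned after the SparkContext has already been created, so Spark never sees those arguments, and --properties is not a spark-submit option (that spark:spark.jars.packages syntax belongs to gcloud dataproc); the spark-submit flag for pulling the connector from a repository is --packages. Below is a minimal sketch of the initialization order that should resolve it, assuming Spark 2.4.5 built for Scala 2.11; connector version 2.4.3 is my assumption (pick the release matching your Spark/Scala build), and anyip plus the credentials are the placeholders from the question:

    import os

    # Submit args must be set BEFORE the SparkContext is created, and the
    # trailing 'pyspark-shell' token is required when launching PySpark
    # this way. Connector 2.4.3 for Scala 2.11 is an assumption; match it
    # to your own Spark/Scala build.
    os.environ['SPARK_HOME'] = '/home/.../spark-2.4.5-bin-hadoop2.7/'
    os.environ['PYSPARK_SUBMIT_ARGS'] = (
        '--packages com.datastax.spark:spark-cassandra-connector_2.11:2.4.3 '
        '--conf spark.cassandra.connection.host=anyip '
        '--conf spark.cassandra.auth.username=cassandra '
        '--conf spark.cassandra.auth.password=pass '
        '--master spark://anyip:7077 '
        'pyspark-shell'
    )

    import findspark
    findspark.init()  # reads SPARK_HOME

    from pyspark import SparkContext
    from pyspark.sql import SQLContext

    sc = SparkContext(appName="myAppName")  # now picks up --packages
    sqlContext = SQLContext(sc)

    # Same read as in the question; the data source should now resolve.
    uc = (sqlContext.read
          .format("org.apache.spark.sql.cassandra")
          .options(table='usercounters', keyspace='dbsvc')
          .load())

Note that --packages downloads the connector when the context starts, so the first run needs network access to the Maven repository.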