scala - How to query data stored in a Hive table using SparkSession in Spark 2?
I am trying to query data stored in a Hive table from Spark 2. My environment:

1. cloudera-quickstart-vm-5.7.0-0-vmware
2. Eclipse with the Scala 2.11.8 plugin
3. Spark 2 and Maven

I did not change the Spark default configuration. Do I need to configure anything in Spark or Hive?
Code:
    import org.apache.spark._
    import org.apache.spark.sql.SparkSession

    object HiveTest {
      def main(args: Array[String]) {
        val sparkSession = SparkSession.builder
          .master("local")
          .appName("HiveSQL")
          .enableHiveSupport()
          .getOrCreate()

        val data = sparkSession2.sql("select * test.mark")
      }
    }
I am getting the following error:
    16/08/29 00:18:10 INFO SparkSqlParser: Parsing command: select * test.mark
    Exception in thread "main" java.lang.ExceptionInInitializerError
        at org.apache.spark.sql.hive.HiveSharedState.metadataHive$lzycompute(HiveSharedState.scala:48)
        at org.apache.spark.sql.hive.HiveSharedState.metadataHive(HiveSharedState.scala:47)
        at org.apache.spark.sql.hive.HiveSharedState.externalCatalog$lzycompute(HiveSharedState.scala:54)
        at org.apache.spark.sql.hive.HiveSharedState.externalCatalog(HiveSharedState.scala:54)
        at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:50)
        at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
        at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
        at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
        at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
        at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
        at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:582)
        at HiveTest$.main(HiveTest.scala:34)
        at HiveTest.main(HiveTest.scala)
    Caused by: java.lang.IllegalArgumentException: requirement failed: Duplicate SQLConfigEntry. spark.sql.hive.convertCTAS has been registered
        at scala.Predef$.require(Predef.scala:224)
        at org.apache.spark.sql.internal.SQLConf$.org$apache$spark$sql$internal$SQLConf$$register(SQLConf.scala:44)
        at org.apache.spark.sql.internal.SQLConf$SQLConfigBuilder$$anonfun$apply$1.apply(SQLConf.scala:51)
        at org.apache.spark.sql.internal.SQLConf$SQLConfigBuilder$$anonfun$apply$1.apply(SQLConf.scala:51)
        at org.apache.spark.internal.config.TypedConfigBuilder$$anonfun$createWithDefault$1.apply(ConfigBuilder.scala:122)
        at org.apache.spark.internal.config.TypedConfigBuilder$$anonfun$createWithDefault$1.apply(ConfigBuilder.scala:122)
        at scala.Option.foreach(Option.scala:257)
        at org.apache.spark.internal.config.TypedConfigBuilder.createWithDefault(ConfigBuilder.scala:122)
        at org.apache.spark.sql.hive.HiveUtils$.<init>(HiveUtils.scala:103)
        at org.apache.spark.sql.hive.HiveUtils$.<clinit>(HiveUtils.scala)
        ... 14 more
Any suggestions appreciated.

Thanks,
Robin
I am using this:
    import org.apache.spark.sql.SparkSession

    object LoadCortexDataLake extends App {
      val spark = SparkSession.builder().appName("cortex-batch").enableHiveSupport().getOrCreate()
      spark.read.parquet(file).createOrReplaceTempView("temp")
      spark.sql(s"insert overwrite table $table_nm partition(year='$yr',month='$mth',day='$dt') select * from temp")
    }
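For context, the snippet above relies on file, table_nm, yr, mth, and dt being defined elsewhere in the job. A self-contained sketch with hypothetical values (the path, table name, and partition values below are illustrative, not from the original post) could look like:

    import org.apache.spark.sql.SparkSession

    object LoadCortexDataLake extends App {
      // Hypothetical values -- the original job defines these elsewhere
      val file = "hdfs:///data/cortex/input.parquet" // source Parquet path
      val table_nm = "cortex.events"                 // target Hive table, partitioned by (year, month, day)
      val (yr, mth, dt) = ("2016", "08", "29")       // partition values to overwrite

      val spark = SparkSession.builder().appName("cortex-batch").enableHiveSupport().getOrCreate()

      // Register the Parquet data as a temporary view, then overwrite one partition of the Hive table
      spark.read.parquet(file).createOrReplaceTempView("temp")
      spark.sql(s"insert overwrite table $table_nm partition(year='$yr',month='$mth',day='$dt') select * from temp")
    }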
I think you should use 'sparkSession.sql' instead of 'sparkSession2.sql'.
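Putting that fix together with the missing FROM keyword in the query, a minimal corrected version of the original program could look like this (assuming the Hive table test.mark exists and Spark can reach the Hive metastore, e.g. via a hive-site.xml on the classpath):

    import org.apache.spark.sql.SparkSession

    object HiveTest {
      def main(args: Array[String]): Unit = {
        val sparkSession = SparkSession.builder
          .master("local")
          .appName("HiveSQL")
          .enableHiveSupport()
          .getOrCreate()

        // Use the session created above (sparkSession, not sparkSession2)
        // and include FROM in the query
        val data = sparkSession.sql("select * from test.mark")
        data.show()
      }
    }

Note also that the ExceptionInInitializerError itself ("Duplicate SQLConfigEntry. spark.sql.hive.convertCTAS has been registered") is typically a classpath problem rather than a query problem: it tends to appear when two different versions of the Spark SQL/Hive artifacts are mixed on the classpath, so it is worth checking that spark-core, spark-sql, and spark-hive in the Maven pom all share the same Spark version.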