java - Colons in Apache Spark application path
I'm submitting an Apache Spark application to YARN programmatically:
package application.restapplication;

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.deploy.yarn.Client;
import org.apache.spark.deploy.yarn.ClientArguments;

public class App {
    public static void main(String[] args1) {
        String[] args = new String[] {
            "--class", "org.apache.spark.examples.JavaWordCount",
            "--jar", "/opt/spark/examples/jars/spark-examples_2.11-2.0.0.jar",
            "--arg", "hdfs://hadoop-master:9000/input/file.txt"
        };
        Configuration config = new Configuration();
        System.setProperty("SPARK_YARN_MODE", "true");
        SparkConf sparkConf = new SparkConf();
        ClientArguments cArgs = new ClientArguments(args);
        Client client = new Client(cArgs, config, sparkConf);
        client.run();
    }
}
I have a problem with this line: "--arg", "hdfs://hadoop-master:9000/input/file.txt" - more precisely, with the colons:
16/08/29 09:54:16 ERROR yarn.ApplicationMaster: Uncaught exception:
java.lang.NumberFormatException: For input string: "9000/input/plik2.txt"
    at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65)
    at java.lang.Integer.parseInt(Integer.java:580)
    at java.lang.Integer.parseInt(Integer.java:615)
    at scala.collection.immutable.StringLike$class.toInt(StringLike.scala:272)
    at scala.collection.immutable.StringOps.toInt(StringOps.scala:29)
    at org.apache.spark.util.Utils$.parseHostPort(Utils.scala:935)
    at org.apache.spark.deploy.yarn.ApplicationMaster.waitForSparkDriver(ApplicationMaster.scala:547)
    at org.apache.spark.deploy.yarn.ApplicationMaster.runExecutorLauncher(ApplicationMaster.scala:405)
    at org.apache.spark.deploy.yarn.ApplicationMaster.run(ApplicationMaster.scala:247)
    at org.apache.spark.deploy.yarn.ApplicationMaster$$anonfun$main$1.apply$mcV$sp(ApplicationMaster.scala:749)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:71)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$1.run(SparkHadoopUtil.scala:70)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1657)
    at org.apache.spark.deploy.SparkHadoopUtil.runAsSparkUser(SparkHadoopUtil.scala:70)
    at org.apache.spark.deploy.yarn.ApplicationMaster$.main(ApplicationMaster.scala:747)
    at org.apache.spark.deploy.yarn.ExecutorLauncher$.main(ApplicationMaster.scala:774)
    at org.apache.spark.deploy.yarn.ExecutorLauncher.main(ApplicationMaster.scala)
How do I write (as an argument) a file path that contains colons? I have tried various combinations of slashes, backslashes, %3a, etc...
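For comparison, here is a minimal sketch of the same submission done through the public org.apache.spark.launcher.SparkLauncher API instead of the internal yarn Client. The Spark home path, the "yarn" master and the "cluster" deploy mode are assumptions, and whether this route sidesteps the colon problem is untested:

import org.apache.spark.launcher.SparkAppHandle;
import org.apache.spark.launcher.SparkLauncher;

public class LauncherApp {
    public static void main(String[] args) throws Exception {
        // Same jar, main class and HDFS path as above; SPARK_HOME,
        // master and deploy mode are assumed values.
        SparkAppHandle handle = new SparkLauncher()
            .setSparkHome("/opt/spark")
            .setAppResource("/opt/spark/examples/jars/spark-examples_2.11-2.0.0.jar")
            .setMainClass("org.apache.spark.examples.JavaWordCount")
            .setMaster("yarn")
            .setDeployMode("cluster")
            .addAppArgs("hdfs://hadoop-master:9000/input/file.txt")
            .startApplication();

        // Block until the application reaches a terminal state.
        while (!handle.getState().isFinal()) {
            Thread.sleep(1000);
        }
    }
}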
According to Utils#parseHostPort, which gets invoked during the call, Spark seems to treat everything behind the last colon as the port:
def parseHostPort(hostPort: String): (String, Int) = {
  // Check cache first.
  val cached = hostPortParseResults.get(hostPort)
  if (cached != null) {
    return cached
  }

  val indx: Int = hostPort.lastIndexOf(':')
  // This is potentially broken - when dealing with ipv6 addresses for example, sigh ...
  // but then hadoop does not support ipv6 right now.
  // For now, we assume that if port exists, then it is valid - not check if it is an int > 0
  if (-1 == indx) {
    val retval = (hostPort, 0)
    hostPortParseResults.put(hostPort, retval)
    return retval
  }

  val retval = (hostPort.substring(0, indx).trim(), hostPort.substring(indx + 1).trim().toInt)
  hostPortParseResults.putIfAbsent(hostPort, retval)
  hostPortParseResults.get(hostPort)
}
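To see the failure in isolation, here is a small self-contained Java snippet (a hypothetical demo, not part of Spark) that replays the lastIndexOf(':') split on the path from the question:

public class ParseHostPortDemo {
    public static void main(String[] args) {
        String hostPort = "hdfs://hadoop-master:9000/input/file.txt";
        int indx = hostPort.lastIndexOf(':');
        String host = hostPort.substring(0, indx).trim();
        String portText = hostPort.substring(indx + 1).trim();
        System.out.println("host = " + host);      // hdfs://hadoop-master
        System.out.println("port = " + portText);  // 9000/input/file.txt
        // This is exactly the string seen in the stack trace:
        Integer.parseInt(portText);                // throws NumberFormatException
    }
}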
As a consequence, the whole string 9000/input/file.txt is supposed to be a single port number, which suggests that you are not supposed to refer to the HDFS file system there. I guess someone more skilled in Apache Spark can give you better advice.
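One guess, stated as an assumption: the stack trace runs through ExecutorLauncher and waitForSparkDriver, i.e. the client-mode application master, which expects its --arg to carry the driver's host:port rather than a user argument. If that is what happens here, pinning the deploy mode to cluster before constructing the Client might keep --arg as a plain argument to JavaWordCount. A sketch, not a verified fix:

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.SparkConf;
import org.apache.spark.deploy.yarn.Client;
import org.apache.spark.deploy.yarn.ClientArguments;

public class ClusterModeApp {
    public static void main(String[] args1) {
        String[] args = new String[] {
            "--class", "org.apache.spark.examples.JavaWordCount",
            "--jar", "/opt/spark/examples/jars/spark-examples_2.11-2.0.0.jar",
            "--arg", "hdfs://hadoop-master:9000/input/file.txt"
        };
        SparkConf sparkConf = new SparkConf();
        // Assumption: with cluster mode pinned, the application master runs
        // JavaWordCount itself, so "--arg" stays a user argument instead of
        // being parsed as the driver's host:port.
        sparkConf.set("spark.master", "yarn");
        sparkConf.set("spark.submit.deployMode", "cluster");
        System.setProperty("SPARK_YARN_MODE", "true");
        Client client = new Client(new ClientArguments(args), new Configuration(), sparkConf);
        client.run();
    }
}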