Saturday, November 14, 2015

Use Apache Spark in IntelliJ

Add this line in your build.sbt:

libraryDependencies += "org.apache.spark" %% "spark-core" % "1.5.2"

Do something like this:

/** Minimal Spark job: loads the classpath resource `/mtcars.txt` and sums the
  * lengths of its lines.
  *
  * Everything runs when the object is first initialised (i.e. on first access
  * to any of its members), which starts a local SparkContext.
  */
object Script3 {
  import org.apache.spark.SparkContext
  import org.apache.spark.SparkConf

  // Resolve the input before starting Spark: getResource returns null when the
  // file is not on the classpath, so fail early with a readable message instead
  // of a NullPointerException raised after the context is already running.
  private val mtcarsLocation: String =
    Option(getClass.getResource("/mtcars.txt"))
      .map(_.toString)
      .getOrElse(
        throw new java.io.FileNotFoundException(
          "Resource /mtcars.txt was not found on the classpath"))

  // local[4] means that you want Spark to run locally with 4 threads;
  // you can point this at a cluster when your app is production ready, of course
  val conf = new SparkConf().setAppName("appspark").setMaster("local[4]")
  val sc = new SparkContext(conf)

  // One element per line of the input file.
  val lines = sc.textFile(mtcarsLocation)
  // Number of characters in each line.
  val lineLengths = lines.map(x => x.length)
  // fold with a zero element instead of reduce, so an empty file yields 0
  // rather than an exception.
  val totalLength: Int = lineLengths.fold(0)(_ + _)
}

/** Quietens Spark's logging and then prints `Script3.lines`.
  *
  * The log levels are set before `lines` is touched, so they are already in
  * effect when `Script3` (and with it the SparkContext) is initialised.
  */
object Script4 {
  import org.apache.log4j.{Level, Logger}
  import Script3._

  // Spark logs are too verbose by default; only show messages when
  // something is wrong (WARN and above) for the Spark and Akka loggers.
  Seq("org", "akka").foreach { loggerName =>
    Logger.getLogger(loggerName).setLevel(Level.WARN)
  }

  // NOTE(review): `lines` is the value returned by sc.textFile, so this prints
  // that object's string form, presumably not the file's rows — confirm intent.
  println(lines)
}

0 comments: