报错内容
异常代码
package com.kmai.demo02
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}
object rua04_Error {
//1.5.1、查询出微博会员等级为5的用户,并把这些数据写入到mysql数据库中的vip_rank表中
/**
 * Deliberately faulty variant, kept as the failing example for this article.
 *
 * Reads string records from the Kafka topic "rng_comment" with a direct stream
 * and hands every line to saveToMySQL. The JDBC connection is created once,
 * outside the foreachRDD/foreachPartition closures, and is referenced from
 * inside them; see the notes at the marked section below for why that fails.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
// val sparkConf: SparkConf = new SparkConf().setAppName("wula").setMaster("local[*]")
// val streamingContext: StreamingContext = new StreamingContext(sparkConf, Seconds(5))
val sparkConf: SparkConf = new SparkConf().setAppName("wula").setMaster("local[*]")
val sc = new SparkContext(sparkConf)
sc.setLogLevel("warn")
// Create the StreamingContext (2-second batch interval)
val streamingContext = new StreamingContext(sc, Seconds(2))
// Parameters for connecting to Kafka
val kafkaParams = Map[String, Object](
"bootstrap.servers" -> "node01:9092,node02:9092,node03:9092",
"key.deserializer" -> classOf[StringDeserializer],
"value.deserializer" -> classOf[StringDeserializer],
"group.id" -> "test",
// earliest: if a partition has a committed offset, consume from it; otherwise consume from the beginning
// latest: if a partition has a committed offset, consume from it; otherwise consume only newly produced data
// none: consume from the committed offsets when every partition has one; throw if any partition has none
// "latest" is configured here: resume from the committed offset if present, otherwise start with new data
"auto.offset.reset" -> "latest",
// false turns auto-commit off; offsets are then committed by Spark to the checkpoint or maintained manually
"enable.auto.commit" -> (false: java.lang.Boolean)
)
// Alternative Kafka parameters using the ConsumerConfig constants
// val kafkaParams: Map[String, Object] = Map[String, Object](
// ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "node01:9092,node02:9092,node03:9092",
// ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
// ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
// ConsumerConfig.GROUP_ID_CONFIG -> "SparkKafka",
// ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest",
// ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "earliest", // false turns auto-commit off; offsets are committed by Spark to the checkpoint or maintained manually
// ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))
val topics = Array("rng_comment")
// Receive the Kafka data and process it according to the business logic.
// Location strategy: PreferConsistent is the one the Spark source strongly recommends; it pairs executors and brokers evenly.
// Consumer strategy: Subscribe is likewise the one the Spark source strongly recommends.
val kafkaDatas: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(streamingContext, LocationStrategies.PreferConsistent, ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
// To reuse an RDD it has to be cached; caching and releasing RDDs must be done explicitly
val kfs: DStream[String] = kafkaDatas.map(_.value()).cache()
[dangerbox title="异常位置代码"]
// FLAW 1: conn is created here, outside the closures below, yet it is used
// inside the function passed to foreachPartition. That function has to be
// serialized to be shipped to the executors, and java.sql.Connection is not
// Serializable, so the job fails with the serialization error this article is about.
val conn: Connection = DriverManager.getConnection("jdbc:mysql://node01:3306/rng_comment?characterEncoding=UTF-8", "root", "123456")
kfs.foreachRDD { rdd: RDD[String] =>
rdd.foreachPartition { iter: Iterator[String] => {
iter.foreach { line: String =>
println(line)
saveToMySQL(line, conn)
}
}
}
}
// FLAW 2: this close() is executed before streamingContext.start() below,
// i.e. before any batch has been processed, so even a serializable
// connection would already be closed when the first record arrives.
conn.close()
[/dangerbox]
// Start the streaming job
streamingContext.start()
// Block until the job is stopped
streamingContext.awaitTermination()
}
/**
 * Inserts one tab-separated record into the `vip_rank` table when it describes
 * a level-5 member.
 *
 * A record is written only if it splits into exactly 11 fields and the 10th
 * field equals "5"; any other line is ignored. Note that `String.split` drops
 * trailing empty fields, so a line whose last column is empty yields fewer
 * than 11 fields and is skipped as well.
 *
 * The statement is executed exactly once per record. The previous version
 * called `execute()` and then `executeUpdate()`, which inserted every row twice.
 *
 * @param data one raw input line, fields separated by tab characters
 * @param conn open JDBC connection; it is used but not closed by this method
 */
def saveToMySQL(data: String, conn: Connection): Unit = {
  val fields = data.split("\t")
  if (fields.length == 11 && fields(9) == "5") {
    val sql = "insert into vip_rank values (?,?,?,?,?,?,?,?,?,?,?)"
    val ps: PreparedStatement = conn.prepareStatement(sql)
    try {
      // JDBC parameter indexes are 1-based; bind all 11 columns as strings.
      for (i <- fields.indices) ps.setString(i + 1, fields(i))
      ps.executeUpdate()
    } finally {
      // Release the statement even when binding or execution throws.
      ps.close()
    }
  }
}
// def saveToMySQL(partitionData: Iterator[rng_comment]): Unit = {
// //将数据存入到MySQL
// val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/rng_comment?characterEncoding=UTF-8", "root", "root")
// partitionData.foreach(data => {
// val sql = "insert into vip_rank values(?,?,?,?,?,?,?,?,?,?,?)"
//
// val ps = conn.prepareCall(sql)
//
// ps.setInt(1, data.index)
// ps.setInt(2, data.child_comment)
// ps.setString(3, data.comment_time)
// ps.setString(4, data.content)
// ps.setInt(5, data.da_v)
// ps.setInt(6, data.like_status)
// ps.setString(7, data.pic)
// ps.setString(8, data.user_id)
// ps.setString(9, data.user_name)
// ps.setInt(10, data.vip_rank)
// ps.setLong(11, data.stamp)
// ps.execute() //preparedStatement.addBatch()
// })
// //ps.executeBatch()
// conn.close()
//
// }
}
原因
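传给 foreachPartition 的函数(在 Java API 中对应实现了 ForeachFunction 接口的内部类)在运行时会由 Driver(Master)序列化后发送给 Executor(Slave),因此这个函数对象本身以及它引用到的所有外部对象都必须可以序列化。上面的错误代码在 Driver 端、闭包之外创建了 Connection 对象 conn,又在闭包内部使用了它,于是序列化闭包时必须连同 conn 一起序列化;而 Connection 并没有实现 Serializable 接口,无法序列化,所以会报上面的错误。另外,conn.close() 写在 streamingContext.start() 之前,即使连接能够序列化,任务真正执行时它也已经被关闭了。解决办法是把连接的创建和关闭都放到 foreachPartition 内部,让每个分区在 Executor 端各自创建并关闭自己的连接。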
正确代码
package com.kmai.demo02
import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
object rua04 {
//1.5.1、查询出微博会员等级为5的用户,并把这些数据写入到mysql数据库中的vip_rank表中
/**
 * Streams records from the Kafka topic "rng_comment" and writes the level-5
 * members to MySQL through saveToMySQL.
 *
 * The JDBC connection is opened inside foreachPartition, so it is created on
 * the executor that processes the partition and never has to be serialized.
 * It is closed in a `finally` block so that a failing insert cannot leak it,
 * and no connection is opened at all for an empty partition.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  val sparkConf: SparkConf = new SparkConf().setAppName("wula").setMaster("local[*]")
  val sc = new SparkContext(sparkConf)
  sc.setLogLevel("warn")
  // Create the StreamingContext with a 2-second batch interval.
  val streamingContext = new StreamingContext(sc, Seconds(2))
  // Parameters for connecting to Kafka. The same settings could be written
  // with the ConsumerConfig.*_CONFIG constants instead of string keys.
  val kafkaParams = Map[String, Object](
    "bootstrap.servers" -> "node01:9092,node02:9092,node03:9092",
    "key.deserializer" -> classOf[StringDeserializer],
    "value.deserializer" -> classOf[StringDeserializer],
    "group.id" -> "test",
    // earliest: if a partition has a committed offset, consume from it; otherwise consume from the beginning
    // latest: if a partition has a committed offset, consume from it; otherwise consume only newly produced data
    // none: consume from the committed offsets when every partition has one; throw if any partition has none
    // "latest" is configured here: resume from the committed offset if present, otherwise start with new data
    "auto.offset.reset" -> "latest",
    // false turns auto-commit off; offsets are then committed by Spark to the checkpoint or maintained manually
    "enable.auto.commit" -> (false: java.lang.Boolean)
  )
  val topics = Array("rng_comment")
  // Receive the Kafka data. PreferConsistent (location strategy) and Subscribe
  // (consumer strategy) are the choices the Spark source strongly recommends.
  val kafkaDatas: InputDStream[ConsumerRecord[String, String]] = KafkaUtils.createDirectStream(
    streamingContext,
    LocationStrategies.PreferConsistent,
    ConsumerStrategies.Subscribe[String, String](topics, kafkaParams))
  // Cache the extracted values so the RDDs can be reused; caching must be requested explicitly.
  // The level-5 / 11-field filtering is done inside saveToMySQL rather than on the stream.
  val kfs: DStream[String] = kafkaDatas.map(_.value()).cache()
  kfs.foreachRDD { rdd: RDD[String] =>
    rdd.foreachPartition { iter: Iterator[String] =>
      // Skip empty partitions so that no MySQL connection is opened for nothing.
      if (iter.nonEmpty) {
        // Open the MySQL connection on the executor, one per partition.
        val conn: Connection = DriverManager.getConnection("jdbc:mysql://node01:3306/rng_comment?characterEncoding=UTF-8", "root", "123456")
        try {
          iter.foreach { line: String =>
            println(line)
            saveToMySQL(line, conn)
          }
        } finally {
          // Always give the connection back, even if an insert failed.
          conn.close()
        }
      }
    }
  }
  // Start the streaming job.
  streamingContext.start()
  // Block until the job is stopped.
  streamingContext.awaitTermination()
}
/**
 * Inserts one tab-separated record into the `vip_rank` table when it describes
 * a level-5 member.
 *
 * A record is written only if it splits into exactly 11 fields and the 10th
 * field equals "5"; any other line is ignored. Note that `String.split` drops
 * trailing empty fields, so a line whose last column is empty yields fewer
 * than 11 fields and is skipped as well.
 *
 * The statement is executed exactly once per record. The previous version
 * called `execute()` and then `executeUpdate()`, which inserted every row twice.
 *
 * @param data one raw input line, fields separated by tab characters
 * @param conn open JDBC connection; it is used but not closed by this method
 */
def saveToMySQL(data: String, conn: Connection): Unit = {
  val fields = data.split("\t")
  if (fields.length == 11 && fields(9) == "5") {
    val sql = "insert into vip_rank values (?,?,?,?,?,?,?,?,?,?,?)"
    val ps: PreparedStatement = conn.prepareStatement(sql)
    try {
      // JDBC parameter indexes are 1-based; bind all 11 columns as strings.
      for (i <- fields.indices) ps.setString(i + 1, fields(i))
      ps.executeUpdate()
    } finally {
      // Release the statement even when binding or execution throws.
      ps.close()
    }
  }
}
文章评论