spark HypothesisTestingExample 源码
spark HypothesisTestingExample 代码
文件路径:/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// scalastyle:off println
package org.apache.spark.examples.mllib
import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.stat.Statistics
import org.apache.spark.mllib.stat.test.ChiSqTestResult
import org.apache.spark.rdd.RDD
// $example off$
object HypothesisTestingExample {
def main(args: Array[String]): Unit = {
val conf = new SparkConf().setAppName("HypothesisTestingExample")
val sc = new SparkContext(conf)
// $example on$
// a vector composed of the frequencies of events
val vec: Vector = Vectors.dense(0.1, 0.15, 0.2, 0.3, 0.25)
// compute the goodness of fit. If a second vector to test against is not supplied
// as a parameter, the test runs against a uniform distribution.
val goodnessOfFitTestResult = Statistics.chiSqTest(vec)
// summary of the test including the p-value, degrees of freedom, test statistic, the method
// used, and the null hypothesis.
println(s"$goodnessOfFitTestResult\n")
// a contingency matrix. Create a dense matrix ((1.0, 2.0), (3.0, 4.0), (5.0, 6.0))
val mat: Matrix = Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0))
// conduct Pearson's independence test on the input contingency matrix
val independenceTestResult = Statistics.chiSqTest(mat)
// summary of the test including the p-value, degrees of freedom
println(s"$independenceTestResult\n")
val obs: RDD[LabeledPoint] =
sc.parallelize(
Seq(
LabeledPoint(1.0, Vectors.dense(1.0, 0.0, 3.0)),
LabeledPoint(1.0, Vectors.dense(1.0, 2.0, 0.0)),
LabeledPoint(-1.0, Vectors.dense(-1.0, 0.0, -0.5)
)
)
) // (label, feature) pairs.
// The contingency table is constructed from the raw (label, feature) pairs and used to conduct
// the independence test. Returns an array containing the ChiSquaredTestResult for every feature
// against the label.
val featureTestResults: Array[ChiSqTestResult] = Statistics.chiSqTest(obs)
featureTestResults.zipWithIndex.foreach { case (k, v) =>
println(s"Column ${(v + 1)} :")
println(k)
} // summary of the test
// $example off$
sc.stop()
}
}
// scalastyle:on println
相关信息
相关文章
spark AssociationRulesExample 源码
spark BinaryClassificationMetricsExample 源码
0
赞
- 所属分类: 前端技术
- 本文标签:
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
7、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦