spark RDDFunctions 源码
spark RDDFunctions 代码
文件路径:/mllib/src/main/scala/org/apache/spark/mllib/rdd/RDDFunctions.scala
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.mllib.rdd
import scala.language.implicitConversions
import scala.reflect.ClassTag
import org.apache.spark.rdd.RDD
/**
* Machine learning specific RDD functions.
*/
class RDDFunctions[T: ClassTag](self: RDD[T]) extends Serializable {
/**
* Returns an RDD from grouping items of its parent RDD in fixed size blocks by passing a sliding
* window over them. The ordering is first based on the partition index and then the ordering of
* items within each partition. This is similar to sliding in Scala collections, except that it
* becomes an empty RDD if the window size is greater than the total number of items. It needs to
* trigger a Spark job if the parent RDD has more than one partitions and the window size is
* greater than 1.
*/
def sliding(windowSize: Int, step: Int): RDD[Array[T]] = {
require(windowSize > 0, s"Sliding window size must be positive, but got $windowSize.")
if (windowSize == 1 && step == 1) {
self.map(Array(_))
} else {
new SlidingRDD[T](self, windowSize, step)
}
}
/**
* `sliding(Int, Int)*` with step = 1.
*/
def sliding(windowSize: Int): RDD[Array[T]] = sliding(windowSize, 1)
}
object RDDFunctions {
/** Implicit conversion from an RDD to RDDFunctions. */
implicit def fromRDD[T: ClassTag](rdd: RDD[T]): RDDFunctions[T] = new RDDFunctions[T](rdd)
}
相关信息
相关文章
0
赞
- 所属分类: 前端技术
- 本文标签:
热门推荐
-
2、 - 优质文章
-
3、 gate.io
-
7、 golang
-
9、 openharmony
-
10、 Vue中input框自动聚焦