Spark的shuffle算子
目录：一、去重；二、聚合；三、排序；四、重分区；五、集合或者表操作
一、去重
def
distinct()
def
distinct(numPartitions
: Int
)
二、聚合
def
reduceByKey(func
: (V
, V
) => V
, numPartitions
: Int
): RDD
[(K
, V
)]
def
reduceByKey(partitioner
: Partitioner
, func
: (V
, V
) => V
): RDD
[(K
, V
)]
def groupBy
[K
](f
: T
=> K
, p
: Partitioner
):RDD
[(K
, Iterable
[T
])]
def
groupByKey(partitioner
: Partitioner
):RDD
[(K
, Iterable
[V
])]
def aggregateByKey
[U
: ClassTag
](zeroValue
: U
, partitioner
: Partitioner
)(seqOp: (U, V) => U, combOp: (U, U) => U): RDD
[(K
, U
)]
def aggregateByKey
[U
: ClassTag
](zeroValue
: U
, numPartitions
: Int
)(seqOp: (U, V) => U, combOp: (U, U) => U): RDD
[(K
, U
)]
def combineByKey
[C
](createCombiner
: V
=> C
, mergeValue
: (C
, V
) => C
, mergeCombiners
: (C
, C
) => C
): RDD
[(K
, C
)]
def combineByKey
[C
](createCombiner
: V
=> C
, mergeValue
: (C
, V
) => C
, mergeCombiners
: (C
, C
) => C
, numPartitions
: Int
): RDD
[(K
, C
)]
def combineByKey
[C
](createCombiner
: V
=> C
, mergeValue
: (C
, V
) => C
, mergeCombiners
: (C
, C
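) => C, partitioner: Partitioner, mapSideCombine: Boolean = true, serializer: Serializer = null): RDD[(K, C)]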
三、排序
def
sortByKey(ascending
: Boolean
= true, numPartitions
: Int
= self
.partitions
.length
): RDD
[(K
, V
)]
def sortBy
[K
](f
: (T
) => K
, ascending
: Boolean
= true, numPartitions
: Int
= this.partitions
.length)(implicit ord: Ordering[K], ctag: ClassTag[K]): RDD[T]
四、重分区
def
coalesce(numPartitions
: Int
, shuffle
: Boolean
= false, partitionCoalescer
: Option
[PartitionCoalescer
] = Option
.empty
)
（注意：coalesce 仅在 shuffle = true 时才会产生 shuffle，默认 shuffle = false 时不会。）
def
repartition(numPartitions
: Int
)(implicit ord
: Ordering
[T
] = null
)
五、集合或者表操作
def
intersection(other
: RDD
[T
]): RDD
[T
]
def
intersection(other
: RDD
[T
], partitioner
: Partitioner
)(implicit ord
: Ordering
[T
] = null
): RDD
[T
]
def
intersection(other
: RDD
[T
], numPartitions
: Int
): RDD
[T
]
def
subtract(other
: RDD
[T
], numPartitions
: Int
): RDD
[T
]
def
subtract(other
: RDD
[T
], p
: Partitioner
)(implicit ord
: Ordering
[T
] = null
): RDD
[T
]
def subtractByKey
[W
: ClassTag
](other
: RDD
[(K
, W
)]): RDD
[(K
, V
)]
def subtractByKey
[W
: ClassTag
](other
: RDD
[(K
, W
)], numPartitions
: Int
): RDD
[(K
, V
)]
def subtractByKey
[W
: ClassTag
](other
: RDD
[(K
, W
)], p
: Partitioner
): RDD
[(K
, V
)]
def join
[W
](other
: RDD
[(K
, W
)], partitioner
: Partitioner
): RDD
[(K
, (V
, W
))]
def join
[W
](other
: RDD
[(K
, W
)]): RDD
[(K
, (V
, W
))]
def join
[W
](other
: RDD
[(K
, W
)], numPartitions
: Int
): RDD
[(K
, (V
, W
))]
def leftOuterJoin
[W
](other
: RDD
[(K
, W
)]): RDD
[(K
, (V
, Option
[W
]))]
转载请注明原文地址:https://ipadbbs.8miu.com/read-10443.html