DataFrame
org.apache.spark.sql.DataFrame => com.snowflake.snowpark.DataFrame
Spark | Snowpark | Notes |
---|---|---|
agg(expr: Column, exprs: Column*): DataFrame | ||
agg(exprs: Map[String, String]): DataFrame | N/A | |
agg(exprs: java.util.Map[String, String]): DataFrame | N/A | |
agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame | ||
apply(colName: String): Column | ||
cache(): DataFrame | | cache is an alias for persist. |
col(colName: String): Column | ||
collect(): Array[T] | ||
count(): Long | ||
createOrReplaceTempView(viewName: String): Unit | ||
createTempView(viewName: String): Unit | N/A | |
crossJoin(right: Dataset[_]): DataFrame | ||
cube(col1: String, cols: String*): RelationalGroupedDataset | ||
cube(cols: Column*): RelationalGroupedDataset | cube* | |
distinct(): Dataset[T] | ||
drop(col: Column): DataFrame | drop* | |
drop(colNames: String*): DataFrame | drop* | |
drop(colName: String): DataFrame | ||
dropDuplicates(col1: String, cols: String*): Dataset[T] | ||
dropDuplicates(): Dataset[T] | ||
except(other: Dataset[T]): Dataset[T] | ||
explain(): Unit | ||
filter(condition: Column): Dataset[T] | ||
first(): T | ||
foreach(func: ForeachFunction[T]): Unit | N/A | |
foreach(f: (T) ⇒ Unit): Unit | N/A | |
groupBy(col1: String, cols: String*): RelationalGroupedDataset | ||
groupBy(cols: Column*): RelationalGroupedDataset | groupBy * | |
intersect(other: Dataset[T]): Dataset[T] | ||
join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame | ||
join(right: Dataset[_], joinExprs: Column): DataFrame | ||
join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame | ||
join(right: Dataset[_], usingColumns: Seq[String]): DataFrame | ||
join(right: Dataset[_], usingColumn: String): DataFrame | ||
join(right: Dataset[_]): DataFrame | ||
limit(n: Int): Dataset[T] | ||
map[U](func: MapFunction[T, U], encoder: Encoder[U]): Dataset[U] | map[U]***** | |
map[U](func: (T) ⇒ U)(implicit arg0: Encoder[U]): Dataset[U] | map[U]***** | |
na: DataFrameNaFunctions | ||
persist(): DataFrame | ||
persist(newLevel: StorageLevel): DataFrame | ||
randomSplit(weights: Array[Double]): Array[Dataset[T]] | ||
randomSplit(weights: Array[Double], seed: Long): Array[Dataset[T]] | randomSplit* | |
reduce(func: ReduceFunction[T]): T | reduce***** | |
reduce(func: (T, T) ⇒ T): T | reduce***** | |
rollup(col1: String, cols: String*): RelationalGroupedDataset | ||
rollup(cols: Column*): RelationalGroupedDataset | ||
repartition(cols: Column): DataFrame | N/A | Repartition is a Spark concept that is not needed in Snowpark |
repartition(numPartitions: Int): DataFrame | N/A | Repartition is a Spark concept that is not needed in Snowpark
repartition(numPartitions: Int, cols: Column): DataFrame | N/A | Repartition is a Spark concept that is not needed in Snowpark
repartitionByRange(cols: Column): DataFrame | N/A | Repartition is a Spark concept that is not needed in Snowpark |
repartitionByRange(numPartitions: Int, cols: Column): DataFrame | N/A | Repartition is a Spark concept that is not needed in Snowpark
sample(withReplacement: Boolean, fraction: Double): Dataset[T] | sample***** | |
sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T] | sample***** | |
schema: StructType | ||
select(col: String, cols: String*): DataFrame | ||
select(cols: Column*): DataFrame | select | |
select[U1, U2, U3, U4, U5](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4], c5: TypedColumn[T, U5]): Dataset[(U1, U2, U3, U4, U5)] | select[U1, U2, U3, U4, U5] | |
select[U1, U2, U3, U4](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3], c4: TypedColumn[T, U4]): Dataset[(U1, U2, U3, U4)] | select[U1, U2, U3, U4] | |
select[U1, U2, U3](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2], c3: TypedColumn[T, U3]): Dataset[(U1, U2, U3)] | select[U1, U2, U3] | |
select[U1, U2](c1: TypedColumn[T, U1], c2: TypedColumn[T, U2]): Dataset[(U1, U2)] | select[U1, U2] | |
select[U1](c1: TypedColumn[T, U1]): Dataset[U1] | select[U1] | |
show(numRows: Int, truncate: Int): Unit | show | |
show(numRows: Int, truncate: Boolean): Unit | show | |
show(): Unit | ||
show(numRows: Int): Unit | ||
sort(sortExprs: Column*): Dataset[T] | sort** | |
sort(sortCol: String, sortCols: String*): Dataset[T] | ||
stat: DataFrameStatFunctions | stat | |
toDF(colNames: String*): DataFrame | toDF* | |
toDF(): DataFrame | toDF | |
union(other: Dataset[T]): Dataset[T] | ||
where(conditionExpr: String): Dataset[T] | where | |
where(condition: Column): Dataset[T] | ||
withColumn(colName: String, col: Column): DataFrame | ||
write: DataFrameWriter[T] | | Transformation could cause SPRKSCL1105, SPRKSCL1106 and SPRKSCL1107 |
Last updated