Answer by ben jarman for How can I change column types in Spark SQL's DataFrame?

This only really applies if you're having issues saving through a JDBC driver such as SQL Server, but it's very helpful for the syntax and type errors you'll otherwise run into there.

import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType}
import org.apache.spark.sql.types._

val SQLServerDialect = new JdbcDialect {
  // Match both jTDS-style and Microsoft-style SQL Server JDBC URLs.
  override def canHandle(url: String): Boolean =
    url.startsWith("jdbc:jtds:sqlserver") || url.contains("sqlserver")

  // Map each Spark SQL type to the SQL Server column type used on write.
  override def getJDBCType(dt: DataType): Option[JdbcType] = dt match {
    case StringType    => Some(JdbcType("VARCHAR(5000)", java.sql.Types.VARCHAR))
    case BooleanType   => Some(JdbcType("BIT(1)", java.sql.Types.BIT))
    case IntegerType   => Some(JdbcType("INTEGER", java.sql.Types.INTEGER))
    case LongType      => Some(JdbcType("BIGINT", java.sql.Types.BIGINT))
    case DoubleType    => Some(JdbcType("DOUBLE PRECISION", java.sql.Types.DOUBLE))
    case FloatType     => Some(JdbcType("REAL", java.sql.Types.REAL))
    case ShortType     => Some(JdbcType("INTEGER", java.sql.Types.INTEGER))
    case ByteType      => Some(JdbcType("INTEGER", java.sql.Types.INTEGER))
    case BinaryType    => Some(JdbcType("BINARY", java.sql.Types.BINARY))
    case TimestampType => Some(JdbcType("DATE", java.sql.Types.DATE))
    case DateType      => Some(JdbcType("DATE", java.sql.Types.DATE))
    // case DecimalType.Fixed(precision, scale) => Some(JdbcType("NUMBER(" + precision + "," + scale + ")", java.sql.Types.NUMERIC))
    case t: DecimalType => Some(JdbcType(s"DECIMAL(${t.precision},${t.scale})", java.sql.Types.DECIMAL))
    case _ => throw new IllegalArgumentException(s"Don't know how to save ${dt.json} to JDBC")
  }
}

JdbcDialects.registerDialect(SQLServerDialect)
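Once the dialect is registered, any DataFrame write whose JDBC URL matches canHandle picks up these type mappings automatically. Here is a minimal usage sketch; the connection URL, table name, and credentials below are placeholders, not from the original answer:

import java.util.Properties
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("sqlserver-dialect-demo").getOrCreate()

// A small DataFrame with a StringType column, which the default dialect
// would map to a type that can fail on SQL Server.
val df = spark.range(5).selectExpr("id", "CAST(id AS STRING) AS label")

val props = new Properties()
props.setProperty("user", "demo_user")     // placeholder credentials
props.setProperty("password", "demo_pass")

// canHandle matches this URL, so StringType is created as VARCHAR(5000).
df.write.jdbc("jdbc:sqlserver://localhost:1433;databaseName=demo", "demo_table", props)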
