I created a Glue job using the Visual tab, as shown below. First, I connected to a MySQL table as the data source; this table is already in my Data Catalog. Then, in the transform node, I wrote a custom SQL query to select only one column from the source table. I validated it with the data preview feature, and the transform node works fine. Now I want to write the data to an existing database table that has only one column, of 'string' data type. The Glue job succeeded, but I don't see the data in the table.
Below is the script that was automatically generated by the Glue job visual editor.
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue import DynamicFrame
def sparkSqlQuery(glueContext, query, mapping, transformation_ctx) -> DynamicFrame:
    """Run a Spark SQL query against one or more DynamicFrames.

    Every frame in ``mapping`` is converted to a Spark DataFrame and
    registered as a temporary view under its alias, so ``query`` can refer
    to the frames by those alias names.

    Args:
        glueContext: Glue context; supplies the Spark session that executes
            the query and is used to wrap the result.
        query: Spark SQL text to execute.
        mapping: Dict of view alias -> DynamicFrame to expose to the query.
        transformation_ctx: Passed through unchanged to
            ``DynamicFrame.fromDF`` as its third argument.

    Returns:
        The query result wrapped as a DynamicFrame.
    """
    for alias, frame in mapping.items():
        frame.toDF().createOrReplaceTempView(alias)
    # Use the session owned by the supplied context instead of relying on a
    # module-level ``spark`` name being defined before this function is called.
    result = glueContext.spark_session.sql(query)
    return DynamicFrame.fromDF(result, glueContext, transformation_ctx)
# --- Job bootstrap ---------------------------------------------------------
# JOB_NAME is the only argument resolved from the command line.
args = getResolvedOptions(sys.argv, ["JOB_NAME"])

# One SparkContext backs the GlueContext; the Spark session is taken from it.
sc = SparkContext()
glueContext = GlueContext(sc)
spark = glueContext.spark_session

job = Job(glueContext)
job.init(args["JOB_NAME"], args)

# --- Source node (MySQL) ---------------------------------------------------
# Read catalog table "test_customer" from catalog database "glue_rds_test".
customer_source = glueContext.create_dynamic_frame.from_catalog(
    database="glue_rds_test",
    table_name="test_customer",
    transformation_ctx="MySQL_node1650299412376",
)

# --- Transform node (SQL) --------------------------------------------------
# The source frame is exposed to the query under the alias "customer";
# only the CUST_CODE column is selected.
SqlQuery0 = """
select CUST_CODE from customer
"""
cust_code_frame = sparkSqlQuery(
    glueContext,
    query=SqlQuery0,
    mapping={"customer": customer_source},
    transformation_ctx="SQL_node1650302847690",
)

# --- Target node (MySQL) ---------------------------------------------------
# Write the selected column to catalog table "test_customer2" in the same
# catalog database.
write_result = glueContext.write_dynamic_frame.from_catalog(
    frame=cust_code_frame,
    database="glue_rds_test",
    table_name="test_customer2",
    transformation_ctx="MySQL_node1650304163076",
)

# Final step of the generated script: commit the job.
job.commit()