I tried converting my spark dataframes to dynamic to output as glueparquet files but I'm getting the error
'DataFrame' object has no attribute 'fromDF'"
My code uses heavily spark dataframes. Is there a way to convert from spark dataframe to dynamic frame so I can write out as glueparquet? If so could you please provide an example, and point out what I'm doing wrong below?
code:
# importing libraries
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
glueContext = GlueContext(SparkContext.getOrCreate())
# updated 11/19/19 for error caused in error logging function
spark = glueContext.spark_session
from pyspark.sql import Window
from pyspark.sql.functions import col
from pyspark.sql.functions import first
from pyspark.sql.functions import date_format
from pyspark.sql.functions import lit,StringType
from pyspark.sql.types import *
from pyspark.sql.functions import substring, length, min,when,format_number,dayofmonth,hour,dayofyear,month,year,weekofyear,date_format,unix_timestamp
base_pth='s3://test/'
bckt_pth1=base_pth+'test_write/glueparquet/'
test_df=glueContext.create_dynamic_frame.from_catalog(
database='test_inventory',
table_name='inventory_tz_inventory').toDF()
test_df.fromDF(test_df, glueContext, "test_nest")
glueContext.write_dynamic_frame.from_options(frame = test_nest,
connection_type = "s3",
connection_options = {"path": bckt_pth1+'inventory'},
format = "glueparquet")
error:
'DataFrame' object has no attribute 'fromDF'
Traceback (most recent call last):
File "/mnt/yarn/usercache/livy/appcache/application_1574556353910_0001/container_1574556353910_0001_01_000001/pyspark.zip/pyspark/sql/dataframe.py", line 1300, in __getattr__
"'%s' object has no attribute '%s'" % (self.__class__.__name__, name))
AttributeError: 'DataFrame' object has no attribute 'fromDF'