You can try this custom logic to add a new column; in this example the source file name is added as a new column during the Redshift COPY:
import boto3
import re
s3 = boto3.client('s3')
sql = "DROPSQL , CREATE SQL , COPY SQL" ## here need to pass your actual sqls
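# A hypothetical example of the shape the code below expects (the schema, table
# and bucket names here are made up, not from the original): three ';'-separated
# statements, with the table name upper-cased inside the COPY statement and also
# appearing as a folder in the S3 prefix:
# sql = "DROP TABLE schema.my_table ;CREATE TABLE schema.my_table(id int);COPY schema.MY_TABLE from 's3://my-bucket/MY_TABLE/2021/' iam_role 'arn:aws:iam::123456789012:role/load' csv"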
def Filter(datalist, keyword):
    # Return the statements in the list that match the regular expression
    return [val for val in datalist if re.search(keyword, val)]
def add_new_col(table_name):
    # Pick out the three statements that reference this table
    drop_sql = ''.join(Filter(sql.split(';'), keyword=table_name + ' '))
    create_sql = ''.join(Filter(sql.split(';'), keyword=table_name + r'\('))
    copy_sql = ''.join(Filter(sql.split(';'), keyword=table_name.upper() + '/'))
    # Parse the bucket, key prefix and target table out of the COPY statement
    BUCKET = copy_sql.split(' ')[3].split('/')[2]
    folder = '/'.join(copy_sql.split(' ')[3].split('/')[3:-1]) + '/'
    maintable = copy_sql.split(' ')[1]
    print("BUCKET {}, key_folder {}, maintable {}".format(BUCKET, folder, maintable))
    # Rewrite the statements to target a staging table instead; replace only the
    # first occurrence of the upper-cased name so the S3 path stays intact
    temp_table_drop_sql = drop_sql.replace(table_name, 'temp_table')
    temp_table_create_sql = create_sql.replace(table_name, 'temp_table')
    temp_table_copy_sql = copy_sql.replace(table_name.upper(), 'temp_table', 1)
    temp_table_name_withSchema = temp_table_copy_sql.split(' ')[1]
    print("temp_table_name_withSchema {}".format(temp_table_name_withSchema))
    ## replace with query execute logic
    print(temp_table_drop_sql)
    print(temp_table_create_sql)
    #####
    response = s3.list_objects_v2(Bucket=BUCKET, Prefix=folder)
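    # Note: list_objects_v2 returns at most 1,000 keys per request; for larger
    # prefixes, use s3.get_paginator('list_objects_v2') to walk all pages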
    new_column_name = 'filename'
    for i in response["Contents"]:
        ## replace with query execute logic
        # COPY a single file into the staging table; again replace only the
        # first occurrence of the upper-cased table name, not the S3 key
        temp_sql = copy_sql.replace(folder, i["Key"])
        temp_sql = temp_sql.replace(table_name.upper(), 'temp_table', 1)
        print(temp_sql)
        # i["Key"] is the object key; its last segment is the file name
        print("alter table {} ADD COLUMN {} varchar(256) NOT NULL DEFAULT '{}';".format(temp_table_name_withSchema, new_column_name, i["Key"].split('/')[-1]))
        print("insert into {} (select * from {});".format(maintable, temp_table_name_withSchema))
        print("truncate {};".format(temp_table_name_withSchema))
        # Drop the added column again so the next iteration's COPY and
        # ALTER ... ADD COLUMN run against the original staging schema
        print("alter table {} DROP COLUMN {};".format(temp_table_name_withSchema, new_column_name))
    #####
    ## replace with query execute logic
    print(drop_sql)
########
add_new_col('your_table_name')  # pass the table name used in your SQL statements
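The print calls marked "## replace with query execute logic" only show the generated SQL. A minimal sketch of an execute helper, assuming psycopg2 as the driver; the endpoint, database and credentials below are placeholders, not from the original:

import psycopg2

def execute_sql(statement):
    # Open a connection per call for clarity; in practice reuse one connection
    conn = psycopg2.connect(
        host='your-cluster.xxxx.region.redshift.amazonaws.com',  # placeholder
        port=5439,
        dbname='dev',
        user='awsuser',
        password='...')  # placeholder credentials
    try:
        with conn.cursor() as cur:
            cur.execute(statement)
        conn.commit()
    finally:
        conn.close()

Each print(...) above would then become execute_sql(...) with the same SQL string.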