Python
# parser and variables

This example shows how to:
- add date filter to dataframe
- add a path argument built from a variable holding a random string of 10 lowercase characters
import argparse
import os
import random
import string

from pyspark.sql import SparkSession

# Command-line interface:
#   -d            lower bound used in the date filter (stored as args.test)
#   -p/--person   name of the per-user directory created under /tmp
parser = argparse.ArgumentParser()
parser.add_argument('-d', required=True, dest='test')
parser.add_argument('-p', '--person', required=True)
args = parser.parse_args()

# The -d value is spliced into a SQL string literal below, so refuse the
# characters that could terminate or escape that literal.
if "'" in args.test or "\\" in args.test:
    parser.error("-d must not contain quote or backslash characters")

date_filter = f"(date_part >= '{args.test}')"

# Random 10-letter lowercase suffix, so repeated runs are unlikely to reuse
# the same output directory.
letters = ''.join(random.choices(string.ascii_lowercase, k=10))
path = f'/tmp/{args.person}/test/{letters}'

spark = SparkSession.builder.getOrCreate()

# NOTE(review): the filter references `date_part` while the select list uses
# `date_partition` - confirm which column name the source table actually has.
df = spark.sql(f"""
select id, date_partition
from orc.`/datalake/system/customers/`
where {date_filter}
""").persist()

# Write the filtered sample as a Delta table, replacing anything already at path.
df.write.format("delta").mode("overwrite").save(path)
print(f"You can check your sample data in {path}")