from google.cloud import bigquery
import csv
import os
import warnings

warnings.filterwarnings("ignore")

# Note: `client` and `project_id` are module-level globals assigned in the
# __main__ block below before any of these functions run. The to_dataframe()
# calls require the pandas and db-dtypes packages to be installed.


# Get DDLs for all tables in a dataset
def get_table_ddl(dataset_id, output_folder):
    output_folder = os.path.join(output_folder, dataset_id)
    os.makedirs(output_folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.TABLES`
        WHERE table_type = 'BASE TABLE'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        # QUOTE_NONE with a space escapechar writes the raw DDL text without
        # CSV quoting; commas and quotes get a space prefixed instead.
        ddls.to_csv(os.path.join(output_folder, dataset_id + '_tables_ddl_export.sql'),
                    index=False, mode='a', header=False,
                    quoting=csv.QUOTE_NONE, escapechar=' ')


# Get DDLs for all views in a dataset
def get_view_ddl(dataset_id, output_folder):
    output_folder = os.path.join(output_folder, dataset_id)
    os.makedirs(output_folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.TABLES`
        WHERE table_type = 'VIEW'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        ddls.to_csv(os.path.join(output_folder, dataset_id + '_views_ddl_export.sql'),
                    index=False, mode='a', header=False,
                    quoting=csv.QUOTE_NONE, escapechar=' ')


# Get DDLs for all materialized views in a dataset
def get_mv_ddl(dataset_id, output_folder):
    output_folder = os.path.join(output_folder, dataset_id)
    os.makedirs(output_folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.TABLES`
        WHERE table_type = 'MATERIALIZED VIEW'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        ddls.to_csv(os.path.join(output_folder, dataset_id + '_materialized_views_ddl_export.sql'),
                    index=False, mode='a', header=False,
                    quoting=csv.QUOTE_NONE, escapechar=' ')


# Get DDLs for all stored procedures in a dataset
def get_procedure_ddl(dataset_id, output_folder):
    output_folder = os.path.join(output_folder, dataset_id)
    os.makedirs(output_folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.ROUTINES`
        WHERE routine_type = 'PROCEDURE'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        ddls.to_csv(os.path.join(output_folder, dataset_id + '_procedure_ddl_export.sql'),
                    index=False, mode='a', header=False,
                    quoting=csv.QUOTE_NONE, escapechar=' ')


# Get DDLs for all user-defined functions in a dataset
def get_function_ddl(dataset_id, output_folder):
    output_folder = os.path.join(output_folder, dataset_id)
    os.makedirs(output_folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.ROUTINES`
        WHERE routine_type = 'FUNCTION'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        ddls.to_csv(os.path.join(output_folder, dataset_id + '_udf_ddl_export.sql'),
                    index=False, mode='a', header=False,
                    quoting=csv.QUOTE_NONE, escapechar=' ')


# Get DDLs for all table functions in a dataset
def get_table_function_ddl(dataset_id, output_folder):
    output_folder = os.path.join(output_folder, dataset_id)
    os.makedirs(output_folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.ROUTINES`
        WHERE routine_type = 'TABLE FUNCTION'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        ddls.to_csv(os.path.join(output_folder, dataset_id + '_table_function_ddl_export.sql'),
                    index=False, mode='a', header=False,
                    quoting=csv.QUOTE_NONE, escapechar=' ')


# Export DDLs for every object type in each dataset
def export_ddl(datasets, output_folder):
    for dataset_id in datasets:
        print(f"Processing dataset: {dataset_id}")
        get_table_ddl(dataset_id, output_folder)            # tables
        get_view_ddl(dataset_id, output_folder)             # views
        get_mv_ddl(dataset_id, output_folder)               # materialized views
        get_procedure_ddl(dataset_id, output_folder)        # stored procedures
        get_function_ddl(dataset_id, output_folder)         # UDFs
        get_table_function_ddl(dataset_id, output_folder)   # table functions


if __name__ == "__main__":
    try:
        # Google Cloud credentials: path to a service account JSON key file
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"/path/to/service_account.json"

        # Your GCP project ID
        project_id = 'your-gcp-project-id'

        # Directory to store the DDL files (created per dataset as needed)
        output_dir = r"/path/to/bigquery_ddls"

        # Dataset names for DDL export; keep an empty list [] to export all datasets
        datasets = ['dataset1', 'dataset2']

        # Initialize a BigQuery client
        client = bigquery.Client()

        # If no datasets were listed, export every dataset in the project
        if not datasets:
            datasets = [dataset.dataset_id for dataset in client.list_datasets(project=project_id)]

        export_ddl(datasets, output_dir)
        print("Export completed.")
    except Exception as e:
        print(f"Exception while executing the DDL export script: {e}")
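

# ---------------------------------------------------------------------------
# Possible consolidation (a sketch, not part of the script above): the six
# exporters differ only in the INFORMATION_SCHEMA view queried, the filter
# predicate, and the output file suffix, so one parameterized helper could
# cover all of them. The names `DDL_EXPORTS` and `export_object_ddl` are
# illustrative, and this assumes the same `client` and `project_id` globals
# as the script above.

DDL_EXPORTS = [
    # (INFORMATION_SCHEMA view, filter column, object type, file suffix)
    ("TABLES",   "table_type",   "BASE TABLE",        "tables"),
    ("TABLES",   "table_type",   "VIEW",              "views"),
    ("TABLES",   "table_type",   "MATERIALIZED VIEW", "materialized_views"),
    ("ROUTINES", "routine_type", "PROCEDURE",         "procedure"),
    ("ROUTINES", "routine_type", "FUNCTION",          "udf"),
    ("ROUTINES", "routine_type", "TABLE FUNCTION",    "table_function"),
]


def export_object_ddl(dataset_id, output_folder, view, column, object_type, suffix):
    """Export the DDLs of one object type for one dataset (illustrative helper)."""
    folder = os.path.join(output_folder, dataset_id)
    os.makedirs(folder, exist_ok=True)
    query = f"""
        SELECT ddl
        FROM `{project_id}.{dataset_id}.INFORMATION_SCHEMA.{view}`
        WHERE {column} = '{object_type}'
    """
    ddls = client.query(query).to_dataframe()
    if not ddls.empty:
        out_file = os.path.join(folder, f"{dataset_id}_{suffix}_ddl_export.sql")
        # Writing the ddl column as plain text avoids the CSV quoting/escaping
        # hack used above, so no spaces are injected into the statements.
        with open(out_file, "a") as f:
            f.write("\n".join(ddls["ddl"]) + "\n")


# Usage sketch: for each dataset, run every export listed in DDL_EXPORTS.
#   for dataset_id in datasets:
#       for view, column, object_type, suffix in DDL_EXPORTS:
#           export_object_ddl(dataset_id, output_dir, view, column, object_type, suffix)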