API Reference

Amazon S3

copy_objects(paths, source_path, target_path)

Copy a list of S3 objects to another S3 directory.

delete_objects(path[, use_threads, ...])

Delete Amazon S3 objects from a received S3 prefix or list of S3 object paths.

describe_objects(path[, version_id, ...])

Describe Amazon S3 objects from a received S3 prefix or list of S3 object paths.

does_object_exist(path[, ...])

Check if object exists on S3.

download(path, local_file[, version_id, ...])

Download a file from a received S3 path to a local file.

get_bucket_region(bucket[, boto3_session])

Get bucket region name.

list_buckets([boto3_session])

List Amazon S3 buckets.

list_directories(path[, chunked, ...])

List Amazon S3 objects from a prefix.

list_objects(path[, suffix, ignore_suffix, ...])

List Amazon S3 objects from a prefix.

merge_datasets(source_path, target_path[, ...])

Merge a source dataset into a target dataset.

merge_upsert_table(delta_df, database, ...)

Perform Upsert (Update else Insert) onto an existing Glue table.

read_csv(path[, path_suffix, ...])

Read CSV file(s) from a received S3 prefix or list of S3 object paths.

read_excel(path[, version_id, use_threads, ...])

Read Excel file(s) from a received S3 path.

read_fwf(path[, path_suffix, ...])

Read fixed-width formatted file(s) from a received S3 prefix or list of S3 object paths.

read_json(path[, path_suffix, ...])

Read JSON file(s) from a received S3 prefix or list of S3 object paths.

read_parquet(path[, path_root, path_suffix, ...])

Read Apache Parquet file(s) from a received S3 prefix or list of S3 object paths.

read_parquet_metadata(path[, version_id, ...])

Read Apache Parquet file(s) metadata from a received S3 prefix or list of S3 object paths.

read_parquet_table(table, database[, ...])

Read Apache Parquet table registered on AWS Glue Catalog.

select_query(sql, path, input_serialization, ...)

Filter contents of an Amazon S3 object based on SQL statement.

size_objects(path[, version_id, ...])

Get the size (ContentLength) in bytes of Amazon S3 objects from a received S3 prefix or list of S3 object paths.

store_parquet_metadata(path, database, table)

Infer and store parquet metadata on AWS Glue Catalog.

to_csv(df[, path, sep, index, columns, ...])

Write CSV file or dataset on Amazon S3.

to_excel(df, path[, boto3_session, ...])

Write an Excel file on Amazon S3.

to_json(df[, path, index, columns, ...])

Write JSON file on Amazon S3.

to_parquet(df[, path, index, compression, ...])

Write Parquet file or dataset on Amazon S3.

upload(local_file, path[, use_threads, ...])

Upload a local file to a received S3 path.

wait_objects_exist(paths[, delay, ...])

Wait until Amazon S3 objects exist.

wait_objects_not_exist(paths[, delay, ...])

Wait until Amazon S3 objects no longer exist.
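
A minimal usage sketch for the S3 helpers above, assuming they come from the awswrangler package (imported as wr); the bucket, prefix, and key names are placeholders:

    import awswrangler as wr
    import pandas as pd

    df = pd.DataFrame({"id": [1, 2], "value": ["foo", "bar"]})

    # Write a Parquet dataset to S3 and read it back
    wr.s3.to_parquet(df=df, path="s3://my-bucket/dataset/", dataset=True)
    df2 = wr.s3.read_parquet("s3://my-bucket/dataset/", dataset=True)

    # Check whether a single object exists
    exists = wr.s3.does_object_exist("s3://my-bucket/dataset/some-file.parquet")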

AWS Glue Catalog

add_column(database, table, column_name[, ...])

Add a column to an AWS Glue Catalog table.

add_csv_partitions(database, table, ...[, ...])

Add partitions (metadata) to a CSV Table in the AWS Glue Catalog.

add_parquet_partitions(database, table, ...)

Add partitions (metadata) to a Parquet Table in the AWS Glue Catalog.

create_csv_table(database, table, path, ...)

Create a CSV Table (Metadata Only) in the AWS Glue Catalog.

create_database(name[, description, ...])

Create a database in AWS Glue Catalog.

create_json_table(database, table, path, ...)

Create a JSON Table (Metadata Only) in the AWS Glue Catalog.

create_parquet_table(database, table, path, ...)

Create a Parquet Table (Metadata Only) in the AWS Glue Catalog.

databases([limit, catalog_id, boto3_session])

Get a Pandas DataFrame with all listed databases.

delete_column(database, table, column_name)

Delete a column from an AWS Glue Catalog table.

delete_database(name[, catalog_id, ...])

Delete a database in AWS Glue Catalog.

delete_partitions(table, database, ...[, ...])

Delete specified partitions in an AWS Glue Catalog table.

delete_all_partitions(table, database[, ...])

Delete all partitions in an AWS Glue Catalog table.

delete_table_if_exists(database, table[, ...])

Delete a Glue table if it exists.

does_table_exist(database, table[, ...])

Check if the table exists.

drop_duplicated_columns(df)

Drop all repeated columns (duplicated names).

extract_athena_types(df[, index, ...])

Extract column and partition types (Amazon Athena) from a Pandas DataFrame.

get_columns_comments(database, table[, ...])

Get all column comments.

get_csv_partitions(database, table[, ...])

Get all partitions from a Table in the AWS Glue Catalog.

get_databases([catalog_id, boto3_session])

Get an iterator of databases.

get_parquet_partitions(database, table[, ...])

Get all partitions from a Table in the AWS Glue Catalog.

get_partitions(database, table[, ...])

Get all partitions from a Table in the AWS Glue Catalog.

get_table_description(database, table[, ...])

Get table description.

get_table_location(database, table[, ...])

Get table's location on Glue catalog.

get_table_number_of_versions(database, table)

Get the total number of versions.

get_table_parameters(database, table[, ...])

Get all parameters.

get_table_types(database, table[, ...])

Get all columns and types from a table.

get_table_versions(database, table[, ...])

Get all versions.

get_tables([catalog_id, database, ...])

Get an iterator of tables.

overwrite_table_parameters(parameters, ...)

Overwrite all existing parameters.

sanitize_column_name(column)

Convert the column name to be compatible with Amazon Athena and the AWS Glue Catalog.

sanitize_dataframe_columns_names(df[, ...])

Normalize all column names to be compatible with Amazon Athena.

sanitize_table_name(table)

Convert the table name to be compatible with Amazon Athena and the AWS Glue Catalog.

search_tables(text[, catalog_id, boto3_session])

Get Pandas DataFrame of tables filtered by a search string.

table(database, table[, transaction_id, ...])

Get table details as Pandas DataFrame.

tables([limit, catalog_id, database, ...])

Get a DataFrame with tables filtered by a search term, prefix, suffix.

upsert_table_parameters(parameters, ...[, ...])

Insert or Update the received parameters.
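
A short sketch of registering table metadata in the Glue Catalog, again assuming awswrangler imported as wr; the database, table, path, and column names are placeholders:

    import awswrangler as wr

    wr.catalog.create_database(name="my_db")
    wr.catalog.create_parquet_table(
        database="my_db",
        table="my_table",
        path="s3://my-bucket/dataset/",
        columns_types={"id": "bigint", "value": "string"},
    )
    print(wr.catalog.get_table_types(database="my_db", table="my_table"))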

Amazon Athena

create_athena_bucket([boto3_session])

Create the default Athena bucket if it doesn't exist.

create_ctas_table(sql, database[, ...])

Create a new table populated with the results of a SELECT query.

get_query_columns_types(query_execution_id)

Get the data type of all columns queried.

get_query_execution(query_execution_id[, ...])

Fetch query execution details.

get_named_query_statement(named_query_id[, ...])

Get the named query statement string from a query ID.

get_work_group(workgroup[, boto3_session])

Return information about the workgroup with the specified name.

read_sql_query(sql, database[, ...])

Execute any SQL query on AWS Athena and return the results as a Pandas DataFrame.

read_sql_table(table, database[, ...])

Extract a full table from AWS Athena and return the results as a Pandas DataFrame.

repair_table(table[, database, data_source, ...])

Run Hive's metastore consistency check: 'MSCK REPAIR TABLE table;'.

start_query_execution(sql[, database, ...])

Start a SQL Query against AWS Athena.

stop_query_execution(query_execution_id[, ...])

Stop a query execution.

unload(sql, path, database[, file_format, ...])

Write query results from a SELECT statement to the specified data format using UNLOAD.

wait_query(query_execution_id[, boto3_session])

Wait for the query to end.
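
A hedged example of the two Athena query styles (synchronous DataFrame result vs. asynchronous execution), assuming awswrangler as wr and placeholder database and table names:

    import awswrangler as wr

    # Run a query and fetch the results as a Pandas DataFrame
    df = wr.athena.read_sql_query("SELECT * FROM my_table LIMIT 10", database="my_db")

    # Or start a query asynchronously and wait for it to finish
    query_id = wr.athena.start_query_execution("MSCK REPAIR TABLE my_table", database="my_db")
    wr.athena.wait_query(query_execution_id=query_id)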

AWS Lake Formation

read_sql_query(sql, database[, ...])

Execute PartiQL query on AWS Glue Table (Transaction ID or time travel timestamp).

read_sql_table(table, database[, ...])

Extract all rows from AWS Glue Table (Transaction ID or time travel timestamp).

cancel_transaction(transaction_id[, ...])

Cancel the specified transaction.

commit_transaction(transaction_id[, ...])

Commit the specified transaction.

describe_transaction(transaction_id[, ...])

Return the status of a single transaction.

extend_transaction(transaction_id[, ...])

Indicate to the service that the specified transaction is still active and should not be canceled.

start_transaction([read_only, time_out, ...])

Start a new transaction and return its transaction ID.

wait_query(query_id[, boto3_session])

Wait for the query to end.
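
A sketch of reading a governed table inside a transaction, assuming awswrangler as wr and placeholder names; the exact keyword arguments may vary by version:

    import awswrangler as wr

    transaction_id = wr.lakeformation.start_transaction(read_only=True)
    df = wr.lakeformation.read_sql_table(
        table="my_governed_table",
        database="my_db",
        transaction_id=transaction_id,
    )
    wr.lakeformation.commit_transaction(transaction_id)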

Amazon Redshift

connect([connection, secret_id, catalog_id, ...])

Return a redshift_connector connection from a Glue Catalog connection or Secrets Manager.

connect_temp(cluster_identifier, user[, ...])

Return a redshift_connector temporary connection (No password required).

copy(df, path, con, table, schema[, ...])

Load a Pandas DataFrame as a table on Amazon Redshift, using Parquet files on S3 as a staging area.

copy_from_files(path, con, table, schema[, ...])

Load Parquet files from S3 into a table on Amazon Redshift (through the COPY command).

read_sql_query(sql, con[, index_col, ...])

Return a DataFrame corresponding to the result set of the query string.

read_sql_table(table, con[, schema, ...])

Return a DataFrame corresponding to the table.

to_sql(df, con, table, schema[, mode, ...])

Write records stored in a DataFrame into Redshift.

unload(sql, path, con[, iam_role, ...])

Load a Pandas DataFrame from an Amazon Redshift query result, using Parquet files on S3 as a staging area.

unload_to_files(sql, path, con[, iam_role, ...])

Unload Parquet files to S3 from a Redshift query result (through the UNLOAD command).
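
A sketch of the typical Redshift flow (connect via a Glue Catalog connection, bulk-load through S3, query back), assuming awswrangler as wr; the connection, bucket, and table names are placeholders:

    import awswrangler as wr
    import pandas as pd

    df = pd.DataFrame({"id": [1, 2]})
    con = wr.redshift.connect("my-redshift-connection")
    wr.redshift.copy(df=df, path="s3://my-bucket/stage/", con=con, table="my_table", schema="public")
    df2 = wr.redshift.read_sql_query("SELECT * FROM public.my_table", con=con)
    con.close()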

PostgreSQL

connect([connection, secret_id, catalog_id, ...])

Return a pg8000 connection from a Glue Catalog Connection.

read_sql_query(sql, con[, index_col, ...])

Return a DataFrame corresponding to the result set of the query string.

read_sql_table(table, con[, schema, ...])

Return a DataFrame corresponding to the table.

to_sql(df, con, table, schema[, mode, ...])

Write records stored in a DataFrame into PostgreSQL.
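
A minimal PostgreSQL round trip, assuming awswrangler as wr and a placeholder Glue Catalog connection name:

    import awswrangler as wr
    import pandas as pd

    con = wr.postgresql.connect("my-postgresql-connection")
    wr.postgresql.to_sql(df=pd.DataFrame({"c0": [1, 2]}), con=con, table="my_table", schema="public", mode="overwrite")
    df = wr.postgresql.read_sql_query("SELECT * FROM public.my_table", con=con)
    con.close()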

MySQL

connect([connection, secret_id, catalog_id, ...])

Return a pymysql connection from a Glue Catalog Connection or Secrets Manager.

read_sql_query(sql, con[, index_col, ...])

Return a DataFrame corresponding to the result set of the query string.

read_sql_table(table, con[, schema, ...])

Return a DataFrame corresponding to the table.

to_sql(df, con, table, schema[, mode, ...])

Write records stored in a DataFrame into MySQL.
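
The same pattern for MySQL (awswrangler as wr; connection, schema, and table names are placeholders):

    import awswrangler as wr
    import pandas as pd

    con = wr.mysql.connect("my-mysql-connection")
    wr.mysql.to_sql(df=pd.DataFrame({"c0": [1, 2]}), con=con, table="my_table", schema="my_schema", mode="overwrite")
    df = wr.mysql.read_sql_query("SELECT * FROM my_schema.my_table", con=con)
    con.close()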

Microsoft SQL Server

connect([connection, secret_id, catalog_id, ...])

Return a pyodbc connection from a Glue Catalog Connection.

read_sql_query(sql, con[, index_col, ...])

Return a DataFrame corresponding to the result set of the query string.

read_sql_table(table, con[, schema, ...])

Return a DataFrame corresponding to the table.

to_sql(df, con, table, schema[, mode, ...])

Write records stored in a DataFrame into Microsoft SQL Server.
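
And for Microsoft SQL Server (awswrangler as wr; placeholder names, and the pyodbc driver is assumed to be installed):

    import awswrangler as wr
    import pandas as pd

    con = wr.sqlserver.connect("my-sqlserver-connection")
    wr.sqlserver.to_sql(df=pd.DataFrame({"c0": [1, 2]}), con=con, table="my_table", schema="dbo", mode="overwrite")
    df = wr.sqlserver.read_sql_query("SELECT * FROM dbo.my_table", con=con)
    con.close()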

Oracle

connect([connection, secret_id, catalog_id, ...])

Return an oracledb connection from a Glue Catalog Connection.

read_sql_query(sql, con[, index_col, ...])

Return a DataFrame corresponding to the result set of the query string.

read_sql_table(table, con[, schema, ...])

Return a DataFrame corresponding to the table.

to_sql(df, con, table, schema[, mode, ...])

Write records stored in a DataFrame into Oracle Database.
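
And for Oracle Database (awswrangler as wr; placeholder names, and the oracledb driver is assumed to be installed):

    import awswrangler as wr
    import pandas as pd

    con = wr.oracle.connect("my-oracle-connection")
    wr.oracle.to_sql(df=pd.DataFrame({"c0": [1, 2]}), con=con, table="my_table", schema="MY_SCHEMA", mode="overwrite")
    df = wr.oracle.read_sql_query("SELECT * FROM MY_SCHEMA.MY_TABLE", con=con)
    con.close()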

Data API Redshift

RedshiftDataApi(cluster_id, database[, ...])

Provides access to a Redshift cluster via the Data API.

connect(cluster_id, database[, secret_arn, ...])

Create a Redshift Data API connection.

read_sql_query(sql, con[, database])

Run an SQL query on a RedshiftDataApi connection and return the result as a dataframe.
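
A sketch of querying Redshift through the Data API (no direct network path to the cluster required), assuming awswrangler as wr; the cluster identifier, database, and user are placeholders:

    import awswrangler as wr

    con = wr.data_api.redshift.connect(cluster_id="my-cluster", database="dev", db_user="awsuser")
    df = wr.data_api.redshift.read_sql_query("SELECT 1 AS col", con=con)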

Data API RDS

RdsDataApi(resource_arn, database[, ...])

Provides access to the RDS Data API.

connect(resource_arn, database[, ...])

Create an RDS Data API connection.

read_sql_query(sql, con[, database])

Run an SQL query on an RdsDataApi connection and return the result as a dataframe.
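
The equivalent sketch for the RDS Data API (awswrangler as wr); the ARNs and database name below are placeholders:

    import awswrangler as wr

    con = wr.data_api.rds.connect(
        resource_arn="arn:aws:rds:us-east-1:111111111111:cluster:my-cluster",
        database="mydb",
        secret_arn="arn:aws:secretsmanager:us-east-1:111111111111:secret:my-secret",
    )
    df = wr.data_api.rds.read_sql_query("SELECT 1 AS col", con=con)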

OpenSearch

connect(host[, port, boto3_session, region, ...])

Create a secure connection to the specified Amazon OpenSearch domain.

create_index(client, index[, doc_type, ...])

Create an index.

delete_index(client, index)

Delete an index.

index_csv(client, path, index[, doc_type, ...])

Index all documents from a CSV file to an OpenSearch index.

index_documents(client, documents, index[, ...])

Index all documents to an OpenSearch index.

index_df(client, df, index[, doc_type])

Index all documents from a DataFrame to an OpenSearch index.

index_json(client, path, index[, doc_type, ...])

Index all documents from a JSON file to an OpenSearch index.

search(client[, index, search_body, ...])

Return results matching a query DSL as a Pandas DataFrame.

search_by_sql(client, sql_query, **kwargs)

Return results matching a SQL query as a Pandas DataFrame.
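
A short OpenSearch sketch (awswrangler as wr); the domain endpoint and index name are placeholders:

    import awswrangler as wr
    import pandas as pd

    client = wr.opensearch.connect(host="my-domain.us-east-1.es.amazonaws.com")
    wr.opensearch.index_df(client, df=pd.DataFrame({"title": ["Notorious"], "year": [1946]}), index="movies")
    hits = wr.opensearch.search(client, index="movies", search_body={"query": {"match": {"title": "notorious"}}})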

Amazon Neptune

connect(host, port[, iam_enabled])

Create a connection to a Neptune cluster.

execute_gremlin(client, query)

Return the results of a Gremlin traversal as a Pandas DataFrame.

execute_opencypher(client, query)

Return the results of an openCypher traversal as a Pandas DataFrame.

execute_sparql(client, query)

Return the results of a SPARQL query as a Pandas DataFrame.

flatten_nested_df(df[, include_prefix, ...])

Flatten the lists and dictionaries of the input data frame.

to_property_graph(client, df[, batch_size, ...])

Write records stored in a DataFrame into Amazon Neptune as a property graph.

to_rdf_graph(client, df[, batch_size, ...])

Write records stored in a DataFrame into Amazon Neptune as an RDF graph.
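
A minimal Neptune sketch (awswrangler as wr); the cluster endpoint is a placeholder and the query is a trivial Gremlin traversal:

    import awswrangler as wr

    client = wr.neptune.connect(host="my-neptune.cluster-xxxxxxxx.us-east-1.neptune.amazonaws.com",
                                port=8182, iam_enabled=False)
    df = wr.neptune.execute_gremlin(client, "g.V().limit(5)")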

DynamoDB

delete_items(items, table_name[, boto3_session])

Delete the given items from the specified DynamoDB table.

get_table(table_name[, boto3_session])

Get DynamoDB table object for specified table name.

put_csv(path, table_name[, boto3_session])

Write all items from a CSV file to a DynamoDB table.

put_df(df, table_name[, boto3_session])

Write all items from a DataFrame to a DynamoDB table.

put_items(items, table_name[, boto3_session])

Insert all items into the specified DynamoDB table.

put_json(path, table_name[, boto3_session])

Write all items from a JSON file to a DynamoDB table.
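
A minimal DynamoDB sketch (awswrangler as wr); the table name is a placeholder and the table is assumed to already exist with a matching key schema:

    import awswrangler as wr
    import pandas as pd

    df = pd.DataFrame({"pk": ["a", "b"], "value": [1, 2]})
    wr.dynamodb.put_df(df=df, table_name="my-table")
    table = wr.dynamodb.get_table(table_name="my-table")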

Amazon Timestream

create_database(database[, kms_key_id, ...])

Create a new Timestream database.

create_table(database, table, ...[, tags, ...])

Create a new Timestream table.

delete_database(database[, boto3_session])

Delete a given Timestream database.

delete_table(database, table[, boto3_session])

Delete a given Timestream table.

query(sql[, chunked, pagination_config, ...])

Run a query and retrieve the result as a Pandas DataFrame.

write(df, database, table, time_col, ...[, ...])

Store a Pandas DataFrame into an Amazon Timestream table.
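
A hedged Timestream sketch (awswrangler as wr); database, table, and column names are placeholders:

    import awswrangler as wr
    import pandas as pd
    from datetime import datetime

    df = pd.DataFrame({"time": [datetime.now()], "dim0": ["sensor-1"], "measure": [1.0]})
    rejected = wr.timestream.write(
        df=df, database="my_db", table="my_table",
        time_col="time", measure_col="measure", dimensions_cols=["dim0"],
    )
    results = wr.timestream.query('SELECT * FROM "my_db"."my_table" LIMIT 10')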

Amazon EMR

build_spark_step(path[, deploy_mode, ...])

Build the Step structure (dictionary).

build_step(command[, name, ...])

Build the Step structure (dictionary).

create_cluster(subnet_id[, cluster_name, ...])

Create an EMR cluster with an instance fleets configuration.

get_cluster_state(cluster_id[, boto3_session])

Get the EMR cluster state.

get_step_state(cluster_id, step_id[, ...])

Get EMR step state.

submit_ecr_credentials_refresh(cluster_id, path)

Update internal ECR credentials.

submit_spark_step(cluster_id, path[, ...])

Submit Spark Step.

submit_step(cluster_id, command[, name, ...])

Submit a new job to the EMR cluster.

submit_steps(cluster_id, steps[, boto3_session])

Submit a list of steps.

terminate_cluster(cluster_id[, boto3_session])

Terminate an EMR cluster.
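
A sketch of the EMR helpers (awswrangler as wr); the subnet ID, bucket, and script path are placeholders, and create_cluster accepts many more tuning options than shown:

    import awswrangler as wr

    cluster_id = wr.emr.create_cluster(subnet_id="subnet-0123456789abcdef0")
    step_id = wr.emr.submit_step(cluster_id=cluster_id, command="spark-submit s3://my-bucket/jobs/job.py")
    print(wr.emr.get_step_state(cluster_id=cluster_id, step_id=step_id))
    wr.emr.terminate_cluster(cluster_id=cluster_id)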

Amazon CloudWatch Logs

read_logs(query, log_group_names[, ...])

Run a query against Amazon CloudWatch Logs Insights and convert the results to a Pandas DataFrame.

run_query(query, log_group_names[, ...])

Run a query against Amazon CloudWatch Logs Insights and wait for the results.

start_query(query, log_group_names[, ...])

Run a query against Amazon CloudWatch Logs Insights.

wait_query(query_id[, boto3_session])

Wait until the query ends.
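
A minimal CloudWatch Logs Insights sketch (awswrangler as wr); the log group name is a placeholder:

    import awswrangler as wr

    df = wr.cloudwatch.read_logs(
        query="fields @timestamp, @message | sort @timestamp desc | limit 20",
        log_group_names=["/aws/lambda/my-function"],
    )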

Amazon QuickSight

cancel_ingestion(ingestion_id[, ...])

Cancel an ongoing ingestion of data into SPICE.

create_athena_data_source(name[, workgroup, ...])

Create a QuickSight data source pointing to an Athena/Workgroup.

create_athena_dataset(name[, database, ...])

Create a QuickSight dataset.

create_ingestion([dataset_name, dataset_id, ...])

Create and start a new SPICE ingestion on a dataset.

delete_all_dashboards([account_id, ...])

Delete all dashboards.

delete_all_data_sources([account_id, ...])

Delete all data sources.

delete_all_datasets([account_id, boto3_session])

Delete all datasets.

delete_all_templates([account_id, boto3_session])

Delete all templates.

delete_dashboard([name, dashboard_id, ...])

Delete a dashboard.

delete_data_source([name, data_source_id, ...])

Delete a data source.

delete_dataset([name, dataset_id, ...])

Delete a dataset.

delete_template([name, template_id, ...])

Delete a template.

describe_dashboard([name, dashboard_id, ...])

Describe a QuickSight dashboard by name or ID.

describe_data_source([name, data_source_id, ...])

Describe a QuickSight data source by name or ID.

describe_data_source_permissions([name, ...])

Describe a QuickSight data source permissions by name or ID.

describe_dataset([name, dataset_id, ...])

Describe a QuickSight dataset by name or ID.

describe_ingestion(ingestion_id[, ...])

Describe a QuickSight ingestion by ID.

get_dashboard_id(name[, account_id, ...])

Get a QuickSight dashboard ID given a name, failing if there is more than one ID associated with this name.

get_dashboard_ids(name[, account_id, ...])

Get QuickSight dashboard IDs given a name.

get_data_source_arn(name[, account_id, ...])

Get a QuickSight data source ARN given a name, failing if there is more than one ARN associated with this name.

get_data_source_arns(name[, account_id, ...])

Get QuickSight Data source ARNs given a name.

get_data_source_id(name[, account_id, ...])

Get a QuickSight data source ID given a name, failing if there is more than one ID associated with this name.

get_data_source_ids(name[, account_id, ...])

Get QuickSight data source IDs given a name.

get_dataset_id(name[, account_id, boto3_session])

Get a QuickSight dataset ID given a name, failing if there is more than one ID associated with this name.

get_dataset_ids(name[, account_id, ...])

Get QuickSight dataset IDs given a name.

get_template_id(name[, account_id, ...])

Get a QuickSight template ID given a name, failing if there is more than one ID associated with this name.

get_template_ids(name[, account_id, ...])

Get QuickSight template IDs given a name.

list_dashboards([account_id, boto3_session])

List dashboards in an AWS account.

list_data_sources([account_id, boto3_session])

List all QuickSight data source summaries.

list_datasets([account_id, boto3_session])

List all QuickSight dataset summaries.

list_groups([namespace, account_id, ...])

List all QuickSight Groups.

list_group_memberships(group_name[, ...])

List all QuickSight Group memberships.

list_iam_policy_assignments([status, ...])

List IAM policy assignments in the current Amazon QuickSight account.

list_iam_policy_assignments_for_user(user_name)

List all the IAM policy assignments for the specified user.

list_ingestions([dataset_name, dataset_id, ...])

List the history of SPICE ingestions for a dataset.

list_templates([account_id, boto3_session])

List all QuickSight templates.

list_users([namespace, account_id, ...])

Return a list of all of the Amazon QuickSight users belonging to this account.

list_user_groups(user_name[, namespace, ...])

List the Amazon QuickSight groups that an Amazon QuickSight user is a member of.
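
A sketch of wiring Athena into QuickSight and refreshing SPICE (awswrangler as wr); the data source, dataset, database, and table names are placeholders, and the keyword arguments are assumed from the signatures listed above:

    import awswrangler as wr

    wr.quicksight.create_athena_data_source(name="my-data-source", workgroup="primary")
    wr.quicksight.create_athena_dataset(
        name="my-dataset",
        database="my_db",
        table="my_table",
        data_source_name="my-data-source",
    )
    wr.quicksight.create_ingestion(dataset_name="my-dataset")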

AWS STS

get_account_id([boto3_session])

Get Account ID.

get_current_identity_arn([boto3_session])

Get current user/role ARN.

get_current_identity_name([boto3_session])

Get current user/role name.
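
A trivial STS sketch (awswrangler as wr):

    import awswrangler as wr

    print(wr.sts.get_account_id())
    print(wr.sts.get_current_identity_arn())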

AWS Secrets Manager

get_secret(name[, boto3_session])

Get secret value.

get_secret_json(name[, boto3_session])

Get JSON secret value.
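
A short Secrets Manager sketch (awswrangler as wr); the secret names are placeholders:

    import awswrangler as wr

    value = wr.secretsmanager.get_secret("my-secret")            # raw secret value
    creds = wr.secretsmanager.get_secret_json("my-json-secret")  # parsed into a dict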

Amazon Chime

post_message(webhook, message)

Send a message to an existing Chime chat room.
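
A one-line Chime sketch (awswrangler as wr); the webhook URL is a placeholder:

    import awswrangler as wr

    wr.chime.post_message(webhook="https://hooks.chime.aws/incomingwebhooks/example", message="Pipeline finished.")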

Global Configurations

reset([item])

Reset one or all (if None is received) configuration values.

to_pandas()

Load all configurations into a Pandas DataFrame.
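
A hedged example of working with the global configuration object (awswrangler as wr); "workgroup" is assumed to be one of the configurable keys:

    import awswrangler as wr

    wr.config.workgroup = "primary"   # set a default Athena workgroup (assumed configurable key)
    print(wr.config.to_pandas())      # inspect every configuration value as a DataFrame
    wr.config.reset()                 # clear all configuration values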