aws · Feb 16, 2024 · Feb 19, 2024 · Feb 19, 2024 · Feb 19, 2024 · Feb 19, 2024
diff --git a/.bumpversion.toml b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "3.6.0"
+current_version = "3.7.0"
 commit = false
 tag = false
 tag_name = "{new_version}"

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -224,7 +224,7 @@ To run all database MySQL tests (Using 8 parallel processes):
 
 ``pytest -n 8 tests/unit/test_mysql.py``
 
-To run all tests for all python versions (assuming Amazon QuickSight is activated and the optional stacks deployed):
+To run all tests for all python versions (assuming Amazon QuickSight is activated and the optional stack is deployed):
 
 ``./test.sh``
 

diff --git a/README.md b/README.md
@@ -100,27 +100,27 @@ FROM "sampleDB"."sampleTable" ORDER BY time DESC LIMIT 3
 ## At scale
 AWS SDK for pandas can also run your workflows at scale by leveraging [Modin](https://modin.readthedocs.io/en/stable/) and [Ray](https://www.ray.io/). Both projects aim to speed up data workloads by distributing processing over a cluster of workers.
 
-The quickest way to get started is to use AWS Glue with Ray. Read our [docs](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/scale.html), our blogs ([1](https://aws.amazon.com/blogs/big-data/scale-aws-sdk-for-pandas-workloads-with-aws-glue-for-ray/)/[2](https://aws.amazon.com/blogs/big-data/advanced-patterns-with-aws-sdk-for-pandas-on-aws-glue-for-ray/)), or head to our latest [tutorials](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials) to discover even more features.
+The quickest way to get started is to use AWS Glue with Ray. Read our [docs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html), our blogs ([1](https://aws.amazon.com/blogs/big-data/scale-aws-sdk-for-pandas-workloads-with-aws-glue-for-ray/)/[2](https://aws.amazon.com/blogs/big-data/advanced-patterns-with-aws-sdk-for-pandas-on-aws-glue-for-ray/)), or head to our latest [tutorials](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials) to discover even more features.
 
 > ⚠️ **Ray is currently not available for Python 3.12. While AWS SDK for pandas supports Python 3.12, it cannot be used at scale.**
 
 ## [Read The Docs](https://aws-sdk-pandas.readthedocs.io/)
 
-- [**What is AWS SDK for pandas?**](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/about.html)
-- [**Install**](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html)
-  - [PyPi (pip)](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#pypi-pip)
-  - [Conda](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#conda)
-  - [AWS Lambda Layer](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#aws-lambda-layer)
-  - [AWS Glue Python Shell Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#aws-glue-python-shell-jobs)
-  - [AWS Glue PySpark Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#aws-glue-pyspark-jobs)
-  - [Amazon SageMaker Notebook](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#amazon-sagemaker-notebook)
-  - [Amazon SageMaker Notebook Lifecycle](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#amazon-sagemaker-notebook-lifecycle)
-  - [EMR](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#emr)
-  - [From source](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/install.html#from-source)
-- [**At scale**](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/scale.html)
-  - [Getting Started](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/scale.html#getting-started)
-  - [Supported APIs](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/scale.html#supported-apis)
-  - [Resources](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/scale.html#resources)
+- [**What is AWS SDK for pandas?**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/about.html)
+- [**Install**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html)
+  - [PyPI (pip)](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#pypi-pip)
+  - [Conda](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#conda)
+  - [AWS Lambda Layer](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-lambda-layer)
+  - [AWS Glue Python Shell Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-glue-python-shell-jobs)
+  - [AWS Glue PySpark Jobs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#aws-glue-pyspark-jobs)
+  - [Amazon SageMaker Notebook](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#amazon-sagemaker-notebook)
+  - [Amazon SageMaker Notebook Lifecycle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#amazon-sagemaker-notebook-lifecycle)
+  - [EMR](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#emr)
+  - [From source](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/install.html#from-source)
+- [**At scale**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html)
+  - [Getting Started](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html#getting-started)
+  - [Supported APIs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html#supported-apis)
+  - [Resources](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/scale.html#resources)
 - [**Tutorials**](https://github.com/aws/aws-sdk-pandas/tree/main/tutorials)
   - [001 - Introduction](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/001%20-%20Introduction.ipynb)
   - [002 - Sessions](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/002%20-%20Sessions.ipynb)
@@ -153,7 +153,6 @@ The quickest way to get started is to use AWS Glue with Ray. Read our [docs](htt
   - [029 - S3 Select](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/029%20-%20S3%20Select.ipynb)
   - [030 - Data Api](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/030%20-%20Data%20Api.ipynb)
   - [031 - OpenSearch](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/031%20-%20OpenSearch.ipynb)
-  - [032 - Lake Formation Governed Tables](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/032%20-%20Lake%20Formation%20Governed%20Tables.ipynb)
   - [033 - Amazon Neptune](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/033%20-%20Amazon%20Neptune.ipynb)
   - [034 - Distributing Calls Using Ray](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/034%20-%20Distributing%20Calls%20using%20Ray.ipynb)
   - [035 - Distributing Calls on Ray Remote Cluster](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/035%20-%20Distributing%20Calls%20on%20Ray%20Remote%20Cluster.ipynb)
@@ -163,31 +162,30 @@ The quickest way to get started is to use AWS Glue with Ray. Read our [docs](htt
   - [039 - Athena Iceberg](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/039%20-%20Athena%20Iceberg.ipynb)
   - [040 - EMR Serverless](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/040%20-%20EMR%20Serverless.ipynb)
   - [041 - Apache Spark on Amazon Athena](https://github.com/aws/aws-sdk-pandas/blob/main/tutorials/041%20-%20Apache%20Spark%20on%20Amazon%20Athena.ipynb)
-- [**API Reference**](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html)
-  - [Amazon S3](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-s3)
-  - [AWS Glue Catalog](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#aws-glue-catalog)
-  - [Amazon Athena](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-athena)
-  - [AWS Lake Formation](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#aws-lake-formation)
-  - [Amazon Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-redshift)
-  - [PostgreSQL](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#postgresql)
-  - [MySQL](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#mysql)
-  - [SQL Server](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#sqlserver)
-  - [Oracle](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#oracle)
-  - [Data API Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#data-api-redshift)
-  - [Data API RDS](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#data-api-rds)
-  - [OpenSearch](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#opensearch)
-  - [AWS Glue Data Quality](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#aws-glue-data-quality)
-  - [Amazon Neptune](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-neptune)
-  - [DynamoDB](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#dynamodb)
-  - [Amazon Timestream](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-timestream)
-  - [Amazon EMR](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-emr)
-  - [Amazon CloudWatch Logs](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-cloudwatch-logs)
-  - [Amazon Chime](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-chime)
-  - [Amazon QuickSight](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#amazon-quicksight)
-  - [AWS STS](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#aws-sts)
-  - [AWS Secrets Manager](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#aws-secrets-manager)
-  - [Global Configurations](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#global-configurations)
-  - [Distributed - Ray](https://aws-sdk-pandas.readthedocs.io/en/3.6.0/api.html#distributed-ray)
+- [**API Reference**](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html)
+  - [Amazon S3](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-s3)
+  - [AWS Glue Catalog](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-glue-catalog)
+  - [Amazon Athena](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-athena)
+  - [Amazon Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-redshift)
+  - [PostgreSQL](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#postgresql)
+  - [MySQL](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#mysql)
+  - [SQL Server](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#sqlserver)
+  - [Oracle](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#oracle)
+  - [Data API Redshift](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#data-api-redshift)
+  - [Data API RDS](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#data-api-rds)
+  - [OpenSearch](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#opensearch)
+  - [AWS Glue Data Quality](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-glue-data-quality)
+  - [Amazon Neptune](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-neptune)
+  - [DynamoDB](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#dynamodb)
+  - [Amazon Timestream](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-timestream)
+  - [Amazon EMR](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-emr)
+  - [Amazon CloudWatch Logs](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-cloudwatch-logs)
+  - [Amazon Chime](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-chime)
+  - [Amazon QuickSight](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#amazon-quicksight)
+  - [AWS STS](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-sts)
+  - [AWS Secrets Manager](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#aws-secrets-manager)
+  - [Global Configurations](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#global-configurations)
+  - [Distributed - Ray](https://aws-sdk-pandas.readthedocs.io/en/3.7.0/api.html#distributed-ray)
 - [**License**](https://github.com/aws/aws-sdk-pandas/blob/main/LICENSE.txt)
 - [**Contributing**](https://github.com/aws/aws-sdk-pandas/blob/main/CONTRIBUTING.md)
 

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-3.6.0
+3.7.0
diff --git a/awswrangler/__init__.py b/awswrangler/__init__.py
@@ -19,7 +19,6 @@
     emr,
     emr_serverless,
     exceptions,
-    lakeformation,
     mysql,
     neptune,
     opensearch,
@@ -58,7 +57,6 @@
     "s3",
     "sts",
     "redshift",
-    "lakeformation",
     "mysql",
     "neptune",
     "postgresql",

diff --git a/awswrangler/__metadata__.py b/awswrangler/__metadata__.py
@@ -7,5 +7,5 @@
 
 __title__: str = "awswrangler"
 __description__: str = "Pandas on AWS."
-__version__: str = "3.6.0"
+__version__: str = "3.7.0"
 __license__: str = "Apache License 2.0"
diff --git a/awswrangler/_arrow.py b/awswrangler/_arrow.py
@@ -70,12 +70,15 @@ def _apply_timezone(df: pd.DataFrame, metadata: dict[str, Any]) -> pd.DataFrame:
         else:
             continue
         if col_name in df.columns and c["pandas_type"] == "datetimetz":
-            timezone: datetime.tzinfo = pa.lib.string_to_tzinfo(c["metadata"]["timezone"])
-            _logger.debug("applying timezone (%s) on column %s", timezone, col_name)
-            if hasattr(df[col_name].dtype, "tz") is False:
-                df[col_name] = df[col_name].dt.tz_localize(tz="UTC")
-            if timezone is not None and timezone != pytz.UTC and hasattr(df[col_name].dt, "tz_convert"):
-                df[col_name] = df[col_name].dt.tz_convert(tz=timezone)
+            column_metadata: dict[str, Any] = c["metadata"] if c.get("metadata") else {}
+            timezone_str: str | None = column_metadata.get("timezone")
+            if timezone_str:
+                timezone: datetime.tzinfo = pa.lib.string_to_tzinfo(timezone_str)
+                _logger.debug("applying timezone (%s) on column %s", timezone, col_name)
+                if hasattr(df[col_name].dtype, "tz") is False:
+                    df[col_name] = df[col_name].dt.tz_localize(tz="UTC")
+                if timezone is not None and timezone != pytz.UTC and hasattr(df[col_name].dt, "tz_convert"):
+                    df[col_name] = df[col_name].dt.tz_convert(tz=timezone)
     return df
 
 

diff --git a/awswrangler/_config.py b/awswrangler/_config.py
@@ -44,7 +44,6 @@ class _ConfigArg(NamedTuple):
     "max_local_cache_entries": _ConfigArg(dtype=int, nullable=False, parent_parameter_key="athena_cache_settings"),
     "athena_query_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
     "cloudwatch_query_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
-    "lakeformation_query_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
     "neptune_load_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
     "timestream_batch_load_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
     "emr_serverless_job_wait_polling_delay": _ConfigArg(dtype=float, nullable=False),
@@ -61,7 +60,6 @@ class _ConfigArg(NamedTuple):
     "redshift_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "kms_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "emr_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
-    "lakeformation_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "dynamodb_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "secretsmanager_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
     "timestream_query_endpoint_url": _ConfigArg(dtype=str, nullable=True, enforced=True, loaded=True),
@@ -353,15 +351,6 @@ def cloudwatch_query_wait_polling_delay(self) -> float:
     def cloudwatch_query_wait_polling_delay(self, value: float) -> None:
         self._set_config_value(key="cloudwatch_query_wait_polling_delay", value=value)
 
-    @property
-    def lakeformation_query_wait_polling_delay(self) -> float:
-        """Property lakeformation_query_wait_polling_delay."""
-        return cast(float, self["lakeformation_query_wait_polling_delay"])
-
-    @lakeformation_query_wait_polling_delay.setter
-    def lakeformation_query_wait_polling_delay(self, value: float) -> None:
-        self._set_config_value(key="lakeformation_query_wait_polling_delay", value=value)
-
     @property
     def neptune_load_wait_polling_delay(self) -> float:
         """Property neptune_load_wait_polling_delay."""
@@ -497,15 +486,6 @@ def emr_endpoint_url(self) -> str | None:
     def emr_endpoint_url(self, value: str | None) -> None:
         self._set_config_value(key="emr_endpoint_url", value=value)
 
-    @property
-    def lakeformation_endpoint_url(self) -> str | None:
-        """Property lakeformation_endpoint_url."""
-        return cast(Optional[str], self["lakeformation_endpoint_url"])
-
-    @lakeformation_endpoint_url.setter
-    def lakeformation_endpoint_url(self, value: str | None) -> None:
-        self._set_config_value(key="lakeformation_endpoint_url", value=value)
-
     @property
     def dynamodb_endpoint_url(self) -> str | None:
         """Property dynamodb_endpoint_url."""

diff --git a/awswrangler/_data_types.py b/awswrangler/_data_types.py
@@ -563,10 +563,12 @@ def pyarrow_types_from_pandas(  # noqa: PLR0912,PLR0915
         for field in fields:
             name = str(field.name)
             # Check if any of the index columns must be ignored
-            if name not in ignore_cols:
+            if name in ignore_cols:
+                cols_dtypes[name] = None
+            else:
                 _logger.debug("Inferring PyArrow type from index: %s", name)
                 cols_dtypes[name] = field.type
-                indexes.append(name)
+            indexes.append(name)
 
     # Merging Index
     sorted_cols: list[str] = indexes + list(df.columns) if index_left is True else list(df.columns) + indexes
@@ -693,13 +695,26 @@ def pyarrow_schema_from_pandas(
         df=df, index=index, ignore_cols=ignore_plus
     )
     for k, v in casts.items():
-        if (k in df.columns) and (k not in ignore):
+        if (k not in ignore) and (k in df.columns or _is_index_name(k, df.index)):
             columns_types[k] = athena2pyarrow(dtype=v)
     columns_types = {k: v for k, v in columns_types.items() if v is not None}
     _logger.debug("columns_types: %s", columns_types)
     return pa.schema(fields=columns_types)
 
 
+def _is_index_name(name: str, index: pd.Index) -> bool:
+    if name in index.names:
+        # `name` is the explicit name of one of the index levels.
+        return True
+
+    if (match := re.match(r"__index_level_(?P<level>\d+)__", name)) is not None:
+        # `name` starts with "__index_level_<N>__": accept it only if level N exists and has no name (None).
+        if len(index.names) > (level := int(match.group("level"))):
+            return index.names[level] is None
+
+    return False  # neither a level name nor an "__index_level_<N>__" reference to an existing unnamed level
+
+
 def athena_types_from_pyarrow_schema(
     schema: pa.Schema,
     ignore_null: bool = False,