diff --git a/.coverage b/.coverage index 73d2b11..f31411b 100644 Binary files a/.coverage and b/.coverage differ diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a14e82b..264d139 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -8,9 +8,15 @@ on: branches: [master] jobs: - test: + release: runs-on: ubuntu-latest - steps: + environment: pypi + + permissions: + id-token: write + contents: write + + steps: - name: Checkout uses: actions/checkout@v4 @@ -23,30 +29,15 @@ jobs: - name: Install dependencies run: | uv sync --dev --all-extras + + - name: Build package + run: uv build --no-sources - name: Run tests (small dataset) run: | uv run pytest -m "not large" env: TEST_SIZE: small - - build: - needs: test - runs-on: ubuntu-latest - if: startsWith(github.ref, 'refs/tags/v') - steps: - - uses: actions/checkout@v4 - - - name: Set Up Python - uses: actions/setup-python@v5 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - - - name: Build package - run: uv build --no-sources - + - name: Publish to PyPI - run: uv publish - env: - UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: uv publish \ No newline at end of file diff --git a/.gitignore b/.gitignore index 6861dd5..00bc6ba 100644 --- a/.gitignore +++ b/.gitignore @@ -18,12 +18,14 @@ lib64/ .installed.cfg *.manifest + # Pytest cache .pytest_cache/ # Virtual environment .env/ venv/ +.pypirc # IDE specific files .vscode/ @@ -33,4 +35,5 @@ venv/ .DS_Store Thumbs.db -.old/ \ No newline at end of file +.old/ +.test/ \ No newline at end of file diff --git a/README.md b/README.md index 95cff4f..f6fab40 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,23 @@ Extensions for DataFrames to make statistical and analysis operations much, *much* more comfortable and convenient. Turns your `DataFrame` into a `StatFrame`, composing Mindhunter's new features *over* it, supercharging its capabilities without sacrificing compatibility. +Example: + +```python +import pandas as pd + +from mindhunter import StatFrame +from mindhunter.visualization import StatPlotter + +dataset = pd.read_csv('Fish.csv') # load your data +data = StatFrame(dataset) # create a StatFrame +data.clean_df() # clean your data +plottable = StatPlotter(data) # turn your StatFrame into a StatPlotter +plottable.plot_normal_distr(data_to_test=data.df['width']) # create a set of normal distribution validation graphs +``` + +fish_nd + --- ## 📦 Installation @@ -79,3 +96,4 @@ In short: it uses basic OOP **composition**, against all advise, to pass the `St This library will be updated fairly regularly, as I start collecting and tidying up more and more little tools, and taking more advantage of the internal mechanisms. I am *much* more of a developer than a data analyst, so I need much more help knowing what the community *needs* for me to keep on improving the library. If you have any issue, suggestion or comment, feel free to create a new issue! + diff --git a/pyproject.toml b/pyproject.toml index 4daa4e2..74d6c3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ addopts = "--cov=src/mindhunter --cov-report=term-missing" [project] name = "mindhunter" -version = "0.1.0" +version = "0.1.1" description = "DataFrame extensions for data analysis." readme = "README.md" requires-python = ">=3.12" @@ -35,4 +35,8 @@ dependencies = [ "pytest>=7.4.0", "pytest-cov>=4.1.0", "scipy>=1.10.0", -] \ No newline at end of file + "statsmodels>=0.14.5", + "patsy>=1.0.1", +] +authors =[ + { "name" = "Sebastian Torres Sagredo (Framebuffer(s))", "email" = " 26495556+Framebuffers@users.noreply.github.com" } ] \ No newline at end of file diff --git a/src/mindhunter/mindhunter.py b/src/mindhunter/mindhunter.py index f85490f..6ab6c95 100644 --- a/src/mindhunter/mindhunter.py +++ b/src/mindhunter/mindhunter.py @@ -27,52 +27,52 @@ def clean_df(self, *chars_to_remove) -> None: normalized_columns = [ re.sub(pattern, '_', col.lower()).replace(' ', '_') - for col in self.df_copy.columns + for col in self._df.columns ] - self.df_copy.columns = normalized_columns - self.df_copy.dropna(inplace=True) - self.df_copy.drop_duplicates(inplace=True) + self._df.columns = normalized_columns + self._df.dropna(inplace=True) + self._df.drop_duplicates(inplace=True) def locate_zero_rows(self, columns: list[str] = None, # type: ignore return_indices: bool = False) -> pd.DataFrame | list: if columns is None: - check_columns = self.df_copy.select_dtypes(include=[np.number]).columns.tolist() + check_columns = self._df.select_dtypes(include=[np.number]).columns.tolist() else: check_columns = columns - zero_mask = (self.df_copy[check_columns] == 0).any(axis=1) - zero_rows = self.df_copy[zero_mask] + zero_mask = (self._df[check_columns] == 0).any(axis=1) + zero_rows = self._df[zero_mask] if return_indices: return zero_rows.index.tolist() return zero_rows def analyze_zero_removal(self) -> pd.DataFrame: - numeric_cols = self.df_copy.select_dtypes(include=[np.number]).columns + numeric_cols = self._df.select_dtypes(include=[np.number]).columns analysis = [] for col in numeric_cols: - zero_count = (self.df_copy[col] == 0).sum() - zero_pct = (zero_count / len(self.df_copy)) * 100 + zero_count = (self._df[col] == 0).sum() + zero_pct = (zero_count / len(self._df)) * 100 analysis.append({ 'column': col, 'zero_count': zero_count, 'zero_percentage': f"{zero_pct:.1f}%", - 'total_rows': len(self.df_copy) + 'total_rows': len(self._df) }) return pd.DataFrame(analysis) def remove_exact_zeros(self, update_cache: bool = True) -> dict: - numeric_cols = self.df_copy.select_dtypes(include=[np.number]).columns + numeric_cols = self._df.select_dtypes(include=[np.number]).columns - zero_mask = (self.df_copy[numeric_cols] == 0.0).any(axis=1) + zero_mask = (self._df[numeric_cols] == 0.0).any(axis=1) - original_length = len(self.df_copy) - self.df_copy = self.df_copy[~zero_mask].reset_index(drop=True) + original_length = len(self._df) + self._df = self._df[~zero_mask].reset_index(drop=True) if update_cache: self._cached_stats = {} @@ -80,19 +80,19 @@ def remove_exact_zeros(self, update_cache: bool = True) -> dict: return { 'method': 'exact_zeros', - 'rows_removed': original_length - len(self.df_copy), - 'new_length': len(self.df_copy) + 'rows_removed': original_length - len(self._df), + 'new_length': len(self._df) } def remove_near_zeros(self, tolerance: float = 1e-10, columns: list[str] = None, update_cache: bool = True) -> dict: # type: ignore if columns is None: - columns = self.df_copy.select_dtypes(include=[np.number]).columns.tolist() + columns = self._df.select_dtypes(include=[np.number]).columns.tolist() - near_zero_mask = (abs(self.df_copy[columns]) <= tolerance).any(axis=1) + near_zero_mask = (abs(self._df[columns]) <= tolerance).any(axis=1) - original_length = len(self.df_copy) - self.df_copy = self.df_copy[~near_zero_mask].reset_index(drop=True) + original_length = len(self._df) + self._df = self._df[~near_zero_mask].reset_index(drop=True) if update_cache: self._cached_stats = {} @@ -101,13 +101,16 @@ def remove_near_zeros(self, tolerance: float = 1e-10, return { 'method': 'near_zeros', 'tolerance_used': tolerance, - 'rows_removed': original_length - len(self.df_copy), + 'rows_removed': original_length - len(self._df), 'columns_checked': columns } def describe_columns(self, *columns: str) -> pd.DataFrame: return self._df[list(columns)].describe() if columns else self._df.describe() + def get_stats(self) -> pd.DataFrame: + return pd.DataFrame.from_dict(self._cached_stats) + def _compute_essential_stats(self): """ Compute and cache essential statistical measures. @@ -180,32 +183,7 @@ def _compute_essential_stats(self): def _compute_column_stats(self, column_name: str) -> None: data = self._df[column_name].dropna() - # { - # 'mean': data.mean(), - # 'median': data.median(), - # 'mode': data.mode().iloc[0] if not data.mode().empty else np.nan, - - # 'std': data.std(), - # 'variance': data.var(), - # 'range': data.max() - data.min(), - # 'iqr': data.quantile(0.75) - data.quantile(0.25), - # 'mad': (data - data.median()).abs().median(), - - # 'skewness': data.skew(), - # 'kurtosis': data.kurtosis(), - - # 'count': len(data), - # 'missing_count': self._df[col].isna().sum(), - # 'missing_pct': self._df[col].isna().mean(), - - # 'min': data.min(), - # 'max': data.max(), - # 'q1': data.quantile(0.25), - # 'q3': data.quantile(0.75), - - # 'cv': data.std() / data.mean() if data.mean() != 0 else np.inf, - # 'sem': data.std() / np.sqrt(len(data)) - # } + self._df[column_name] = { 'mean': data.mean(), 'std': data.std(), diff --git a/src/mindhunter/visualization/stat_plotter.py b/src/mindhunter/visualization/stat_plotter.py index 1e908c4..7e4173e 100644 --- a/src/mindhunter/visualization/stat_plotter.py +++ b/src/mindhunter/visualization/stat_plotter.py @@ -29,33 +29,35 @@ def plot_z_scores(self, *columns: str) -> None: plt.xticks(rotation=45) plt.show() - def plot_coefficient_variation(self, - title: str = 'Coefficient of Variation of Indicators', - x_label: str = 'Indicators', - y_label: str = 'Coefficient of Variation', - rotation: int = 90, - ha: str = 'right') -> None: - """ Graphs the Coefficient of Variation for each column. + #TODO: fix + # def plot_coefficient_variation(self, + # column_name: str, + # title: str = 'Coefficient of Variation of Indicators', + # x_label: str = 'Indicators', + # y_label: str = 'Coefficient of Variation', + # rotation: int = 90, + # ha: str = 'right') -> None: + # """ Graphs the Coefficient of Variation for each column. - The Coefficient of Variation is calculated as the standard deviation divided by the mean. - It is graphed as a barplot with the 5 most volatile indicators. + # The Coefficient of Variation is calculated as the standard deviation divided by the mean. + # It is graphed as a barplot with the 5 most volatile indicators. - Args: - overall (pd.DataFrame): DataFrame held within the StatisticalObject. - Returns: - None - Raises: - None + # Args: + # overall (pd.DataFrame): DataFrame held within the StatisticalObject. + # Returns: + # None + # Raises: + # None - """ - data_frame = self.da.df - plt.figure(figsize=(10, 6)) - plt.title(title) - sns.barplot(x=data_frame.columns, y=data_frame.std() / data_frame.mean(), width=.5) - plt.ylabel(y_label) - plt.xlabel(x_label) - plt.xticks(rotation=rotation, ha=ha) - plt.show() + # """ + # data_frame = self.da.df + # plt.figure(figsize=(10, 6)) + # plt.title(title) + # sns.barplot(x=data_frame[column_name], y=data_frame[column_name].std() / data_frame[column_name].mean(), width=.5) + # plt.ylabel(y_label) + # plt.xlabel(x_label) + # plt.xticks(rotation=rotation, ha=ha) + # plt.show() def plot_normality_check(self, column_name: str) -> None: basic_stats = self.da._cached_stats diff --git a/src/mindhunter/visualization/visualizer.py b/src/mindhunter/visualization/visualizer.py index 0d2cbd9..7d70b4a 100644 --- a/src/mindhunter/visualization/visualizer.py +++ b/src/mindhunter/visualization/visualizer.py @@ -18,7 +18,7 @@ def create_scatterplot(self, columns: List[str]) -> None: if col1 in self.da.df.columns and col2 in self.da.df.columns: plt.figure(figsize=(8, 6)) sns.scatterplot(data=self.da.df, x=col1, y=col2) - plt.title(f'Scatterplot de {col1} vs {col2}') + plt.title(f'{col1} vs {col2}') plt.xlabel(col1) plt.ylabel(col2) plt.show() @@ -28,7 +28,7 @@ def create_boxplot(self, columns: List[str]) -> None: if col in self.da.df.columns: plt.figure(figsize=(8, 6)) sns.boxplot(y=self.da.df[col]) - plt.title(f'Boxplot de {col}') + plt.title(f'{col} Boxplot') plt.ylabel(col) plt.show() diff --git a/tests/test_cleaning.py b/tests/test_cleaning.py new file mode 100644 index 0000000..0b995a1 --- /dev/null +++ b/tests/test_cleaning.py @@ -0,0 +1,39 @@ +import pandas as pd +import numpy as np +import statsmodels.api as sm +import seaborn as sns +import matplotlib.pyplot as plt +import scipy as sp +from faker import Faker +from scipy.stats import norm +from scipy import stats +from mindhunter import StatFrame +import pytest +import random + +@pytest.fixture +def sample_statframe(): + fake = Faker() + rand = random.Random() + records = 50 + data = [] + for _ in range(records): + record = { + 'name': fake.name_nonbinary, # string + 'email': fake.email, # string + 'category': fake.boolean(25), # categoric + 'weight': rand.uniform(30.0, 200.0), # numerical + 'height': rand.uniform(100.0, 220.0), # numerical + 'age': rand.randint(18, 90), # numerical (int) + } + data.append(record) + return StatFrame(pd.DataFrame(data)) + +def test_clean_sf(sample_statframe: StatFrame): + + assert sample_statframe._cached_stats is not None + """ Check if the StatFrame has been loaded correctly and the internal cache is populated.""" + + sample_statframe.clean_df() + assert sample_statframe.df is not None + """ Check if the DF is loaded, and it can be editable. """ \ No newline at end of file diff --git a/tests/test_dataframe_creation.py b/tests/test_dataframe_creation.py index 96a1d5b..c2150f1 100644 --- a/tests/test_dataframe_creation.py +++ b/tests/test_dataframe_creation.py @@ -20,7 +20,7 @@ def get_test_size(): }[size] @pytest.fixture -def sample_analyzer(): +def sample_statframe(): """ Generate randomized DataFrame based on test environment with both numerical and categorical values. @@ -38,16 +38,16 @@ def sample_analyzer(): return StatFrame(test_df) -def test_cache_not_none(sample_analyzer): +def test_cache_not_none(sample_statframe): """ Always runs - uses environment-based size. """ - assert sample_analyzer._cached_stats is not None + assert sample_statframe._cached_stats is not None @pytest.mark.large -def test_heavy_computation(sample_analyzer: StatFrame): +def test_heavy_computation(sample_statframe: StatFrame): """ Skipped in CI - only runs locally. @@ -77,5 +77,5 @@ def test_heavy_computation(sample_analyzer: StatFrame): da = StatFrame(df) assert da is not None - result = sample_analyzer._cached_stats + result = sample_statframe._cached_stats assert result is not None \ No newline at end of file diff --git a/uv.lock b/uv.lock index 98392ea..3c73dad 100644 --- a/uv.lock +++ b/uv.lock @@ -350,17 +350,19 @@ wheels = [ [[package]] name = "mindhunter" -version = "0.1.0" +version = "0.1.1" source = { editable = "." } dependencies = [ { name = "faker" }, { name = "matplotlib" }, { name = "numpy" }, { name = "pandas" }, + { name = "patsy" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "scipy" }, { name = "seaborn" }, + { name = "statsmodels" }, ] [package.metadata] @@ -369,10 +371,12 @@ requires-dist = [ { name = "matplotlib", specifier = ">=3.10.6" }, { name = "numpy", specifier = ">=2.3.3" }, { name = "pandas", specifier = ">=2.3.3" }, + { name = "patsy", specifier = ">=1.0.1" }, { name = "pytest", specifier = ">=7.4.0" }, { name = "pytest-cov", specifier = ">=4.1.0" }, { name = "scipy", specifier = ">=1.10.0" }, { name = "seaborn", specifier = ">=0.13.2" }, + { name = "statsmodels", specifier = ">=0.14.5" }, ] [[package]] @@ -494,6 +498,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/44/5191d2e4026f86a2a109053e194d3ba7a31a2d10a9c2348368c63ed4e85a/pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87", size = 13202175, upload-time = "2025-09-29T23:31:59.173Z" }, ] +[[package]] +name = "patsy" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/81/74f6a65b848ffd16c18f920620ce999fe45fe27f01ab3911260ce4ed85e4/patsy-1.0.1.tar.gz", hash = "sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4", size = 396010, upload-time = "2024-11-12T14:10:54.642Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c", size = 232923, upload-time = "2024-11-12T14:10:52.85Z" }, +] + [[package]] name = "pillow" version = "11.3.0" @@ -722,6 +738,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "statsmodels" +version = "0.14.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "patsy" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/64/cc/8c1bf59bf8203dea1bf2ea811cfe667d7bcc6909c83d8afb02b08e30f50b/statsmodels-0.14.5.tar.gz", hash = "sha256:de260e58cccfd2ceddf835b55a357233d6ca853a1aa4f90f7553a52cc71c6ddf", size = 20525016, upload-time = "2025-07-07T12:14:23.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/a5/fcc4f5f16355660ce7a1742e28a43e3a9391b492fc4ff29fdd6893e81c05/statsmodels-0.14.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:37e7364a39f9aa3b51d15a208c2868b90aadb8412f868530f5cba9197cb00eaa", size = 10042891, upload-time = "2025-07-07T12:13:41.671Z" }, + { url = "https://files.pythonhosted.org/packages/1c/6f/db0cf5efa48277ac6218d9b981c8fd5e63c4c43e0d9d65015fdc38eed0ef/statsmodels-0.14.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4263d7f4d0f1d5ac6eb4db22e1ee34264a14d634b9332c975c9d9109b6b46e12", size = 9698912, upload-time = "2025-07-07T12:07:54.674Z" }, + { url = "https://files.pythonhosted.org/packages/4a/93/4ddc3bc4a59c51e6a57c49df1b889882c40d9e141e855b3517f6a8de3232/statsmodels-0.14.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:86224f6e36f38486e471e75759d241fe2912d8bc25ab157d54ee074c6aedbf45", size = 10237801, upload-time = "2025-07-07T14:23:12.593Z" }, + { url = "https://files.pythonhosted.org/packages/66/de/dc6bf2f6e8c8eb4c5815560ebdbdf2d69a767bc0f65fde34bc086cf5b36d/statsmodels-0.14.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3dd760a6fa80cd5e0371685c697bb9c2c0e6e1f394d975e596a1e6d0bbb9372", size = 10424154, upload-time = "2025-07-07T14:23:25.365Z" }, + { url = "https://files.pythonhosted.org/packages/16/4f/2d5a8d14bebdf2b03b3ea89b8c6a2c837bb406ba5b7a41add8bd303bce29/statsmodels-0.14.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6264fb00e02f858b86bd01ef2dc05055a71d4a0cc7551b9976b07b0f0e6cf24f", size = 10652915, upload-time = "2025-07-07T14:23:39.337Z" }, + { url = "https://files.pythonhosted.org/packages/df/4c/2feda3a9f0e17444a84ba5398ada6a4d2e1b8f832760048f04e2b8ea0c41/statsmodels-0.14.5-cp312-cp312-win_amd64.whl", hash = "sha256:b2ed065bfbaf8bb214c7201656df840457c2c8c65e1689e3eb09dc7440f9c61c", size = 9611236, upload-time = "2025-07-07T12:08:06.794Z" }, + { url = "https://files.pythonhosted.org/packages/84/fd/4c374108cf108b3130240a5b45847a61f70ddf973429044a81a05189b046/statsmodels-0.14.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:906263134dd1a640e55ecb01fda4a9be7b9e08558dba9e4c4943a486fdb0c9c8", size = 10013958, upload-time = "2025-07-07T14:35:01.04Z" }, + { url = "https://files.pythonhosted.org/packages/5a/36/bf3d7f0e36acd3ba9ec0babd79ace25506b6872780cbd710fb7cd31f0fa2/statsmodels-0.14.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9118f76344f77cffbb3a9cbcff8682b325be5eed54a4b3253e09da77a74263d3", size = 9674243, upload-time = "2025-07-07T12:08:22.571Z" }, + { url = "https://files.pythonhosted.org/packages/90/ce/a55a6f37b5277683ceccd965a5828b24672bbc427db6b3969ae0b0fc29fb/statsmodels-0.14.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9dc4ee159070557c9a6c000625d85f653de437772fe7086857cff68f501afe45", size = 10219521, upload-time = "2025-07-07T14:23:52.646Z" }, + { url = "https://files.pythonhosted.org/packages/1e/48/973da1ee8bc0743519759e74c3615b39acdc3faf00e0a0710f8c856d8c9d/statsmodels-0.14.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a085d47c8ef5387279a991633883d0e700de2b0acc812d7032d165888627bef", size = 10453538, upload-time = "2025-07-07T14:24:06.959Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d6/18903fb707afd31cf1edaec5201964dbdacb2bfae9a22558274647a7c88f/statsmodels-0.14.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9f866b2ebb2904b47c342d00def83c526ef2eb1df6a9a3c94ba5fe63d0005aec", size = 10681584, upload-time = "2025-07-07T14:24:21.038Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/80df1bbbfcdc50bff4152f43274420fa9856d56e234d160d6206eb1f5827/statsmodels-0.14.5-cp313-cp313-win_amd64.whl", hash = "sha256:2a06bca03b7a492f88c8106103ab75f1a5ced25de90103a89f3a287518017939", size = 9604641, upload-time = "2025-07-07T12:08:36.23Z" }, +] + [[package]] name = "tzdata" version = "2025.2"