Skip to content

API Reference

Auto-generated code documentation.

analytics_project

data_prep

Module 2: Initial Script to Verify Project Setup.

File: src/analytics_project/data_prep.py.

main

main() -> None

Process raw data.

Source code in src/analytics_project/data_prep.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def main() -> None:
    """Process raw data."""
    logger.info("Starting data preparation...")

    # Resolve each raw CSV under data/raw and read/log it in turn.
    # Order matters only for log readability: customers, products, sales.
    for filename in ("customers_data.csv", "products_data.csv", "sales_data.csv"):
        read_and_log(RAW_DATA_DIR.joinpath(filename))

    logger.info("Data preparation complete.")

read_and_log

read_and_log(path: Path) -> pd.DataFrame

Read a CSV at the given path into a DataFrame, with friendly logging.

We know reading a csv file can fail (the file might not exist, it could be corrupted), so we put the statement in a try block. It could fail due to a FileNotFoundError or other exceptions. If it succeeds, we log the shape of the DataFrame. If it fails, we log an error and return an empty DataFrame.

Source code in src/analytics_project/data_prep.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def read_and_log(path: pathlib.Path) -> pd.DataFrame:
    """Read a CSV at the given path into a DataFrame, with friendly logging.

    Reading a CSV can fail (the file might not exist, or be corrupted),
    so the read is wrapped in a try block. On success the DataFrame's
    shape is logged and the frame is returned; on any failure an error
    is logged and an empty DataFrame is returned instead.
    """
    try:
        # Announce the read before attempting it.
        logger.info(f"Reading raw data from {path}.")
        frame = pd.read_csv(path)
        # Record the loaded shape so pipeline runs are auditable.
        rows, cols = frame.shape
        logger.info(
            f"{path.name}: loaded DataFrame with shape {rows} rows x {cols} cols"
        )
        return frame
    except FileNotFoundError:
        logger.error(f"File not found: {path}")
    except Exception as exc:
        logger.error(f"Error reading {path}: {exc}")
    # Fallthrough for either failure branch: hand back an empty frame.
    return pd.DataFrame()

utils_logger

Provide centralized logging for professional analytics projects.

This module configures project-wide logging to track events, debug issues, and maintain audit trails during data analysis workflows.

Module Information
  • Filename: utils_logger.py
  • Module: utils_logger
  • Location: src/analytics_project/
Key Concepts
  • Centralized logging configuration
  • Log levels (DEBUG, INFO, WARNING, ERROR)
  • File-based log persistence
  • Colorized console output with Loguru
Professional Applications
  • Production debugging and troubleshooting
  • Audit trails for regulatory compliance
  • Performance monitoring and optimization
  • Error tracking in data pipelines

get_log_file_path

get_log_file_path() -> pathlib.Path

Return the path to the active log file, or default path if not initialized.

Source code in src/analytics_project/utils_logger.py
48
49
50
51
52
53
def get_log_file_path() -> pathlib.Path:
    """Return the path to the active log file, or default path if not initialized."""
    if _log_file_path is None:
        # init_logger() has not run yet: fall back to a predictable location.
        return project_root / "project.log"
    return _log_file_path

init_logger

init_logger(
    level: str = 'INFO',
    *,
    log_dir: str | Path = project_root,
    log_file_name: str = 'project.log',
) -> pathlib.Path

Initialize the logger and return the log file path.

Ensures the log folder exists and configures logging to write to a file.

Parameters:

Name Type Description Default
level str

Logging level (e.g., "INFO", "DEBUG").

'INFO'
log_dir str | Path

Directory where the log file will be written.

project_root
log_file_name str

File name for the log file.

'project.log'

Returns:

Type Description
Path

pathlib.Path: The resolved path to the log file.

Source code in src/analytics_project/utils_logger.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def init_logger(
    level: str = "INFO",
    *,
    log_dir: str | pathlib.Path = project_root,
    log_file_name: str = "project.log",
) -> pathlib.Path:
    """Initialize the logger and return the log file path.

    Ensures the log folder exists and configures logging to write to a file.

    Args:
        level (str): Logging level (e.g., "INFO", "DEBUG").
        log_dir: Directory where the log file will be written.
        log_file_name: File name for the log file.

    Returns:
        pathlib.Path: The resolved path to the log file.
    """
    # BUG FIX: _log_file_path must be declared global here, otherwise the
    # assignment below creates a function-local variable and the module-level
    # cache read by get_log_file_path() is never populated.
    global _is_configured, _log_file_path
    if _is_configured:
        # Already configured for this process: prefer the cached path of the
        # file actually in use over recomputing one from the current arguments.
        if _log_file_path is not None:
            return _log_file_path
        return pathlib.Path(log_dir) / log_file_name

    # print a visual separator before logs
    print("-----------------------")

    # Resolve and ensure log folder exists
    log_folder = pathlib.Path(log_dir).expanduser().resolve()
    log_folder.mkdir(parents=True, exist_ok=True)

    # Build log file path
    log_file = log_folder / log_file_name

    try:
        fmt = "{time:YYYY-MM-DD HH:mm}:{level:<7} AT {file}:{line}: {message}"
        # Remove any existing Loguru handlers to avoid duplicate output
        logger.remove()
        logger.add(sys.stderr, level=level, format=fmt)
        logger.add(
            log_file,
            level=level,
            enqueue=True,
            backtrace=True,
            diagnose=False,
            rotation="10 MB",
            retention="7 days",
            encoding="utf-8",
            format=fmt,
        )
        logger.info(f"Logging to file: {log_file.resolve()}")
        _is_configured = True
        _log_file_path = log_file  # cache for retrieval via get_log_file_path()
    except Exception as e:
        # Deliberate best-effort: keep running even if file logging could not
        # be configured; the error itself is logged to any remaining handler.
        logger.error(f"Error configuring logger to write to file: {e}")

    return log_file

log_example

log_example() -> None

Demonstrate logging behavior with example messages.

Source code in src/analytics_project/utils_logger.py
114
115
116
117
118
def log_example() -> None:
    """Demonstrate logging behavior with example messages."""
    # Emit one sample message at each level, from least to most severe.
    for emit, text in (
        (logger.info, "This is an example info message."),
        (logger.warning, "This is an example warning message."),
        (logger.error, "This is an example error message."),
    ):
        emit(text)

main

main() -> None

Execute logger setup and demonstrate its usage.

Source code in src/analytics_project/utils_logger.py
121
122
123
124
125
def main() -> None:
    """Execute logger setup and demonstrate its usage."""
    # Configure logging first so the example messages land in the file.
    log_path = init_logger()
    log_example()
    logger.info(f"View the log output at {log_path}")