Skip to content

API Reference

This page is auto-generated from Python docstrings.

Early on, it may not be easy to read. It becomes more useful as we gain experience.

mlstudio

ML Studio package.

mlstudio.app_case

app_case.py - example.

An example of a supervised regression case. This app is used to verify project workflow.

Author: Denise Case Date: 2026-06

Process
  • Load a CSV dataset.
  • Train a supervised regression model.
  • Evaluate model performance.
  • Predict one new case.
  • Create useful charts.

Data Source: data/raw/hours_scores_case.csv

Terminal command to run this file from the root project folder:

uv run python -m mlstudio.app_case

OBS

Don't edit this file - it should remain a working example. It is used in each module to test the installation and workflow. You never need to do anything with it, but if you would like, you can copy it, rename it, and modify your copy. If you do, include your command to run it in the docstring above and in README.md.

DATASET_NAME module-attribute

DATASET_NAME: Final[str] = 'hours_scores_case'

FEATURE_COLS module-attribute

FEATURE_COLS: Final[list[str]] = [
    'hours_studied',
    'practice_quizzes',
    'attendance_pct',
    'sleep_hours',
    'prior_score',
]

LOG module-attribute

LOG: Logger = get_logger('ML', level='DEBUG')

RANDOM_STATE module-attribute

RANDOM_STATE: Final[int] = 42

TARGET_COL module-attribute

TARGET_COL: Final[str] = 'score'

TEST_SIZE module-attribute

TEST_SIZE: Final[float] = 0.3

check_quality

check_quality(df: DataFrame) -> None

Check missing values and duplicate rows.

Source code in src/mlstudio/app_case.py
def check_quality(df: pd.DataFrame) -> None:
    """Check missing values and duplicate rows.

    Logs the per-column count of missing values and the number of rows
    flagged by ``DataFrame.duplicated()``. The DataFrame is not modified.

    Args:
        df: The dataset to check.
    """
    LOG.info("Missing values by column")
    LOG.debug(f"\n{df.isna().sum()}")

    # Series.sum() yields a NumPy integer, not a built-in int. Convert it so
    # the value really has the type the annotation declares.
    duplicate_count: int = int(df.duplicated().sum())
    LOG.info(f"Duplicate row count: {duplicate_count}")

inspect_basic

inspect_basic(df: DataFrame) -> None

Inspect basic dataset structure.

Source code in src/mlstudio/app_case.py
def inspect_basic(df: pd.DataFrame) -> None:
    """Inspect basic dataset structure.

    Logs the column names, the ``DataFrame.info()`` summary, and the
    row and column counts. The DataFrame is not modified.

    Args:
        df: The dataset to inspect.
    """
    import io  # Local import: only needed to capture the info() text.

    LOG.info("Column names")
    LOG.debug(f"{list(df.columns)}")

    LOG.info("DataFrame info")
    # DataFrame.info() writes to sys.stdout by default and returns None, so
    # its summary would bypass the logger entirely. Capture the text in a
    # buffer and log it so it appears with the rest of the log output.
    info_buffer = io.StringIO()
    df.info(buf=info_buffer)
    LOG.debug(f"\n{info_buffer.getvalue()}")

    LOG.info(f"Dataset shape: {df.shape[0]} rows, {df.shape[1]} columns")

load_data

load_data() -> pd.DataFrame

Load the case dataset from the data/raw folder.

Source code in src/mlstudio/app_case.py
def load_data() -> pd.DataFrame:
    """Read the case CSV into a DataFrame and log what was loaded.

    The file path is built from ``DATASET_NAME`` and is relative
    (``data/raw/<name>.csv``), so it resolves against the current working
    directory.

    Returns:
        The DataFrame returned by ``pd.read_csv``.
    """
    LOG.info(f"Loading dataset: {DATASET_NAME}")

    csv_path = f"data/raw/{DATASET_NAME}.csv"
    loaded: pd.DataFrame = pd.read_csv(csv_path)

    row_count, col_count = loaded.shape
    LOG.info(f"Loaded: {row_count} rows, {col_count} columns")

    # Preview the first rows at debug level.
    LOG.debug(f"\n{loaded.head()}")

    return loaded

main

main() -> None

Main function to run the supervised ML workflow.

Source code in src/mlstudio/app_case.py
def main() -> None:
    """Run the supervised ML workflow from start to finish.

    Calls each workflow step in a fixed order: load, inspect, quality
    check, clean view, train, predict one case, create charts, summarize.
    ``plt.show()`` is called once, after all charts have been created.
    """
    rule = "========================"

    log_header(LOG, "ML")

    LOG.info(rule)
    LOG.info("START main()")
    LOG.info(rule)

    LOG.info("Load dataset..............")
    raw_df = load_data()

    LOG.info("Inspect dataset...........")
    inspect_basic(raw_df)

    LOG.info("Check data quality........")
    check_quality(raw_df)

    LOG.info("Create clean view.........")
    clean_df = make_clean_view(raw_df)

    LOG.info("Train supervised model....")
    fitted_model = train_model(clean_df)

    LOG.info("Predict one case..........")
    predict_example(fitted_model)

    LOG.info("Create charts.............")
    make_plots(clean_df, fitted_model)

    LOG.info("Summarize workflow........")
    summarize(raw_df, clean_df)

    # Tell the user how the chart windows behave before showing them.
    show_hint = "----- in a script, call plt.show() once at the end to display all charts -----"
    close_hint = "----- in a script, CLOSE the chart windows with the close button to CONTINUE -----"
    LOG.info(show_hint)
    LOG.info(close_hint)

    plt.show()

    LOG.info("Workflow complete")
    LOG.info("IMPORTANT: This script creates chart windows.")
    LOG.info("Close chart windows and terminate this process with CTRL+c as needed.")
    LOG.info(rule)
    LOG.info("Executed successfully!")
    LOG.info(rule)

make_clean_view

make_clean_view(df: DataFrame) -> pd.DataFrame

Create a cleaned view for modeling.

Source code in src/mlstudio/app_case.py
def make_clean_view(df: pd.DataFrame) -> pd.DataFrame:
    """Build the table used for modeling.

    Keeps only the ``FEATURE_COLS`` and ``TARGET_COL`` columns, drops every
    row that has a missing value in any of them, and returns an
    independent copy.

    Args:
        df: The full dataset.

    Returns:
        A new DataFrame with the selected columns and no missing values.
    """
    LOG.info("Creating clean modeling view")

    wanted_cols: list[str] = [*FEATURE_COLS, TARGET_COL]

    # Narrow to the modeling columns first.
    narrowed: pd.DataFrame = df[wanted_cols]  # type: ignore[assignment]

    # Discard incomplete rows, then copy so that later edits to the result
    # do not raise SettingWithCopyWarning.
    df_clean: pd.DataFrame = narrowed.dropna().copy()

    n_rows, n_cols = df_clean.shape
    LOG.info(f"Clean view: {n_rows} rows, {n_cols} columns")
    return df_clean

make_plots

make_plots(
    df_clean: DataFrame, model: LinearRegression
) -> None

Create charts for the supervised regression case.

Source code in src/mlstudio/app_case.py
def make_plots(df_clean: pd.DataFrame, model: LinearRegression) -> None:
    """Create the two charts for the regression case.

    Builds a scatter plot of ``hours_studied`` against ``TARGET_COL`` and a
    horizontal bar chart of the model coefficients, each on its own figure.
    Nothing is displayed here; the function does not call ``plt.show()``.

    Args:
        df_clean: The cleaned modeling data to plot.
        model: A fitted model whose ``coef_`` values align with
            ``FEATURE_COLS``.
    """
    # Chart 1: scatter plot of one feature against the target.
    LOG.info("Creating chart: hours studied vs score")

    _, scatter_ax = plt.subplots(figsize=(9, 5))
    hours_chart: Axes = sns.scatterplot(
        data=df_clean, x="hours_studied", y=TARGET_COL, ax=scatter_ax
    )
    hours_chart.set_title("Hours Studied vs Score (CLOSE chart to continue)")
    hours_chart.set_xlabel("Hours Studied")
    hours_chart.set_ylabel("Score")

    # Chart 2: one bar per feature coefficient.
    LOG.info("Creating chart: model coefficients")

    fig, ax = plt.subplots(figsize=(9, 5))
    LOG.info(f"Got a figure {fig} and axes {ax} from plt.subplots().")

    # Pair each feature name with its coefficient, largest first.
    coef_table = pd.DataFrame({"feature": FEATURE_COLS, "coefficient": model.coef_})
    coef_table = coef_table.sort_values("coefficient", ascending=False)

    coef_chart: Axes = sns.barplot(data=coef_table, x="coefficient", y="feature", ax=ax)
    coef_chart.set_title("Model Coefficients (CLOSE chart to continue)")
    coef_chart.set_xlabel("Coefficient")
    coef_chart.set_ylabel("Feature")

predict_example

predict_example(model: LinearRegression) -> None

Use the trained model to predict one new student score.

Source code in src/mlstudio/app_case.py
def predict_example(model: LinearRegression) -> None:
    """Use the trained model to predict one new student score.

    Builds a one-row DataFrame for a hypothetical student, predicts the
    score with ``model``, and logs both the input row and the prediction.

    Args:
        model: A fitted regression model trained on ``FEATURE_COLS``.

    Raises:
        KeyError: If the example values lack a column named in
            ``FEATURE_COLS``.
    """
    LOG.info("Predicting one new case")

    example_values = {
        "hours_studied": 6.5,
        "practice_quizzes": 4,
        "attendance_pct": 92,
        "sleep_hours": 7.0,
        "prior_score": 72,
    }

    # Select by FEATURE_COLS so the column names and their order always match
    # what the model was trained on, even if the dict above is edited or
    # FEATURE_COLS is reordered.
    new_case = pd.DataFrame([example_values])[FEATURE_COLS]

    # predict() returns an array; take the single value and convert the
    # NumPy scalar to a built-in float to match the annotation.
    predicted_score: float = float(model.predict(new_case)[0])

    LOG.info(f"New case:\n{new_case}")
    LOG.info(f"Predicted score: {predicted_score:.1f}")

summarize

summarize(df: DataFrame, df_clean: DataFrame) -> None

Log a brief summary.

Source code in src/mlstudio/app_case.py
def summarize(df: pd.DataFrame, df_clean: pd.DataFrame) -> None:
    """Log a short recap of the run.

    Reports the dataset name, the row counts before and after cleaning,
    and the feature and target column names.

    Args:
        df: The dataset as originally loaded.
        df_clean: The cleaned view used for modeling.
    """
    rule = "========================"
    summary_lines = (
        rule,
        "SUMMARY",
        rule,
        f"Dataset: {DATASET_NAME}",
        f"Original rows: {df.shape[0]}",
        f"Clean rows: {df_clean.shape[0]}",
        f"Features: {FEATURE_COLS}",
        f"Target: {TARGET_COL}",
    )
    for line in summary_lines:
        LOG.info(line)

train_model

train_model(df_clean: DataFrame) -> LinearRegression

Train a supervised regression model.

Source code in src/mlstudio/app_case.py
def train_model(df_clean: pd.DataFrame) -> LinearRegression:
    """Fit a linear regression and log its hold-out metrics.

    Splits the data into train and test parts using ``TEST_SIZE`` and
    ``RANDOM_STATE``, fits on the training part, and logs the mean absolute
    error and R-squared measured on the test part.

    Args:
        df_clean: Cleaned data containing ``FEATURE_COLS`` and ``TARGET_COL``.

    Returns:
        The fitted ``LinearRegression`` model.
    """
    LOG.info("Training LinearRegression model")

    features = df_clean[FEATURE_COLS]
    target = df_clean[TARGET_COL]

    # Hold back part of the data so the metrics come from rows the model
    # did not see during fitting.
    features_train, features_test, target_train, target_test = train_test_split(
        features,
        target,
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
    )

    regressor = LinearRegression()
    regressor.fit(features_train, target_train)

    predictions = regressor.predict(features_test)

    mae: float = mean_absolute_error(target_test, predictions)
    r2: float = r2_score(target_test, predictions)

    LOG.info(f"Mean absolute error: {mae:.2f}")
    LOG.info(f"R-squared: {r2:.2f}")

    return regressor