Coverage for kataglyphispythonpackage/dummy.py: 73%
41 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 20:49 +0000
1import numpy as np
2from loguru import logger
class SimpleMLPreprocessor:
    """Toy preprocessing pipeline that runs on synthetic data.

    The pipeline generates random feature vectors with binary labels,
    standardizes the features column-wise, and maps the labels to joke
    strings. Each step stores its result on the instance.

    Attributes:
        n_samples: Number of synthetic samples to generate.
        features: Feature matrix of shape ``(n_samples, 3)``; empty until
            ``generate_synthetic_data`` has run.
        labels: Integer 0/1 label per sample; empty until data is generated.
        normalized: Column-wise standardized copy of ``features``; empty
            until ``normalize_features`` has run on non-empty features.
        stats: ``{"mean": [...], "std": [...]}`` of the last normalization,
            or ``{}`` when nothing has been normalized.
    """

    def __init__(self, n_samples: int):
        """Store the sample count and start with empty results."""
        self.n_samples = n_samples
        self.features = np.array([])
        self.labels = np.array([])
        self.normalized = np.array([])
        self.stats = {}

    def generate_synthetic_data(self) -> tuple:
        """Draw random features and derive a binary label for each sample.

        Features are drawn from a normal distribution (mean 5.0, standard
        deviation 2.0) with three columns per sample. A sample is labelled
        1 when its three features sum to more than 15, otherwise 0.

        Returns:
            The ``(features, labels)`` pair, also stored on the instance.
        """
        logger.debug(
            f"Generating {self.n_samples} samples of synthetic features and labels..."
        )
        self.features = np.random.normal(loc=5.0, scale=2.0, size=(self.n_samples, 3))
        # 15 is the expected row sum (3 features * mean 5.0), so the threshold
        # splits the samples around the centre of the distribution.
        self.labels = (self.features.sum(axis=1) > 15).astype(int)
        logger.info(f"First 5 feature vectors: {self.features[:5]}")
        logger.info(f"First 5 labels: {self.labels[:5]}")
        return self.features, self.labels

    def normalize_features(self) -> np.ndarray:
        """Standardize each feature column to zero mean and unit variance.

        The per-column mean and standard deviation are recorded in
        ``self.stats``. Columns with zero standard deviation (constant
        columns, e.g. when there is only one sample) are centred but not
        scaled, so they come out as exactly 0 instead of NaN.

        Returns:
            The normalized feature matrix, or an empty array when there are
            no features to normalize.
        """
        if self.features.size == 0:
            logger.warning("No features to normalize.")
            # Discard results of any earlier run so they cannot be mistaken
            # for the normalization of the current (empty) features.
            self.normalized = np.array([])
            self.stats = {}
            return self.normalized

        mean = self.features.mean(axis=0)
        std = self.features.std(axis=0)
        # Dividing by a zero standard deviation would produce NaN (0 / 0);
        # use 1 as the divisor for such columns so they stay at 0.
        divisor = np.where(std == 0, 1.0, std)
        self.normalized = (self.features - mean) / divisor

        # The reported std is the measured one, not the safe divisor.
        self.stats = {"mean": mean.tolist(), "std": std.tolist()}
        logger.debug(f"Feature normalization stats: {self.stats}")
        return self.normalized

    def apply_joke_labeling(self) -> np.ndarray:
        """Translate the numeric labels into joke strings.

        Returns:
            An array with ``"Definitely ML"`` where the label is 1 and
            ``"Possibly Not"`` elsewhere, or an empty array when there are
            no labels.
        """
        if self.labels.size == 0:
            logger.warning("No labels to convert into jokes.")
            return np.array([])

        jokes = np.where(self.labels == 1, "Definitely ML", "Possibly Not")
        logger.info(f"First 5 joke labels: {jokes[:5]}")
        return jokes

    def run_pipeline(self) -> dict:
        """Run generation, normalization and joke labeling in sequence.

        Returns:
            A dict with the keys ``"features"``, ``"labels"``,
            ``"normalized"`` and ``"joke_labels"``. The keys ``"mean"`` and
            ``"std"`` are included as well whenever normalization ran, i.e.
            whenever the generated feature matrix was not empty.
        """
        logger.info(
            f"Running ML preprocessing pipeline for {self.n_samples} samples..."
        )
        self.generate_synthetic_data()
        self.normalize_features()
        jokes = self.apply_joke_labeling()

        result = {
            "features": self.features,
            "labels": self.labels,
            "normalized": self.normalized,
            **self.stats,
            "joke_labels": jokes,
        }
        logger.success("ML pipeline complete!")
        return result