Coverage for kataglyphispythonpackage/dummy.py: 73%

41 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 20:49 +0000

1import numpy as np 

2from loguru import logger 

3 

4 

class SimpleMLPreprocessor:
    """Toy preprocessing pipeline over randomly generated data.

    The pipeline draws ``n_samples`` rows of three normally distributed
    features, derives a binary label per row, standardises the features
    column-wise and finally maps the labels onto humorous strings.

    Attributes are plain NumPy arrays (empty until the matching step has
    run) plus ``stats``, a dict holding the per-column ``"mean"`` and
    ``"std"`` used by the last successful normalisation.
    """

    def __init__(self, n_samples: int):
        """Remember the requested sample count and start with empty results.

        Args:
            n_samples: Number of synthetic rows to generate.
        """
        self.n_samples = n_samples
        self.features = np.array([])
        self.labels = np.array([])
        self.normalized = np.array([])
        self.stats = {}

    def generate_synthetic_data(self) -> tuple:
        """Draw random features and derive their binary labels.

        Features are sampled from a normal distribution (mean 5.0,
        standard deviation 2.0) with shape ``(n_samples, 3)``. A row is
        labelled 1 when the sum of its three features exceeds 15, else 0.

        Returns:
            The ``(features, labels)`` pair, also stored on the instance.
        """
        logger.debug(
            f"Generating {self.n_samples} samples of synthetic features and labels..."
        )
        self.features = np.random.normal(loc=5.0, scale=2.0, size=(self.n_samples, 3))
        self.labels = (self.features.sum(axis=1) > 15).astype(int)
        logger.info(f"First 5 feature vectors: {self.features[:5]}")
        logger.info(f"First 5 labels: {self.labels[:5]}")
        return self.features, self.labels

    @staticmethod
    def _standardize(features: np.ndarray) -> tuple:
        """Return ``(normalized, mean, std)`` for *features*, column-wise.

        Columns whose standard deviation is exactly zero (a constant
        column, or any column when there is a single row) are divided by
        1.0 instead, so they normalise to 0.0 rather than NaN/inf. The
        returned ``std`` is the true, unmodified standard deviation.
        """
        mean = features.mean(axis=0)
        std = features.std(axis=0)
        # Guard only the divisor; callers still see the real std values.
        safe_std = np.where(std == 0, 1.0, std)
        return (features - mean) / safe_std, mean, std

    def normalize_features(self) -> np.ndarray:
        """Standardise the stored features to zero mean and unit variance.

        On success ``self.normalized`` and ``self.stats`` are updated.
        When there are no features, a warning is logged, both attributes
        are reset (so results from an earlier run cannot leak through) and
        an empty array is returned.

        Returns:
            The normalised feature array, or an empty array if there is
            nothing to normalise.
        """
        if self.features.size == 0:
            logger.warning("No features to normalize.")
            # Drop stale results from any previous successful run.
            self.normalized = np.array([])
            self.stats = {}
            return np.array([])

        self.normalized, mean, std = self._standardize(self.features)

        self.stats = {"mean": mean.tolist(), "std": std.tolist()}
        logger.debug(f"Feature normalization stats: {self.stats}")
        return self.normalized

    def apply_joke_labeling(self) -> np.ndarray:
        """Translate the binary labels into joke strings.

        Label 1 becomes ``"Definitely ML"``; every other value becomes
        ``"Possibly Not"``.

        Returns:
            An array of strings, or an empty array (after logging a
            warning) when no labels are available.
        """
        if self.labels.size == 0:
            logger.warning("No labels to convert into jokes.")
            return np.array([])

        jokes = np.where(self.labels == 1, "Definitely ML", "Possibly Not")
        logger.info(f"First 5 joke labels: {jokes[:5]}")
        return jokes

    def run_pipeline(self) -> dict:
        """Run generation, normalisation and joke labelling in order.

        Returns:
            A dict with the keys ``"features"``, ``"labels"``,
            ``"normalized"`` and ``"joke_labels"``, plus the entries of
            ``self.stats`` (``"mean"`` and ``"std"``) when normalisation
            actually ran. Those two keys are absent if no features were
            generated.
        """
        logger.info(
            f"Running ML preprocessing pipeline for {self.n_samples} samples..."
        )
        self.generate_synthetic_data()
        self.normalize_features()
        jokes = self.apply_joke_labeling()

        result = {
            "features": self.features,
            "labels": self.labels,
            "normalized": self.normalized,
            **self.stats,
            "joke_labels": jokes,
        }
        logger.success("ML pipeline complete!")
        return result