Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import re 

2from typing import List, Union 


4import pandas as pd 

5from pandas import DataFrame 


7from ..utils import PhytestObject, assert_or_warn 



class Data(PhytestObject, DataFrame):
    """
    A pandas ``DataFrame`` with assertion helpers for validating tabular data.

    Every ``assert_*`` method evaluates a condition on the frame and passes the
    outcome to ``assert_or_warn`` together with a summary of the inspected data
    and a message describing the failure.
    """

    @classmethod
    def read(cls, data_path, data_format: str) -> 'Data':
        """
        Loads a table with pandas and wraps it in this class.

        Args:
            data_path: The location of the table; passed unchanged to the pandas reader.
            data_format (str, required): The format of the table. One of 'csv', 'tsv' or 'excel'.

        Returns:
            Data: The loaded table. It is built with ``cls`` so that a subclass
                calling ``read`` gets an instance of the subclass back.

        Raises:
            ValueError: If ``data_format`` is not one of the supported formats.
        """
        allowed_formats = ['csv', 'tsv', 'excel']
        if data_format not in allowed_formats:
            raise ValueError(f'Data format must be one of {", ".join(allowed_formats)}.')
        if data_format == 'csv':
            df = pd.read_csv(data_path)
        elif data_format == 'tsv':
            df = pd.read_csv(data_path, sep='\t')
        else:
            # Only 'excel' can reach this branch because of the validation above.
            df = pd.read_excel(data_path, engine='openpyxl')
        return cls(df)

    def assert_contains(
        self,
        column: str,
        value: str,
        *,
        warning: bool = False,
    ) -> None:
        """
        Asserts that specified column contains the specified value.

        Args:
            column (str, required): The column to check.
            value (str, required): the value to look for.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        assert_or_warn(
            value in column_values,
            warning,
            summary,
            f"The column '{column}' does not contain '{value}'.",
        )

    def assert_match(
        self,
        column: str,
        pattern: str,
        *,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column match the specified pattern.

        The pattern is looked for anywhere inside each value (``Series.str.contains``);
        anchor it with ``^`` and ``$`` to require that the whole value matches.
        Missing values are reported as rows that do not match.

        Args:
            column (str, required): The column to check.
            pattern (str, required): The pattern to match.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        # na=False keeps the mask strictly boolean: without it a missing value
        # yields NaN in the mask, which can neither be inverted nor used to index.
        matched = self[column].str.contains(re.compile(pattern), na=False)
        not_matched = self[~matched].index.values
        assert_or_warn(
            len(not_matched) == 0,
            warning,
            summary,
            f"The row(s) '{not_matched}' of the column '{column}' do not match the pattern '{pattern}'.",
        )

    def assert_columns(
        self,
        allowed_columns: List[str],
        *,
        exact: bool = False,
        warning: bool = False,
    ) -> None:
        """
        Asserts that every column of the DataFrame is in the list of allowed columns.

        Args:
            allowed_columns (List[str], required): The list of allowed columns.
            exact (bool): If True, the list of allowed columns must be exactly the same as the list of columns
                in the DataFrame, i.e. an allowed column that is absent from the DataFrame is a failure too.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        columns = self.columns.values
        summary = f"The names of the columns are '{columns}'."
        if exact:
            # Names present on only one side, in either direction.
            not_allowed = list(set(allowed_columns).symmetric_difference(set(columns)))
            message = f"The column names do not exactly match the list of allowed columns '{allowed_columns}'."
        else:
            not_allowed = [column for column in columns if column not in allowed_columns]
            message = f"The columns '{not_allowed}' are not in the list of allowed columns '{allowed_columns}'."
        assert_or_warn(len(not_allowed) == 0, warning, summary, message)

    def assert_values(
        self,
        column: str,
        values: list,
        *,
        allow_nan: bool = False,
        exact: bool = False,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column are in the specified list of allowed values.

        The list passed as ``values`` is never modified.

        Args:
            column (str, required): The column to check.
            values (list, required): The list of allowed values.
            allow_nan (bool): If True, allow NaN values.
            exact (bool): If True, the list of allowed values must be exactly the same as the list of values
                in the DataFrame. Combined with ``allow_nan``, NaN entries of the column are ignored
                in that comparison.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        # Work on a copy so the caller's list is left untouched between calls.
        allowed = list(values)
        if allow_nan:
            allowed.append(float('nan'))
        if exact:
            # NaN never compares equal to NaN, so set arithmetic cannot pair an
            # allowed NaN with the NaNs of the column. When NaN is allowed, take
            # it out of the observed values and compare against the caller's list.
            observed = self[column].dropna().values if allow_nan else column_values
            not_allowed = list(set(values).symmetric_difference(set(observed)))
            message = f"The values column '{column}' do not exactly match the allowed values '{allowed}'"
        else:
            not_allowed = self[~self[column].isin(allowed)].index.values
            message = (
                f"The row(s) '{not_allowed}' of the column '{column}' are not in the list of allowed values '{allowed}'."
            )
        assert_or_warn(len(not_allowed) == 0, warning, summary, message)

    def assert_range(
        self,
        column: str,
        *,
        min: Union[int, float, None] = None,
        max: Union[int, float, None] = None,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column are in the specified range.

        Both bounds are inclusive and each one is checked only when it is given.

        Args:
            column (str, required): The column to check.
            min (Union[int, float]): The minimum value of the range.
            max (Union[int, float]): The maximum value of the range.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        if min is not None:
            observed_min = column_values.min()
            assert_or_warn(
                min <= observed_min,
                warning,
                summary,
                f"The minimum value of column '{column}' is '{observed_min}', which is less than '{min}'.",
            )
        if max is not None:
            observed_max = column_values.max()
            assert_or_warn(
                max >= observed_max,
                warning,
                summary,
                f"The maximum value of column '{column}' is '{observed_max}', which is greater than '{max}'.",
            )