Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2from typing import List, Union
4import pandas as pd
5from pandas import DataFrame
7from ..utils import PhytestObject, assert_or_warn
class Data(PhytestObject, DataFrame):
    """
    A pandas DataFrame extended with assertion helpers for validating tabular data.

    Every `assert_*` method computes a condition and passes it to `assert_or_warn`
    together with the `warning` flag, a summary of the inspected data and a
    failure message.
    """

    @classmethod
    def read(cls, data_path, data_format) -> 'Data':
        """
        Reads a tabular file into a Data object.

        Args:
            data_path: The location of the file; passed unchanged to the pandas reader.
            data_format (str): The format of the file. One of 'csv', 'tsv' or 'excel'.

        Returns:
            Data: The contents of the file.

        Raises:
            ValueError: If `data_format` is not one of the supported formats.
        """
        allowed_formats = ['csv', 'tsv', 'excel']
        if data_format not in allowed_formats:
            raise ValueError(f'Data format must be one of {", ".join(allowed_formats)}.')
        if data_format == 'csv':
            df = pd.read_csv(data_path)
        elif data_format == 'tsv':
            df = pd.read_csv(data_path, sep='\t')
        else:
            # Only 'excel' remains after the validation above.
            df = pd.read_excel(data_path, engine='openpyxl')
        # Construct through `cls` so a subclass gets an instance of its own type.
        return cls(df)

    def assert_contains(
        self,
        column: str,
        value: str,
        *,
        warning: bool = False,
    ) -> None:
        """
        Asserts that specified column contains the specified value.

        Args:
            column (str, required): The column to check.
            value (str, required): the value to look for.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        assert_or_warn(
            value in column_values,
            warning,
            summary,
            f"The column '{column}' does not contain '{value}'.",
        )

    def assert_match(
        self,
        column: str,
        pattern: str,
        *,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column match the specified pattern.

        The pattern is applied with `Series.str.contains`, so it may match anywhere
        inside a value; anchor it with `^` and `$` to require a full match.
        Rows with a missing value are reported as not matching.

        Args:
            column (str, required): The column to check.
            pattern (str, required): The pattern to match.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        # `na=False` makes missing values count as non-matching. Without it the
        # mask holds NaN for those rows and inverting it with `~` raises TypeError.
        matched = self[column].str.contains(re.compile(pattern), na=False)
        not_matched = self[~matched].index.values
        assert_or_warn(
            len(not_matched) == 0,
            warning,
            summary,
            f"The row(s) '{not_matched}' of the column '{column}' do not match the pattern '{pattern}'.",
        )

    def assert_columns(
        self,
        allowed_columns: List[str],
        *,
        exact: bool = False,
        warning: bool = False,
    ) -> None:
        """
        Asserts that every column of the DataFrame is in the list of allowed columns.

        Args:
            allowed_columns (List[str], required): The list of allowed columns.
            exact (bool): If True, the list of allowed columns must be exactly the same as the list of columns in the DataFrame.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        columns = self.columns.values
        summary = f"The names of the columns are '{columns}'."
        if exact:
            # Any name present on only one side breaks the exact match.
            not_allowed = list(set(allowed_columns).symmetric_difference(set(columns)))
            message = f"The column names do not exactly match the list of allowed columns '{allowed_columns}'."
        else:
            not_allowed = [column for column in columns if column not in allowed_columns]
            message = f"The columns '{not_allowed}' are not in the list of allowed columns '{allowed_columns}'."
        assert_or_warn(len(not_allowed) == 0, warning, summary, message)

    def assert_values(
        self,
        column: str,
        values: list,
        *,
        allow_nan: bool = False,
        exact: bool = False,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column are in the specified list of allowed values.

        The `values` list passed by the caller is never modified.

        Args:
            column (str, required): The column to check.
            values (list, required): The list of allowed values.
            allow_nan (bool): If True, allow NaN values. With `exact`, missing values are
                permitted but not required to be present in the column.
            exact (bool): If True, the list of allowed values must be exactly the same as the list of values in the DataFrame.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """

        def is_missing(item) -> bool:
            # Guard with is_scalar because pd.isna returns an array for list-likes.
            return bool(pd.api.types.is_scalar(item) and pd.isna(item))

        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        # Work on a copy so the NaN marker is not appended to the caller's list.
        allowed = list(values)
        if allow_nan:
            allowed.append(float('nan'))
        if exact:
            if allow_nan:
                # NaN never compares equal to itself, so a set comparison would always
                # report it as a mismatch; leave missing values out on both sides.
                expected = {item for item in allowed if not is_missing(item)}
                observed = {item for item in column_values if not is_missing(item)}
            else:
                expected = set(allowed)
                observed = set(column_values)
            not_allowed = list(expected.symmetric_difference(observed))
            message = f"The values column '{column}' do not exactly match the allowed values '{allowed}'"
        else:
            not_allowed = self[~self[column].isin(allowed)].index.values
            message = (
                f"The row(s) '{not_allowed}' of the column '{column}' are not in the list of allowed values '{allowed}'."
            )
        assert_or_warn(len(not_allowed) == 0, warning, summary, message)

    def assert_range(
        self,
        column: str,
        *,
        min: Union[int, float, None] = None,
        max: Union[int, float, None] = None,
        warning: bool = False,
    ) -> None:
        """
        Asserts that all values of the specified column are in the specified range.

        Both bounds are inclusive. An empty column passes, since there are no values
        that could fall outside the range.

        Args:
            column (str, required): The column to check.
            min (Union[int, float]): The minimum value of the range. Not checked if None.
            max (Union[int, float]): The maximum value of the range. Not checked if None.
            warning (bool): If True, raise a warning instead of an exception. Defaults to False.
                This flag can be set by running this method with the prefix `warn_` instead of `assert_`.
        """
        # NOTE: the `min`/`max` parameters shadow the builtins inside this method;
        # the names are part of the keyword-only interface and must stay.
        column_values = self[column].values
        summary = f"The values of column '{column}' are '{column_values}'."
        if len(column_values) == 0:
            # Reducing an empty array raises ValueError; nothing to check here.
            return
        if min is not None:
            smallest = column_values.min()
            assert_or_warn(
                min <= smallest,
                warning,
                summary,
                f"The minimum value of column '{column}' is '{smallest}', which is less than '{min}'.",
            )
        if max is not None:
            largest = column_values.max()
            assert_or_warn(
                max >= largest,
                warning,
                summary,
                f"The maximum value of column '{column}' is '{largest}', which is greater than '{max}'.",
            )