Coverage for tests\test_request.py: 100%

106 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2024-02-15 16:38 -0700

1import os 

2from contextlib import redirect_stdout 

3from io import StringIO 

4from tempfile import TemporaryDirectory 

5 

6import pandas 

7import pytest 

8 

9import receptiviti 

10 

# Runs at import time, before the class-level skipif below checks RECEPTIVITI_KEY.
# NOTE(review): presumably loads credentials from an environment file -- confirm
# against the receptiviti.readin_env documentation.
11receptiviti.readin_env() 

12 

13 

@pytest.mark.skipif(os.getenv("RECEPTIVITI_KEY") is None, reason="no API key")
class TestRequest:
    """Tests of `receptiviti.request`; skipped when RECEPTIVITI_KEY is not set."""

    def test_unreachable(self):
        """An unreachable URL yields a `None` status and a RuntimeError on request."""
        unreachable = "http://localhost:0/not_served"
        assert receptiviti.status(unreachable) is None
        with pytest.raises(RuntimeError, match="URL is not reachable"):
            receptiviti.request("a text", url=unreachable)

    def test_single_text(self):
        """A single three-word text reports a word count of 3."""
        scored = receptiviti.request("text to score")
        assert scored["summary.word_count"][0] == 3

    def test_invalid_text(self):
        """Oversized and empty texts each raise a RuntimeError with a specific message."""
        oversized = " " * int(1e7)
        with pytest.raises(RuntimeError, match="one of your texts is over the bundle size limit"):
            receptiviti.request(oversized)
        with pytest.raises(RuntimeError, match="no valid texts to process"):
            receptiviti.request("")

    def test_multi_text(self):
        """A NaN entry stays NaN in the results while the other texts are scored."""
        texts = ["text to score", float("nan"), "another text", "another text"]
        scored = receptiviti.request(texts)
        word_counts = scored["summary.word_count"]
        assert str(word_counts[1]) == "nan"
        assert word_counts.iloc[pandas.Index([0, 2, 3])].to_list() == [3, 2, 2]

    def test_framework_selection(self):
        """Selecting only the summary framework gives 10 columns without a prefix."""
        texts = ["text to score", "another text"]
        scored = receptiviti.request(texts, frameworks="summary")
        assert scored.shape == (2, 10)
        assert scored["word_count"].to_list() == [3, 2]

    def test_framework_prefix(self):
        """`framework_prefix=True` keeps the framework name in the column names."""
        texts = ["text to score", "another text"]
        scored = receptiviti.request(texts, frameworks="summary", framework_prefix=True)
        assert scored["summary.word_count"].to_list() == [3, 2]

    def test_id_text(self):
        """Supplied IDs and, with `return_text=True`, the texts appear in the results."""
        texts = ["text to score", "another text"]
        scored = receptiviti.request(texts, ids=["a", "b"], return_text=True)
        assert scored["text"].to_list() == ["text to score", "another text"]
        assert scored["id"].to_list() == ["a", "b"]

    def test_verbose(self):
        """`verbose=True` prints lines whose 4-character prefixes match the expected sequence."""
        captured = StringIO()
        with redirect_stdout(captured):
            receptiviti.request("text to score", frameworks=["summary", "sallee"], verbose=True)
        # The trailing "" is the empty piece after the final newline.
        expected_prefixes = ["prep", "prep", "prep", "requ", "done", "prep", "sele", "done", ""]
        observed_prefixes = [line[:4] for line in captured.getvalue().split("\n")]
        assert observed_prefixes == expected_prefixes

    def test_cache_initialization(self):
        """A request with a cache directory leaves a `bin=h` subdirectory in it."""
        with TemporaryDirectory() as tempdir:
            receptiviti.request("a text to score", cache=tempdir, clear_cache=True)
            assert os.path.isdir(tempdir + "/bin=h")

    def test_id_assignment(self):
        """IDs for texts read from a file are the file path plus a 1-based position."""
        lines = ["text to score", "another text"]
        with TemporaryDirectory() as tempdir:
            txt_file = f"{tempdir}/text.txt"
            csv_file = f"{tempdir}/text.csv"
            with open(txt_file, "w", encoding="utf-8") as handle:
                handle.write("\n".join(lines))
            pandas.DataFrame({"text": lines}).to_csv(csv_file)

            txt_ids = receptiviti.request(txt_file)["id"].to_list()
            assert txt_ids == [txt_file + "1", txt_file + "2"]

            csv_ids = receptiviti.request(csv_file, text_column="text")["id"].to_list()
            assert csv_ids == [csv_file + "1", csv_file + "2"]

    @pytest.mark.skipif(not os.path.isfile("../data.txt"), reason="no txt test file present")
    def test_from_directory(self):
        """Word-count totals agree whether texts come from one file, a directory, or file lists."""
        with TemporaryDirectory() as tempdir:
            cache = tempdir + "/cache"
            res_single = receptiviti.request("../data.txt", cache=cache)
            txt_files = []
            csv_files = []
            with open("../data.txt", encoding="utf-8") as source:
                # Write every line out twice: a UTF-16 .txt file and a cp1252 .csv file.
                for position, line in enumerate(source, start=1):
                    txt_path = f"{tempdir}/{position}.txt"
                    txt_files.append(txt_path)
                    with open(txt_path, "w", encoding="utf-16") as handle:
                        handle.write(line)
                    csv_path = f"{tempdir}/{position}.csv"
                    csv_files.append(csv_path)
                    pandas.DataFrame({"text": [line]}).to_csv(csv_path, encoding="cp1252")
            res_misencode = receptiviti.request(
                tempdir, encoding="utf-8", return_text=True, cache=cache
            )
            res_multi = receptiviti.request(tempdir, return_text=True, cache=cache)
            res_multi_txt = receptiviti.request(txt_files, cache=cache)
            res_multi_csv = receptiviti.request(csv_files, text_column="text", cache=cache)
            res_multi_txt_collapse = receptiviti.request(
                txt_files, collapse_lines=True, cache=cache
            )
            res_multi_csv_collapse = receptiviti.request(
                csv_files, text_column="text", collapse_lines=True, cache=cache
            )

        # Forcing UTF-8 on the files must change at least one of the returned texts.
        assert any(a != b for a, b in zip(res_multi["text"], res_misencode["text"]))

        multi_total = res_multi["summary.word_count"].sum()
        assert res_single["summary.word_count"].sum() == multi_total
        assert multi_total == res_multi_txt["summary.word_count"].sum()
        assert multi_total == res_multi_csv["summary.word_count"].sum()
        assert multi_total == res_multi_txt_collapse["summary.word_count"].sum()
        assert multi_total == res_multi_csv_collapse["summary.word_count"].sum()

    @pytest.mark.skipif(not os.path.isfile("../data.csv"), reason="no csv test file present")
    def test_from_file(self):
        """Requests with `cores=2, in_memory=False` and with defaults give matching IDs and totals."""
        source = "../data.csv"
        with TemporaryDirectory() as tempdir:
            res_parallel = receptiviti.request(
                source,
                text_column="texts",
                bundle_size=20,
                cores=2,
                cache=tempdir,
                in_memory=False,
            )
            res_serial = receptiviti.request(
                source,
                text_column="texts",
                bundle_size=20,
                cache=tempdir,
            )
        expected_ids = [source + str(position) for position in range(1, len(res_parallel) + 1)]
        assert all(res_parallel["id"] == expected_ids)
        assert all(res_serial["id"] == expected_ids)
        assert res_parallel["summary.word_count"].sum() == res_serial["summary.word_count"].sum()

    @pytest.mark.skipif(
        receptiviti.status(os.getenv("RECEPTIVITI_URL_TEST", "")) is None,
        reason="test API is not reachable",
    )
    def test_endpoint_version(self):
        """Re-request with `v2/taxonomies` appended to the URL, reusing the cache, and check the verbose output."""
        base_url = os.getenv("RECEPTIVITI_URL_TEST")
        credentials = {
            "key": os.getenv("RECEPTIVITI_KEY_TEST"),
            "secret": os.getenv("RECEPTIVITI_SECRET_TEST"),
        }
        captured = StringIO()
        with TemporaryDirectory() as tempdir:
            # First request fills the cache that the second request is pointed at.
            receptiviti.request("text to process", url=base_url, cache=tempdir, **credentials)
            with redirect_stdout(captured):
                receptiviti.request(
                    "text to process",
                    url=base_url + "v2/taxonomies",
                    cache=tempdir,
                    verbose=True,
                    **credentials,
                )
        # The trailing "" is the empty piece after the final newline.
        expected_prefixes = ["prep", "prep", "prep", "requ", "done", "clea", "addi", "prep", "done", ""]
        observed_prefixes = [line[:4] for line in captured.getvalue().split("\n")]
        assert observed_prefixes == expected_prefixes

177 )