Coverage for tests\test_request.py: 100%

137 statements  

« prev     ^ index     » next       coverage.py v7.8.2, created at 2025-06-04 09:07 -0400

import os
from contextlib import redirect_stdout
from io import StringIO
from tempfile import TemporaryDirectory

import pandas
import pytest

import receptiviti

# Runs once at import time, before any test is collected.
# NOTE(review): presumably loads API credentials/settings into the environment
# (the class-level skip below reads RECEPTIVITI_KEY) -- confirm against the
# receptiviti.readin_env documentation.
receptiviti.readin_env()

12 

13 

def test_invalid_inputs():
    """Check that ``request`` raises ``RuntimeError`` for missing text and for rejected credentials."""
    missing_text_message = "enter text"
    bad_credentials_message = "Unauthorized"

    # Calling with no arguments at all must be rejected.
    with pytest.raises(RuntimeError, match=missing_text_message):
        receptiviti.request()

    # A made-up key/secret pair must be reported as unauthorized.
    with pytest.raises(RuntimeError, match=bad_credentials_message):
        receptiviti.request("a text", key="123", secret="123")

19 

20 

@pytest.mark.skipif(os.getenv("RECEPTIVITI_KEY") is None, reason="no API key")
class TestRequest:
    """Tests of ``receptiviti.request``; the whole class is skipped when ``RECEPTIVITI_KEY`` is unset."""

    def test_unreachable(self):
        """An unreachable URL yields a ``None`` status and a failing request."""
        unreachable = "http://localhost:0/not_served"
        assert receptiviti.status(unreachable) is None
        with pytest.raises(RuntimeError, match="URL is not reachable"):
            receptiviti.request("a text", url=unreachable)

    def test_invalid_version(self):
        """A malformed ``version`` argument is rejected."""
        expected_message = "invalid version: 1"
        with pytest.raises(RuntimeError, match=expected_message):
            receptiviti.request("text to score", version="1")

    def test_invalid_endpoint(self):
        """A malformed ``endpoint`` argument is rejected."""
        expected_message = "invalid endpoint: v1"
        with pytest.raises(RuntimeError, match=expected_message):
            receptiviti.request("text to score", endpoint="framework/v1")

    def test_single_text(self):
        """A single three-word text is scored with a word count of 3."""
        scores = receptiviti.request("text to score")
        assert scores is not None
        assert scores["summary.word_count"][0] == 3

    def test_single_text_v2(self):
        """The same single text is scored when ``version="v2"`` is requested."""
        scores = receptiviti.request("text to score", version="v2")
        assert scores is not None
        assert scores["summary.word_count"][0] == 3

    def test_contexts(self):
        """A v2 request with the ``spoken`` context returns a ``drives.power`` score above 0.05."""
        scores = receptiviti.request("text to score", version="v2", context="spoken")
        assert scores is not None
        assert scores["drives.power"][0] > 0.05

    def test_invalid_text(self):
        """Oversized and empty texts are both rejected with distinct messages."""
        oversized = " " * 10_000_000
        with pytest.raises(RuntimeError, match="one of your texts is over the bundle size limit"):
            receptiviti.request(oversized)
        with pytest.raises(RuntimeError, match="no valid texts to process"):
            receptiviti.request("")

    def test_multi_text(self):
        """A NaN entry among several texts produces a NaN row while the others are scored."""
        texts = ["text to score", float("nan"), "another text", "another text"]
        scores = receptiviti.request(texts)
        assert scores is not None
        word_counts = scores["summary.word_count"]
        assert str(word_counts[1]) == "nan"
        scored_rows = word_counts.iloc[pandas.Index([0, 2, 3])]
        assert scored_rows.to_list() == [3, 2, 2]

    def test_framework_selection(self):
        """Selecting only the ``summary`` framework gives a 2x10 frame with unprefixed columns."""
        texts = ["text to score", "another text"]
        scores = receptiviti.request(texts, frameworks="summary")
        assert scores is not None
        assert scores.shape == (2, 10)
        assert scores["word_count"].to_list() == [3, 2]

    def test_framework_prefix(self):
        """``framework_prefix=True`` keeps the ``summary.`` prefix on a single selected framework."""
        texts = ["text to score", "another text"]
        scores = receptiviti.request(texts, frameworks="summary", framework_prefix=True)
        assert scores is not None
        assert scores["summary.word_count"].to_list() == [3, 2]

    def test_id_text(self):
        """Supplied ids and, with ``return_text=True``, the original texts are returned."""
        texts = ["text to score", "another text"]
        labels = ["a", "b"]
        scores = receptiviti.request(["text to score", "another text"], ids=["a", "b"], return_text=True)
        assert scores is not None
        assert scores["text"].to_list() == texts
        assert scores["id"].to_list() == labels

    def test_verbose(self):
        """``verbose=True`` prints the expected sequence of progress messages."""
        with redirect_stdout(StringIO()) as captured:
            receptiviti.request("text to score", frameworks=["summary", "sallee"], version="v2", verbose=True)
        # Only the first four characters of each printed line are compared; the
        # trailing "" is the remainder after the final newline.
        prefixes = [line[:4] for line in captured.getvalue().split("\n")]
        assert prefixes == ["prep"] * 3 + ["requ", "done", "prep", "sele", "done", ""]

    def test_id_assignment(self):
        """Texts read from a txt or csv file get ids of the form ``<path><row number>``."""
        texts = ["text to score", "another text"]
        with TemporaryDirectory() as workdir:
            txt_path = f"{workdir}/text.txt"
            with open(txt_path, "w", encoding="utf-8") as handle:
                handle.write("\n".join(texts))
            csv_path = f"{workdir}/text.csv"
            pandas.DataFrame({"text": texts}).to_csv(csv_path)

            from_txt = receptiviti.request(txt_path)
            assert from_txt is not None
            assert from_txt["id"].to_list() == [txt_path + "1", txt_path + "2"]

            from_csv = receptiviti.request(csv_path, text_column="text")
            assert from_csv is not None
            assert from_csv["id"].to_list() == [csv_path + "1", csv_path + "2"]

    @pytest.mark.skipif(not os.path.isfile("../data.txt"), reason="no txt test file present")
    def test_from_directory(self):
        """Word-count totals agree whether texts come from one file, a directory, or file lists."""
        source = "../data.txt"
        with TemporaryDirectory() as workdir:
            cache = workdir + "/cache"

            # With collect_results=False nothing is returned from the request.
            res_background = receptiviti.request(source, cache=cache, collect_results=False)
            assert res_background is None
            res_single = receptiviti.request(source, cache=cache, request_cache=False, make_request=False)

            # Write each line of the source file to its own UTF-16 txt file and
            # its own cp1252 csv file inside the temporary directory.
            txt_files = []
            csv_files = []
            with open(source, encoding="utf-8") as lines:
                for position, line in enumerate(lines, start=1):
                    txt_file = f"{workdir}/{position}.txt"
                    txt_files.append(txt_file)
                    with open(txt_file, "w", encoding="utf-16") as handle:
                        handle.write(line)
                    csv_file = f"{workdir}/{position}.csv"
                    csv_files.append(csv_file)
                    pandas.DataFrame({"raw_text": [line]}).to_csv(csv_file, encoding="cp1252")

            res_misencode = receptiviti.request(workdir, encoding="utf-8", return_text=True, cache=cache)
            res_multi = receptiviti.request(workdir, return_text=True, cache=cache)
            res_multi_txt = receptiviti.request(txt_files, cache=cache)
            res_multi_csv = receptiviti.request(csv_files, text_column="raw_text", cache=cache)
            res_multi_txt_collapse = receptiviti.request(txt_files, collapse_lines=True, cache=cache)
            res_multi_csv_collapse = receptiviti.request(
                csv_files, text_column="raw_text", collapse_lines=True, cache=cache
            )

            every_result = (
                res_single,
                res_multi,
                res_misencode,
                res_multi_txt,
                res_multi_csv,
                res_multi_txt_collapse,
                res_multi_csv_collapse,
            )
            for result in every_result:
                assert result is not None

            # Forcing the wrong encoding must change at least one returned text.
            assert any(a != b for a, b in zip(res_multi["text"], res_misencode["text"]))

            # Every other way of reading the same texts must give the same total.
            reference_total = res_multi["summary.word_count"].sum()
            assert res_single["summary.word_count"].sum() == reference_total
            for result in (res_multi_txt, res_multi_csv, res_multi_txt_collapse, res_multi_csv_collapse):
                assert reference_total == result["summary.word_count"].sum()

    @pytest.mark.skipif(not os.path.isfile("../data.csv"), reason="no csv test file present")
    def test_from_file(self):
        """Requests with ``cores=2`` and with default cores return the same ids and word-count total."""
        source = "../data.csv"
        with TemporaryDirectory() as cache_dir:
            res_parallel = receptiviti.request(
                source,
                text_column="texts",
                bundle_size=20,
                cores=2,
                cache=cache_dir,
                in_memory=False,
            )
            res_serial = receptiviti.request(
                source,
                text_column="texts",
                bundle_size=20,
                cache=cache_dir,
            )
            assert res_parallel is not None
            assert res_serial is not None

            # Ids are the file path followed by the 1-based row number.
            expected_ids = [source + str(row) for row in range(1, len(res_parallel) + 1)]
            assert all(res_parallel["id"] == expected_ids)
            assert all(res_serial["id"] == expected_ids)
            assert res_parallel["summary.word_count"].sum() == res_serial["summary.word_count"].sum()

    def test_endpoint_version(self):
        """Repeating a request through an explicit ``v2/analyze`` URL prints the expected progress messages."""
        with TemporaryDirectory() as cache_dir:
            # First request populates the shared cache directory.
            receptiviti.request("text to process", cache=cache_dir)

            v2_url = os.getenv("RECEPTIVITI_URL", "") + "v2/analyze"
            with redirect_stdout(StringIO()) as captured:
                receptiviti.request(
                    "text to process",
                    url=v2_url,
                    cache=cache_dir,
                    verbose=True,
                )
            # Only the first four characters of each printed line are compared.
            prefixes = [line[:4] for line in captured.getvalue().split("\n")]
            assert prefixes == ["prep"] * 3 + ["requ", "done", "prep", "done", ""]