Coverage for tests\test_request.py: 100%
103 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-20 11:35 -0500
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-20 11:35 -0500
1import os
2from contextlib import redirect_stdout
3from io import StringIO
4from tempfile import TemporaryDirectory
6import pandas
7import pytest
9import receptiviti
# NOTE(review): presumably loads settings such as RECEPTIVITI_KEY into the process
# environment so the os.getenv-based skip conditions below can see them -- confirm
# against the receptiviti package documentation.
11receptiviti.readin_env()
@pytest.mark.skipif(os.getenv("RECEPTIVITI_KEY") is None, reason="no API key")
class TestRequest:
    """Tests for ``receptiviti.request``; skipped entirely when ``RECEPTIVITI_KEY`` is unset."""

    @staticmethod
    def _assert_message_prefixes(output, expected):
        """Assert that every line of captured output starts with the expected prefix.

        Args:
            output: Text captured from stdout during a verbose request.
            expected: Four-character prefixes, one per element of ``output`` split on
                newlines (a trailing newline therefore contributes a final ``""``).
        """
        # Comparing whole lists (instead of a length check combined with ``all``) lets
        # pytest report exactly which line diverged when the assertion fails.
        assert [line[:4] for line in output.split("\n")] == expected

    def test_unreachable(self):
        """An unreachable URL has no status and makes ``request`` raise ``RuntimeError``."""
        url = "http://localhost:0/not_served"
        assert receptiviti.status(url) is None
        with pytest.raises(RuntimeError, match="URL is not reachable"):
            receptiviti.request("a text", url=url)

    def test_single_text(self):
        """A single string is scored and its word count is returned."""
        res = receptiviti.request("text to score")
        assert res["summary.word_count"][0] == 3

    def test_invalid_text(self):
        """Over-sized and empty texts are both rejected with ``RuntimeError``."""
        with pytest.raises(RuntimeError, match="one of your texts is over the bundle size limit"):
            receptiviti.request(" " * int(1e7))
        with pytest.raises(RuntimeError, match="no valid texts to process"):
            receptiviti.request("")

    def test_multi_text(self):
        """A NaN entry produces a NaN result while the other texts are scored."""
        res = receptiviti.request(["text to score", float("nan"), "another text", "another text"])
        word_counts = res["summary.word_count"]
        assert str(word_counts[1]) == "nan"
        assert word_counts.iloc[pandas.Index([0, 2, 3])].to_list() == [3, 2, 2]

    def test_framework_selection(self):
        """Selecting one framework returns unprefixed columns for that framework."""
        res = receptiviti.request(["text to score", "another text"], frameworks="summary")
        assert res.shape == (2, 10)
        assert res["word_count"].to_list() == [3, 2]

    def test_framework_prefix(self):
        """``framework_prefix=True`` keeps the framework name in the column labels."""
        res = receptiviti.request(
            ["text to score", "another text"], frameworks="summary", framework_prefix=True
        )
        assert res["summary.word_count"].to_list() == [3, 2]

    def test_id_text(self):
        """Supplied ids and, with ``return_text=True``, the original texts are returned."""
        texts = ["text to score", "another text"]
        ids = ["a", "b"]
        res = receptiviti.request(texts, ids=ids, return_text=True)
        assert res["text"].to_list() == texts
        assert res["id"].to_list() == ids

    def test_verbose(self):
        """``verbose=True`` prints the expected sequence of progress messages."""
        with redirect_stdout(StringIO()) as out:
            receptiviti.request("text to score", frameworks=["summary", "sallee"], verbose=True)
        self._assert_message_prefixes(
            out.getvalue(), ["prep"] * 3 + ["requ", "done", "prep", "sele", "done", ""]
        )

    def test_cache_initialization(self):
        """Requesting with a cache directory creates a ``bin=h`` subdirectory in it."""
        with TemporaryDirectory() as tempdir:
            receptiviti.request("a text to score", cache=tempdir, clear_cache=True)
            assert os.path.isdir(tempdir + "/bin=h")

    def test_id_assignment(self):
        """Texts read from a file get ids made of the file path plus a 1-based position."""
        text = ["text to score", "another text"]
        with TemporaryDirectory() as tempdir:
            txt_file = f"{tempdir}/text.txt"
            with open(txt_file, "w", encoding="utf-8") as txt:
                txt.write("\n".join(text))
            csv_file = f"{tempdir}/text.csv"
            pandas.DataFrame({"text": text}).to_csv(csv_file)

            txt_ids = receptiviti.request(txt_file)["id"].to_list()
            assert txt_ids == [txt_file + "1", txt_file + "2"]

            csv_ids = receptiviti.request(csv_file, text_column="text")["id"].to_list()
            assert csv_ids == [csv_file + "1", csv_file + "2"]

    @pytest.mark.skipif(not os.path.isfile("../data.txt"), reason="no txt test file present")
    def test_from_directory(self):
        """Reading texts from a directory or from file lists gives the same total word count.

        Each line of ``../data.txt`` is written to its own UTF-16 ``.txt`` and ``.csv``
        file; the totals from every way of reading those files are compared with the
        total from reading ``../data.txt`` directly.
        """
        with TemporaryDirectory() as tempdir:
            cache = tempdir + "/cache"
            res_single = receptiviti.request("../data.txt", cache=cache)

            txt_files = []
            csv_files = []
            with open("../data.txt", encoding="utf-8") as texts:
                for nth_text, text in enumerate(texts, start=1):
                    txt_file = f"{tempdir}/{nth_text}.txt"
                    txt_files.append(txt_file)
                    with open(txt_file, "w", encoding="utf-16") as txt:
                        txt.write(text)
                    csv_file = f"{tempdir}/{nth_text}.csv"
                    csv_files.append(csv_file)
                    pandas.DataFrame({"text": [text]}).to_csv(csv_file, encoding="utf-16")

            # Deliberately read the UTF-16 files as UTF-8; the returned text is expected
            # to differ from the text returned without an explicit encoding.
            res_misencode = receptiviti.request(
                tempdir, encoding="utf-8", return_text=True, cache=cache
            )
            res_multi = receptiviti.request(tempdir, return_text=True, cache=cache)
            res_multi_txt = receptiviti.request(txt_files, cache=cache)
            res_multi_csv = receptiviti.request(csv_files, text_column="text", cache=cache)
            res_multi_txt_collapse = receptiviti.request(
                txt_files, collapse_lines=True, cache=cache
            )
            res_multi_csv_collapse = receptiviti.request(
                csv_files, text_column="text", collapse_lines=True, cache=cache
            )

            assert any(a != b for a, b in zip(res_multi["text"], res_misencode["text"]))

            # Reference total that every other reading strategy must reproduce.
            multi_total = res_multi["summary.word_count"].sum()
            assert res_single["summary.word_count"].sum() == multi_total
            assert multi_total == res_multi_txt["summary.word_count"].sum()
            assert multi_total == res_multi_csv["summary.word_count"].sum()
            assert multi_total == res_multi_txt_collapse["summary.word_count"].sum()
            assert multi_total == res_multi_csv_collapse["summary.word_count"].sum()

    @pytest.mark.skipif(not os.path.isfile("../data.csv"), reason="no csv test file present")
    def test_from_file(self):
        """Two differently configured runs over ``../data.csv`` give equal word-count totals.

        The first run passes ``in_memory=False`` and leaves ``cores`` at its default; the
        second passes ``cores=1``.
        """
        # NOTE(review): both runs share the same cache directory, so the second may be
        # answered from the cache rather than by a fresh request -- confirm intended.
        with TemporaryDirectory() as tempdir:
            res_parallel = receptiviti.request(
                "../data.csv",
                text_column="texts",
                id_column="id",
                bundle_size=20,
                cache=tempdir,
                in_memory=False,
            )
            res_serial = receptiviti.request(
                "../data.csv",
                text_column="texts",
                id_column="id",
                bundle_size=20,
                cores=1,
                cache=tempdir,
            )
            assert (
                res_parallel["summary.word_count"].sum() == res_serial["summary.word_count"].sum()
            )

    @pytest.mark.skipif(
        receptiviti.status(os.getenv("RECEPTIVITI_URL_TEST", "")) is None,
        reason="test API is not reachable",
    )
    def test_endpoint_version(self):
        """A second request to the ``v2/taxonomies`` URL prints the expected messages.

        Both requests use the test API settings from the ``RECEPTIVITI_*_TEST``
        environment variables and share one cache directory.
        """
        # Read the test-API settings once instead of once per request.
        base_url = os.getenv("RECEPTIVITI_URL_TEST")
        key = os.getenv("RECEPTIVITI_KEY_TEST")
        secret = os.getenv("RECEPTIVITI_SECRET_TEST")
        with TemporaryDirectory() as tempdir:
            receptiviti.request(
                "text to process",
                url=base_url,
                key=key,
                secret=secret,
                cache=tempdir,
            )
            with redirect_stdout(StringIO()) as out:
                receptiviti.request(
                    "text to process",
                    url=base_url + "v2/taxonomies",
                    key=key,
                    secret=secret,
                    cache=tempdir,
                    verbose=True,
                )
        self._assert_message_prefixes(
            out.getvalue(),
            ["prep"] * 3 + ["requ", "done", "clea", "addi", "prep", "done", ""],
        )