Coverage for tests\test_request.py: 100%
120 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 10:33 -0500
« prev ^ index » next coverage.py v7.6.1, created at 2024-12-01 10:33 -0500
1import os
2from contextlib import redirect_stdout
3from io import StringIO
4from tempfile import TemporaryDirectory
6import pandas
7import pytest
9import receptiviti
11receptiviti.readin_env()
def test_invalid_inputs():
    """Check that a request without text, or with rejected credentials, raises `RuntimeError`."""
    # Calling with no arguments at all should prompt for text.
    expect_missing_text = pytest.raises(RuntimeError, match="enter text")
    with expect_missing_text:
        receptiviti.request()

    # A made-up key/secret pair should be reported as unauthorized.
    expect_unauthorized = pytest.raises(RuntimeError, match="Unauthorized")
    with expect_unauthorized:
        receptiviti.request("a text", key="123", secret="123")
@pytest.mark.skipif(os.getenv("RECEPTIVITI_KEY") is None, reason="no API key")
class TestRequest:
    """Tests of `receptiviti.request` that are only run when an API key is available.

    The whole class is skipped when `RECEPTIVITI_KEY` is not set in the environment.
    """

    def test_unreachable(self):
        """An unreachable URL gives a `None` status and makes a request raise."""
        url = "http://localhost:0/not_served"
        assert receptiviti.status(url) is None
        with pytest.raises(RuntimeError, match="URL is not reachable"):
            receptiviti.request("a text", url=url)

    def test_invalid_version(self):
        """A version of "1" is rejected."""
        with pytest.raises(RuntimeError, match="invalid version: 1"):
            receptiviti.request("text to score", version="1")

    def test_invalid_endpoint(self):
        """An endpoint of "framework/v1" is rejected."""
        with pytest.raises(RuntimeError, match="invalid endpoint: v1"):
            receptiviti.request("text to score", endpoint="framework/v1")

    def test_single_text(self):
        """A single three-word text is scored with a word count of 3."""
        res = receptiviti.request("text to score")
        assert res["summary.word_count"][0] == 3

    def test_single_text_v2(self):
        """The same single text is scored when version "v2" is requested."""
        res = receptiviti.request("text to score", version="v2")
        assert res["summary.word_count"][0] == 3

    def test_contexts(self):
        """A request with version "v2" and the "spoken" context returns a `drives.power` score above 0.05."""
        res = receptiviti.request("text to score", version="v2", context="spoken")
        assert res["drives.power"][0] > 0.05

    def test_invalid_text(self):
        """Oversized and empty texts are rejected with distinct messages."""
        with pytest.raises(RuntimeError, match="one of your texts is over the bundle size limit"):
            receptiviti.request(" " * int(1e7))
        with pytest.raises(RuntimeError, match="no valid texts to process"):
            receptiviti.request("")

    def test_multi_text(self):
        """A NaN entry among several texts yields a NaN row; the other rows are scored."""
        res = receptiviti.request(["text to score", float("nan"), "another text", "another text"])
        word_counts = res["summary.word_count"]
        assert str(word_counts[1]) == "nan"
        # Select the rows of the valid texts by position.
        assert word_counts.iloc[[0, 2, 3]].to_list() == [3, 2, 2]

    def test_framework_selection(self):
        """Selecting the "summary" framework returns 10 columns with unprefixed names."""
        res = receptiviti.request(["text to score", "another text"], frameworks="summary")
        assert res.shape == (2, 10)
        assert res["word_count"].to_list() == [3, 2]

    def test_framework_prefix(self):
        """`framework_prefix=True` keeps the framework name in column names."""
        res = receptiviti.request(["text to score", "another text"], frameworks="summary", framework_prefix=True)
        assert res["summary.word_count"].to_list() == [3, 2]

    def test_id_text(self):
        """Supplied ids and (with `return_text=True`) the original texts are returned."""
        res = receptiviti.request(["text to score", "another text"], ids=["a", "b"], return_text=True)
        assert res["text"].to_list() == ["text to score", "another text"]
        assert res["id"].to_list() == ["a", "b"]

    def test_verbose(self):
        """`verbose=True` prints the expected sequence of progress messages."""
        with redirect_stdout(StringIO()) as out:
            receptiviti.request("text to score", frameworks=["summary", "sallee"], version="v2", verbose=True)
        messages = out.getvalue().split("\n")
        expected = ["prep"] * 3 + ["requ", "done", "prep", "sele", "done", ""]
        # Only the first four characters of each printed line are checked;
        # comparing lists checks both the length and every prefix.
        assert [line[:4] for line in messages] == expected

    def test_id_assignment(self):
        """Texts read from a file get ids made of the file path plus a 1-based position."""
        text = ["text to score", "another text"]
        with TemporaryDirectory() as tempdir:
            # The paths are built by plain string formatting on purpose: the
            # expected ids below are derived from these exact strings.
            txt_file = f"{tempdir}/text.txt"
            with open(txt_file, "w", encoding="utf-8") as txt:
                txt.write("\n".join(text))
            csv_file = f"{tempdir}/text.csv"
            pandas.DataFrame({"text": text}).to_csv(csv_file)

            txt_ids = receptiviti.request(txt_file)["id"].to_list()
            assert txt_ids == [txt_file + "1", txt_file + "2"]
            csv_ids = receptiviti.request(csv_file, text_column="text")["id"].to_list()
            assert csv_ids == [csv_file + "1", csv_file + "2"]

    @pytest.mark.skipif(not os.path.isfile("../data.txt"), reason="no txt test file present")
    def test_from_directory(self):
        """Texts read from a directory or from file lists give the same total word count as the source file."""
        with TemporaryDirectory() as tempdir:
            cache = tempdir + "/cache"
            # With `collect_results=False` nothing is returned.
            res_background = receptiviti.request("../data.txt", cache=cache, collect_results=False)
            assert res_background is None
            # NOTE(review): presumably this is served from the cache filled by the
            # call above, since `make_request=False` -- confirm.
            res_single = receptiviti.request("../data.txt", cache=cache, request_cache=False, make_request=False)

            # Write every line of the source file to its own UTF-16 .txt file
            # and its own cp1252 .csv file.
            txt_files = []
            csv_files = []
            with open("../data.txt", encoding="utf-8") as texts:
                for nth_text, text in enumerate(texts, start=1):
                    txt_file = f"{tempdir}/{nth_text}.txt"
                    txt_files.append(txt_file)
                    with open(txt_file, "w", encoding="utf-16") as txt:
                        txt.write(text)
                    csv_file = f"{tempdir}/{nth_text}.csv"
                    csv_files.append(csv_file)
                    pandas.DataFrame({"text": [text]}).to_csv(csv_file, encoding="cp1252")

            res_misencode = receptiviti.request(tempdir, encoding="utf-8", return_text=True, cache=cache)
            res_multi = receptiviti.request(tempdir, return_text=True, cache=cache)
            res_multi_txt = receptiviti.request(txt_files, cache=cache)
            res_multi_csv = receptiviti.request(csv_files, text_column="text", cache=cache)
            res_multi_txt_collapse = receptiviti.request(txt_files, collapse_lines=True, cache=cache)
            res_multi_csv_collapse = receptiviti.request(
                csv_files, text_column="text", collapse_lines=True, cache=cache
            )

            # Forcing utf-8 on the files must change at least one returned text
            # relative to the default read.
            assert not all(a == b for a, b in zip(res_multi["text"], res_misencode["text"]))

            # Every way of reading the same texts must give the same total word count.
            total_words = res_multi["summary.word_count"].sum()
            assert res_single["summary.word_count"].sum() == total_words
            assert total_words == res_multi_txt["summary.word_count"].sum()
            assert total_words == res_multi_csv["summary.word_count"].sum()
            assert total_words == res_multi_txt_collapse["summary.word_count"].sum()
            assert total_words == res_multi_csv_collapse["summary.word_count"].sum()

    @pytest.mark.skipif(not os.path.isfile("../data.csv"), reason="no csv test file present")
    def test_from_file(self):
        """Requests made with `cores=2` and `in_memory=False` match requests made with the defaults."""
        with TemporaryDirectory() as tempdir:
            res_parallel = receptiviti.request(
                "../data.csv",
                text_column="texts",
                bundle_size=20,
                cores=2,
                cache=tempdir,
                in_memory=False,
            )
            res_serial = receptiviti.request(
                "../data.csv",
                text_column="texts",
                bundle_size=20,
                cache=tempdir,
            )
            # Expected ids are the file path followed by a 1-based row position.
            ids = [f"../data.csv{position}" for position in range(1, len(res_parallel) + 1)]
            assert all(res_parallel["id"] == ids)
            assert all(res_serial["id"] == ids)
            assert res_parallel["summary.word_count"].sum() == res_serial["summary.word_count"].sum()

    @pytest.mark.skipif(os.getenv("RECEPTIVITI_URL") is None, reason="no API URL")
    def test_endpoint_version(self):
        """A URL that already ends in "v2/analyze" is accepted and prints the expected messages.

        Skipped when `RECEPTIVITI_URL` is not set, since the URL under test is
        built from it.
        """
        # Normalise the trailing slash so the joined URL is well formed whether
        # or not the configured base URL ends with "/".
        base_url = os.environ["RECEPTIVITI_URL"].rstrip("/")
        with TemporaryDirectory() as tempdir:
            receptiviti.request("text to process", cache=tempdir)
            with redirect_stdout(StringIO()) as out:
                receptiviti.request(
                    "text to process",
                    url=f"{base_url}/v2/analyze",
                    cache=tempdir,
                    verbose=True,
                )
            messages = out.getvalue().split("\n")
            expected = ["prep"] * 3 + ["requ", "done", "prep", "done", ""]
            # Only the first four characters of each printed line are checked.
            assert [line[:4] for line in messages] == expected