Spaces:

mikeee
/

radiobee-aligner

Build error

radiobee-aligner / tests /test_text2lists.py

freemt

Update slow-track for more lang pairs

7fd4e54 almost 3 years ago

1.89 kB

	"""Test text2lists."""
	from pathlib import Path
	from radiobee.loadtext import loadtext
	from radiobee.text2lists import text2lists


	def test_text2lists_dual1():
	    """Test text2lists on data/test-dual.txt loaded via loadtext.

	    Checks that the first entry of the second list is empty, that the
	    first entry of the first list contains "国际", and that the sixth
	    entry of the second list contains "2021".
	    """
	    # Forward slash on purpose: a literal backslash separator only resolves
	    # on Windows, while "/" is accepted on both Windows and POSIX.
	    filename = "data/test-dual.txt"
	    text = loadtext(filename)
	    l1, l2 = text2lists(text)
	    assert l2[0] in [""]
	    assert "国际\n中\n双语"[:2] in l1[0]
	    assert "2021" in l2[5]


	def test_shakespeare1000():
	    """Run text2lists on the 4000-line Shakespeare zh-en sample.

	    This is a smoke test: it only requires that the call completes and
	    yields exactly two values; the values themselves are not inspected.

	    The sample file appears to have been produced by a one-off snippet
	    along the following lines (not executed here; nesting of the loop body
	    is reconstructed and should be confirmed)::

	        from pathlib import Path
	        import zipfile
	        dir_loc = r""
	        filename = r"莎士比亚 - 莎士比亚全集（套装共39本英汉双语）-外语教学与研究出版社 (2016).txt.zip"
	        zfile = zipfile.ZipFile(Path(dir_loc) / filename)
	        res_bytes = zfile.read(zfile.infolist()[0])
	        encoding = cchardet.detect(res_bytes).get("encoding")

	        text1000 = []
	        line = 0
	        numb_lines = 4000
	        for elm in res_bytes.splitlines():
	            if elm.decode(encoding).strip():
	                text1000.append(elm.decode(encoding))
	                if line >= numb_lines - 1:
	                    break
	                line += 1
	        Path(f"data/shakespeare-zh-en-{numb_lines}.txt").write_text("\n".join(text1000), encoding="utf8")

	        tset = cmat2test(cmat)
	        df = pd.DataFrame(tset).rename(columns=dict(zip(range(0, 3), ['x', 'y', 'cos'])))
	        plot_df(df)
	    """
	    sample_path = Path("data/shakespeare-zh-en-4000.txt")
	    sample_text = sample_path.read_text(encoding="utf8")

	    # Unpacking into two names fails if text2lists returns anything other
	    # than a pair.
	    _left, _right = text2lists(sample_text)


	def test_test_dual2():
	    """Run text2lists on data/test-dual.txt read directly as UTF-8.

	    Smoke test only: the call must complete and yield exactly two values.
	    """
	    fixture = Path("data/test-dual.txt")
	    content = fixture.read_text(encoding="utf8")

	    # Unpacking enforces that a pair is returned; contents are not checked.
	    _left, _right = text2lists(content)