Spaces:

imansarraf
/

Persian_ASR

Sleeping

App Files Files Community

Persian_ASR / sad_tf /export_funcs.py

imansarraf

Upload 9 files

84b1bab verified 3 months ago

raw

history blame

6.97 kB

	#!/usr/bin/env python
	# encoding: utf-8

	# The MIT License

	# Copyright (c) 2018 Ina (David Doukhan - http://www.ina.fr/)

	# Permission is hereby granted, free of charge, to any person obtaining a copy
	# of this software and associated documentation files (the "Software"), to deal
	# in the Software without restriction, including without limitation the rights
	# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	# copies of the Software, and to permit persons to whom the Software is
	# furnished to do so, subject to the following conditions:

	# The above copyright notice and this permission notice shall be included in
	# all copies or substantial portions of the Software.

	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	# THE SOFTWARE.

	import pandas as pd
	from pytextgrid.PraatTextGrid import PraatTextGrid, Interval, Tier
	import os
	import json

	def seg2csv(lseg, fout=None):
	    """Export segments as a tab-separated table with a header row.

	    Only the first three fields of every segment are exported, under the
	    column names ``labels``, ``start`` and ``stop``; any extra trailing
	    fields a segment carries are ignored instead of making the export fail.

	    Args:
	        lseg: iterable of segments, each a sequence whose first three
	            items are (label, start, stop).
	        fout: destination handed to ``DataFrame.to_csv`` (path or file-like
	            object). When None, nothing is written and the text is returned.

	    Returns:
	        The tab-separated text when ``fout`` is None, otherwise None.
	    """
	    # Keep only (label, start, stop) so 4- and 5-field segments are accepted.
	    rows = [tuple(seg[:3]) for seg in lseg]
	    df = pd.DataFrame.from_records(rows, columns=['labels', 'start', 'stop'])
	    # to_csv returns the rendered text only when no destination is given.
	    return df.to_csv(fout, sep='\t', index=False)

	def seg2textgrid1(lseg, fout=None):
	    """Save segments as a single-tier TextGrid through pytextgrid.

	    Every segment must unpack into exactly four fields; the first three
	    are used as (label, start, stop). Segments labelled 'noEnergy' are
	    kept as intervals with an empty text.

	    Args:
	        lseg: indexable sequence of 4-field segments. An empty sequence
	            raises IndexError, because the grid bounds are read from the
	            first and last segments.
	        fout: destination passed to ``PraatTextGrid.save``.
	    """
	    tier = Tier(name='inaSpeechSegmenter')
	    for seg_label, seg_start, seg_stop, _ in lseg:
	        text = '' if seg_label == 'noEnergy' else seg_label
	        tier.append(Interval(seg_start, seg_stop, text))

	    # The grid spans from the first segment's start to the last one's stop.
	    grid_start = lseg[0][1]
	    grid_stop = lseg[-1][2]
	    grid = PraatTextGrid(xmin=grid_start, xmax=grid_stop)
	    grid.append(tier)
	    grid.save(fout)


	def seg2json(lseg):
	    """Serialise per-channel segments to JSON, for 5- or 4-field segments.

	    The five-field exporter is tried first. If a segment cannot be unpacked
	    into five fields (which raises ValueError), the four-field exporter is
	    used instead.

	    Args:
	        lseg: iterable with one entry per channel, as accepted by
	            ``seg2json5`` / ``seg2json4``.

	    Returns:
	        The JSON string produced by whichever exporter succeeded.
	    """
	    try:
	        return seg2json5(lseg)
	    except ValueError:
	        # Only a field-count mismatch triggers the fallback; anything else
	        # (including KeyboardInterrupt) is no longer silently swallowed.
	        return seg2json4(lseg)




	def seg2Info(lseg):
	    """Return a JSON list giving, per channel, the summed segment duration.

	    Channels are numbered from 1 in iteration order. For each channel the
	    durations (stop - start) of all segments whose label is not 'noEnergy'
	    are added up; a channel entry equal to -1 reports 0.

	    Args:
	        lseg: iterable of channels; each channel is either -1 or an
	            iterable of segments indexed as (label, start, stop, ...).

	    Returns:
	        JSON text: a list of {"channel": n, "speech": total} objects.
	    """
	    summary = []
	    for channel, segs in enumerate(lseg, start=1):
	        speech = 0
	        if segs != -1:
	            for seg in segs:
	                if seg[0] == 'noEnergy':
	                    continue
	                # Evaluated as (speech + stop) - start, on purpose.
	                speech = speech + seg[2] - seg[1]
	        summary.append({'channel': channel, 'speech': speech})
	    return json.dumps(summary)


	def seg2Gender_Info(lseg):
	    """Return a JSON list giving, per channel, male and female durations.

	    Channels are numbered from 1 in iteration order. Durations
	    (stop - start) are accumulated separately for segments labelled
	    "male" and "female"; every other label is ignored. A channel entry
	    equal to -1 reports 0 for both.

	    Args:
	        lseg: iterable of channels; each channel is either -1 or an
	            iterable of segments indexed as (label, start, stop, ...).

	    Returns:
	        JSON text: a list of {"channel": n, "male": m, "female": f} objects.
	    """
	    summary = []
	    for channel, segs in enumerate(lseg, start=1):
	        male_time = 0
	        female_time = 0
	        if segs != -1:
	            for seg in segs:
	                tag = seg[0]
	                # 'noEnergy' matches neither branch, so it is skipped too.
	                if tag == "female":
	                    female_time = female_time + seg[2] - seg[1]
	                elif tag == "male":
	                    male_time = male_time + seg[2] - seg[1]
	        summary.append({
	            'channel': channel,
	            'male': male_time,
	            'female': female_time,
	        })
	    return json.dumps(summary)

	def seg2json5(lseg):
	    """Serialise per-channel, five-field segments to a JSON string.

	    Channels are numbered from 1 in iteration order. Every segment must
	    unpack into exactly five fields, of which the first three are used as
	    (label, start, stop); otherwise ValueError is raised. Segments
	    labelled 'noEnergy' are left out. The "gender" value is ``label[0]``
	    (the first character when labels are strings). A channel entry equal
	    to -1 yields an empty segment list.

	    Args:
	        lseg: iterable of channels; each is -1 or an iterable of segments.

	    Returns:
	        JSON text: a list of {"channel": n, "segments": [...]} objects.
	    """
	    channels = []
	    for channel, segs in enumerate(lseg, start=1):
	        segments = []
	        if segs != -1:
	            segments = [
	                {'startTime': start, 'endTime': stop, 'gender': label[0]}
	                for label, start, stop, _, _ in segs
	                if label != 'noEnergy'
	            ]
	        channels.append({'channel': channel, 'segments': segments})
	    return json.dumps(channels)

	def seg2json4(lseg):
	    """Serialise per-channel, four-field segments to a JSON string.

	    Channels are numbered from 1 in iteration order. Every segment must
	    unpack into exactly four fields, of which the first three are used as
	    (label, start, stop); otherwise ValueError is raised. Segments
	    labelled 'noEnergy' are left out. The "gender" value is ``label[0]``
	    (the first character when labels are strings). A channel entry equal
	    to -1 yields an empty segment list.

	    Args:
	        lseg: iterable of channels; each is -1 or an iterable of segments.

	    Returns:
	        JSON text: a list of {"channel": n, "segments": [...]} objects.
	    """
	    channels = []
	    for channel, segs in enumerate(lseg, start=1):
	        segments = []
	        if segs != -1:
	            segments = [
	                {'startTime': start, 'endTime': stop, 'gender': label[0]}
	                for label, start, stop, _ in segs
	                if label != 'noEnergy'
	            ]
	        channels.append({'channel': channel, 'segments': segments})
	    return json.dumps(channels)




	def seg2aud(lseg, fout=None):
	    """Write segments to a tab-separated label file, for 5- or 4-field segments.

	    The five-field writer is tried first. If a segment cannot be unpacked
	    into five fields (which raises ValueError), the four-field writer is
	    used instead; it reopens ``fout`` for writing, so anything the first
	    attempt had written is replaced.

	    Args:
	        lseg: segments as accepted by ``seg2aud5`` / ``seg2aud4``.
	        fout: path of the file to write.
	    """
	    try:
	        seg2aud5(lseg, fout)
	    except ValueError:
	        # Only a field-count mismatch triggers the fallback; I/O errors and
	        # interrupts propagate instead of being silently retried.
	        seg2aud4(lseg, fout)

	def seg2aud5(lseg, fout=None):
	    """Write five-field segments as "start<TAB>stop<TAB>label" lines.

	    Nothing is done (and no file is opened) when ``lseg`` equals -1.
	    Otherwise ``fout`` is opened for writing and one line is emitted per
	    segment whose label is not 'noEnergy'. Every segment must unpack into
	    exactly five fields, else ValueError is raised.

	    Args:
	        lseg: -1, or an iterable of 5-field segments starting with
	            (label, start, stop).
	        fout: path of the file to write.
	    """
	    if lseg == -1:
	        return
	    with open(fout, 'w') as fid:
	        # Generator keeps the writes lazy, one segment at a time.
	        fid.writelines(
	            '%s\t%s\t%s\n' % (start, stop, label)
	            for label, start, stop, _, _ in lseg
	            if label != 'noEnergy'
	        )

	def seg2aud4(lseg, fout=None):
	    """Write four-field segments as "start<TAB>stop<TAB>label" lines.

	    Nothing is done (and no file is opened) when ``lseg`` equals -1.
	    Otherwise ``fout`` is opened for writing and one line is emitted per
	    segment whose label is not 'noEnergy'. Every segment must unpack into
	    exactly four fields, else ValueError is raised.

	    Args:
	        lseg: -1, or an iterable of 4-field segments starting with
	            (label, start, stop).
	        fout: path of the file to write.
	    """
	    if lseg == -1:
	        return
	    with open(fout, 'w') as fid:
	        # Generator keeps the writes lazy, one segment at a time.
	        fid.writelines(
	            '%s\t%s\t%s\n' % (start, stop, label)
	            for label, start, stop, _ in lseg
	            if label != 'noEnergy'
	        )

	def _tg_interval(index, xmin_line, xmax, text=''):
	    """Return the four TextGrid lines describing interval number ``index``."""
	    return [
	        ' intervals [%s]:\n' % (index,),
	        xmin_line,
	        ' xmax = %s \n' % (xmax,),
	        # A double quote inside a quoted TextGrid string is written doubled.
	        ' text = "%s" \n' % (text.replace('"', '""'),),
	    ]


	def seg2textgrid(data, fout=None):
	    """Write labelled segments as a one-tier TextGrid text file.

	    The file holds a single IntervalTier named "sen" starting at 0 and
	    ending at the stop time of the last segment. Intervals are produced
	    as follows:

	    * if the first segment starts after 0, an empty interval covers the
	      lead-in;
	    * each segment becomes one interval whose text is its stripped label;
	    * when the next segment does not start where the current one stops
	      and the difference exceeds 0.5, an empty interval fills the gap;
	    * when the difference is 0.5 or less, no filler is added and the next
	      interval starts at the current segment's stop time instead of its
	      own start time.

	    Args:
	        data: non-empty indexable sequence of segments indexed as
	            (label, start, stop, ...). Labels must be strings; start and
	            stop must be accepted by ``float()``.
	        fout: path of the file to write (written as UTF-8).

	    Raises:
	        IndexError: if ``data`` is empty.
	    """
	    tier_end = data[-1][2]
	    intervals = []

	    if float(data[0][1]) > 0:
	        # Lead-in before the first segment (this xmin line has no trailing
	        # space, unlike the others).
	        intervals.append(_tg_interval(1, ' xmin = 0\n', data[0][1]))

	    # True when the upcoming interval must start at the previous stop time
	    # because a small gap was absorbed rather than filled.
	    start_at_prev_stop = False
	    for i, seg in enumerate(data):
	        xmin = data[i - 1][2] if start_at_prev_stop else seg[1]
	        intervals.append(_tg_interval(
	            len(intervals) + 1,
	            ' xmin = %s \n' % (xmin,),
	            seg[2],
	            seg[0].strip(),
	        ))

	        if i + 1 >= len(data):
	            break

	        next_start = data[i + 1][1]
	        if seg[2] != next_start:
	            if float(next_start) - float(seg[2]) > 0.5:
	                intervals.append(_tg_interval(
	                    len(intervals) + 1,
	                    ' xmin = %s \n' % (seg[2],),
	                    next_start,
	                ))
	                start_at_prev_stop = False
	            else:
	                start_at_prev_stop = True
	        # When the segments are contiguous the flag is left unchanged.

	    lines = [
	        'File type = "ooTextFile"\n',
	        'Object class = "TextGrid"\n',
	        '\n',
	        'xmin = 0 \n',
	        'xmax = %s \n' % (tier_end,),
	        'tiers? <exists> \n',
	        'size = 1 \n',
	        'item []: \n',
	        ' item [1]:\n',
	        ' class = "IntervalTier" \n',
	        ' name = "sen" \n',
	        ' xmin = 0 \n',
	        ' xmax = %s \n' % (tier_end,),
	        # The count comes straight from the intervals that were built.
	        ' intervals: size = %s \n' % (len(intervals),),
	    ]
	    for block in intervals:
	        lines.extend(block)

	    # Explicit UTF-8 so non-ASCII labels do not depend on the locale.
	    with open(fout, mode='w', encoding='utf-8') as fid:
	        fid.writelines(lines)