Procházet zdrojové kódy

First commit, examples in Python included

jjorgeDSIC před 4 roky
rodič
revize
a188d1bd44

binární
MLLPStreamingClient_mllp-1.0.0-py3-none-any.whl


+ 70 - 1
README.md

@@ -1,2 +1,71 @@
-# MLLP_Streaming_API
+# Streaming API example 1.0
+
+Streaming API example, tested on Ubuntu 16.04 with Python 3.6.
+
+## Setup and installation
+
+System dependencies:
+
+```bash
+sudo apt install libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg libav-tools
+```
+
+Installation
+
+```bash
+#Prepare virtual environment
+virtualenv --python=/path/to/python3.6 env
+
+#Activate environment
+source env/bin/activate
+
+#Install requirements
+pip install -r requirements.txt
+
+#Download & Install Streaming client library
+wget https://ttp.mllp.upv.es/mllp-streaming-api/MLLPStreamingClient_mllp-1.0.0-py3-none-any.whl
+pip install MLLPStreamingClient_mllp-1.0.0-py3-none-any.whl
+```
+
+## Examples
+
+Replace these lines in the examples with your credentials and server information:
+
+```python
+server_hostname = "<SERVER_ADDRESS>"
+server_port = "<PORT>"
+api_user = "<YOU_API_USER>"
+api_secret = "<YOUR_API_KEY>"
+server_ssl_cert_file = "<CRT_FILE>"
+```
+
+To get the server SSL cert, use these commands:
+
+```bash
+apt-get install openssl
+echo -n | openssl s_client -connect ttp.mllp.upv.es:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' > ttp.mllp.upv.es.crt
+```
+
+To run the examples:
+
+```bash
+#Activate the environment
+source env/bin/activate
+
+#Sending WAV file
+python3 python_examples/client.short.wav.py wav_example/AAFA0016.wav
+
+#Sending audio from mic
+python3 python_examples/client.mic.py
+```
+
+## Detailed documentation (login required):
+
+
+* [Streaming service API](https://ttp.mllp.upv.es/index.php?page=api)
+* [Python API client](https://ttp.mllp.upv.es/mllp-streaming-api/MLLPStreamingClient.html)
+
+## Contact and support
+
+Email: mllp-support@upv.es
 

+ 69 - 0
python_examples/client.mic.py

@@ -0,0 +1,69 @@
+import json
+import pyaudio
+import sys
+from MLLPStreamingClient import MLLPStreamingClient
+
+server_hostname = "<SERVER_ADDRESS>"
+server_port = "<PORT>"
+api_user = "<YOU_API_USER>"
+api_secret = "<YOUR_API_KEY>"
+server_ssl_cert_file = "<CRT_FILE>"
+
+#Create the client object from the placeholder connection settings above (replace them with real values)
+cli = MLLPStreamingClient(server_hostname, server_port, api_user,api_secret, server_ssl_cert_file)
+#Request the authentication token for this session
+cli.getAuthToken()
+#Get the available systems; the code below iterates the result and reads system['info']['langs'] from each entry
+systems = cli.getTranscribeSystemsInfo()
+
+#Audio streaming iterator to send the audio file
+def myStreamIterator():
+    #Audio features
+    CHUNK = 1024
+    #1 channel, 16Khz, int16 audio
+    FORMAT = pyaudio.paInt16
+    CHANNELS = 1
+    RATE = 16000
+    #Limits the recording to 120 seconds
+    RECORD_SECONDS = 120
+    p = pyaudio.PyAudio()
+    stream = p.open(format=FORMAT,
+                     channels=CHANNELS,
+                     rate=RATE,
+                     input=True,
+                     frames_per_buffer=CHUNK)
+
+    print("Sending data..")
+    #Sends data in chunks of 1024 bytes
+    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
+        data = stream.read(CHUNK)
+        yield data
+    #Stops and close the stream
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+    
+
+#Select Spanish system for testing
+for system in systems:
+    if system['info']['langs'][0]['code'] == "es":
+        es_system = system    
+
+#Selects the system with the system id for the Spanish system, updated in Feb20
+for resp in cli.transcribe(es_system, myStreamIterator):
+    # Hyp_var contains part of the hypothesis that is not consolidated yet (hypothesis could change)
+    if resp["hyp_var"] != "":
+        print("VAR")
+        sys.stdout.write("{} ".format(resp["hyp_var"].strip().replace("[SILENCE]","")))
+        if resp["eos"]:
+            sys.stdout.write("\n")
+        sys.stdout.flush()
+    # Hyp_novar contains part of the hypothesis that is consolidated (hypothesis will not change)
+    if resp["hyp_novar"] != "":
+        print("\nNOVAR")
+        sys.stdout.write("{} ".format(resp["hyp_novar"].strip()))
+        if resp["eos"]:
+            sys.stdout.write("\n")
+        sys.stdout.flush()
+
+

+ 52 - 0
python_examples/client.short.wav.py

@@ -0,0 +1,52 @@
+import json
+import sys
+from MLLPStreamingClient import MLLPStreamingClient
+
+def main(wav):
+    server_hostname = "<SERVER_ADDRESS>"
+    server_port = "<PORT>"   
+    api_user = "<YOU_API_USER>"
+    api_secret = "<YOUR_API_KEY>"
+    server_ssl_cert_file = "<CRT_FILE>"
+
+    #Client object creation
+    cli = MLLPStreamingClient(server_hostname, server_port, api_user,api_secret, server_ssl_cert_file)
+    #Get Token for the session
+    cli.getAuthToken()
+    #Get available systems, a dictionary with the information related to the available systems and languages
+    systems = cli.getTranscribeSystemsInfo()
+
+    #Audio streaming iterator, sends audio in chunks of 250 bytes
+    def myStreamIterator():
+        with open(wav,"rb") as fd:
+            data = fd.read(250)
+            while data != b"":
+                yield data
+                data = fd.read(250)
+
+    es_system = {}
+
+    #Select Spanish system for testing
+    for system in systems:
+        if system['info']['langs'][0]['code'] == "es":
+            es_system = system
+
+    if es_system == {}:
+        raise Exception("Spanish system not found")
+
+    
+    for resp in cli.transcribe(es_system['id'], myStreamIterator):
+        # Hyp_novar contains part of the hypothesis that is consolidated (hypothesis will not change)
+        if resp["hyp_novar"] != "":
+            sys.stdout.write("{} ".format(resp["hyp_novar"].strip()))
+            if resp["eos"]:
+                sys.stdout.write("\n")
+            sys.stdout.flush()
+
+if __name__ == "__main__":
+    
+    if len(sys.argv) != 2:
+        print("client.short.wav.py <WAV>")
+    else:
+        main(sys.argv[1])
+        

+ 36 - 0
requirements.txt

@@ -0,0 +1,36 @@
+backcall==0.2.0
+cachetools==4.1.1
+certifi==2020.6.20
+chardet==3.0.4
+dataclasses==0.7
+decorator==4.4.2
+grpcio==1.33.1
+grpcio-tools==1.33.1
+idna==2.10
+jedi==0.17.2
+libcst==0.3.13
+mypy-extensions==0.4.3
+parso==0.7.1
+pexpect==4.8.0
+pickleshare==0.7.5
+# pkg-resources==0.0.0  (Debian/Ubuntu "pip freeze" artifact; not installable from PyPI)
+prompt-toolkit==3.0.8
+proto-plus==1.11.0
+protobuf==3.13.0
+ptyprocess==0.6.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+PyAudio==0.2.11
+pydub==0.24.1
+Pygments==2.7.2
+pytz==2020.1
+PyYAML==5.3.1
+requests==2.24.0
+rsa==4.6
+six==1.15.0
+termcolor==1.1.0
+traitlets==4.3.3
+typing-extensions==3.7.4.3
+typing-inspect==0.6.0
+urllib3==1.25.11
+wcwidth==0.2.5

binární
wav_example/AAFA0016.wav