diff --git a/scripts/py_modules/__init__.py b/scripts/py_modules/__init__.py
new file mode 100644
index 0000000000..1bb8bf6d7f
--- /dev/null
+++ b/scripts/py_modules/__init__.py
@@ -0,0 +1 @@
+# empty
diff --git a/scripts/py_modules/speechtools.py b/scripts/py_modules/speechtools.py
new file mode 100644
index 0000000000..88e030db9b
--- /dev/null
+++ b/scripts/py_modules/speechtools.py
@@ -0,0 +1,198 @@
+from freeswitch import *
+from xml.dom import minidom
+# TTS engine and voice names. None of the classes shown in this module
+# read these values; recipewizard.py defines its own copies.
+VOICE_ENGINE = "cepstral"
+VOICE = "William"
+"""
+A few classes that make it easier to write speech applications
+using Python. They are roughly modelled after the equivalent classes
+written in JavaScript.
+Status: should work, but not yet complete. Some pending items
+are mentioned in the comments.
+"""
class Grammar:
    """Plain value holder describing one speech-recognition grammar."""

    def __init__(self, name, path, obj_path,
                 min_score=1, confirm_score=400, halt=False):
        """
        @param name - label under which the grammar is looked up later
        @param path - location of the xml grammar file
        @param obj_path - xml path, starting at the root of the result
                          xml, where the interpretation is found,
                          eg, 'interpretation'
        @param min_score - score threshold for accepting a result
                           (only stored here, not enforced)
        @param confirm_score - a result scoring below this threshold
                               should be confirmed with the user
                               (only stored here, not enforced)
        @param halt - carried over from the js version; currently unused
        """
        # what the grammar is called and where its pieces live
        self.name, self.path, self.obj_path = name, path, obj_path
        # scoring thresholds
        self.min_score, self.confirm_score = min_score, confirm_score
        self.halt = halt
class SpeechDetect:
    """
    Holds a session plus a registry of grammars, and starts speech
    detection on that session using the currently selected grammar.
    """

    def __init__(self, session, module_name, ip_addr):
        """
        @param session - session object on which "detect_speech" is executed
        @param module_name - first token of the detect_speech command
        @param ip_addr - last token of the detect_speech command
        """
        self.session = session
        self.module_name = module_name
        self.ip_addr = ip_addr
        self.grammars = {}
        # No grammar is selected until setGrammar() is called; keeping the
        # attribute defined lets detectSpeech() report that clearly.
        self.grammar = None

    def addGrammar(self, grammar):
        """
        Register a grammar under its own name, replacing any grammar
        previously registered with the same name.
        """
        self.grammars[grammar.name] = grammar

    def setGrammar(self, name):
        """
        Select a previously added grammar as the current one.
        Raises KeyError if no grammar was added under that name.
        """
        self.grammar = self.grammars[name]

    def detectSpeech(self):
        """
        Execute the "detect_speech" application on the session with the
        current grammar.
        Raises AttributeError if no grammar has been selected yet.
        """
        if self.grammar is None:
            raise AttributeError("no grammar selected; call setGrammar() "
                                 "before detectSpeech()")
        # TODO: we might not always want to call detect_speech
        # with this cmd, see js version for other options
        # also see detect_speech_function() in mod_dptools.c
        cmd = "%s %s %s %s" % (self.module_name,
                               self.grammar.name,
                               self.grammar.path,
                               self.ip_addr)
        console_log("debug", "calling detect_speech with: %s\n" % cmd)
        self.session.execute("detect_speech", cmd)
        console_log("debug", "finished calling detect_speech\n")
class SpeechObtainer:
    """
    Drives a SpeechDetect object: starts detection with a grammar,
    waits for speech events on the session and collects the detected
    phrases.
    """

    # Input type codes handed to the session input callback, as they are
    # distinguished in run(): 0 carries DTMF input, 1 carries an event
    # dictionary.
    INPUT_TYPE_DTMF = 0
    INPUT_TYPE_EVENT = 1

    def __init__(self, speech_detect, required_phrases, wait_time, max_tries):
        """
        @param speech_detect - the speech detect object, which holds a
                               reference to underlying session and can
                               be re-used by many SpeechObtainers
        @param required_phrases - the number of required phrases from the
                                  grammar.  for example if it is prompting
                                  for the toppings on a sandwich and min
                                  toppings is 3, use 3.  normally will be 1.
        @param wait_time - the time, in milliseconds, to wait for
                           input during each loop iteration
        @param max_tries - this number multiplied by wait time gives the
                           'total wait time' before we give up and return
                           partial or no result
        """
        self.speech_detect = speech_detect
        self.required_phrases = required_phrases
        self.wait_time = wait_time
        self.max_tries = max_tries
        # set by setGrammar(); defined here so the attribute always exists
        self.grammar = None
        self.detected_phrases = []

    def setGrammar(self, grammar):
        """
        Register the grammar with the speech detect object and make it
        the current grammar there.

        @param grammar - instance of grammar class
        """
        self.grammar = grammar
        self.speech_detect.addGrammar(grammar)
        self.speech_detect.setGrammar(self.grammar.name)

    def detectSpeech(self):
        """Start speech detection via the underlying speech detect object."""
        self.speech_detect.detectSpeech()

    def run(self):
        """
        Install an input callback on the session and wait for results
        from the asr engine.  Loops until the session is no longer ready,
        max_tries waits have elapsed, or required_phrases phrases have
        been detected.

        @return the list of detected phrases (possibly empty or partial).
                phrases accumulate on the instance across calls.
        """
        def dtmf_handler(event_input, itype, funcargs):
            console_log("INFO","\n\nDTMF itype: %s\n" % itype)
            if itype == self.INPUT_TYPE_EVENT:
                return self.handle_event(event_input, funcargs)
            if itype == self.INPUT_TYPE_DTMF:
                console_log("INFO","\n\nDTMF input: %s\n" % event_input)
            else:
                console_log("INFO","\n\nUnknown input type: %s\n" % itype)
            return None

        num_tries = 0
        session = self.speech_detect.session
        console_log("debug", "setting dtmf callback\n")
        session.setDTMFCallback(dtmf_handler, "")
        console_log("debug", "calling getDigits\n")
        console_log("debug", "starting run() while loop\n")
        while (session.ready() and
               num_tries < self.max_tries and
               len(self.detected_phrases) < self.required_phrases):
            console_log("debug", "top of run() while loop\n")
            # each call waits up to wait_time; events arrive through
            # dtmf_handler while we wait
            session.collectDigits(self.wait_time)
            num_tries += 1
        console_log("debug", "while loop finished\n")
        return self.detected_phrases

    def handle_event(self, event, funcargs):
        """
        when the dtmf handler receives an event, it calls back
        this method.  event is a dictionary with subdictionaries ..

        Example 1
        =========
        {'body': None, 'headers': {'Speech-Type': 'begin-speaking'}}

        Example 2
        =========
        {'body': '<xml result whose obj_path element holds the phrase>',
         'headers': {'Speech-Type': 'detected-speech'}}
        (the exact xml layout depends on the asr engine)

        This dictionary is constructed in run_dtmf_callback() in
        freeswitch_python.cpp

        @return "stop" for every recognized speech event
        @raise Exception if the Speech-Type header is neither
               'begin-speaking' nor 'detected-speech'
        """
        # what kind of event?
        headers = event['headers']
        speech_type = headers['Speech-Type']

        if speech_type == "begin-speaking":
            # not sure what to do with this, try returning "stop"
            # so that it might stop playing a sound file once
            # speech has been detected
            return "stop"

        if speech_type != "detected-speech":
            raise Exception("Unknown speech event: %s" % speech_type)

        # extract the detected phrase from the result
        # BUG: this assumes only ONE interpretation in the xml
        # result.  the rest will get ignored
        # NOTE: have to wrap everything with str() (at least
        # calls to console_log) because otherwise it chokes on
        # unicode strings.
        # TODO: check the score
        body = event['body']
        if not body:
            # nothing to parse, so there is no phrase to record
            return "stop"

        dom = minidom.parseString(body)
        try:
            phrase_text = ""
            matches = dom.getElementsByTagName(self.grammar.obj_path)
            if matches:
                # a result without the expected element yields no phrase
                # instead of an IndexError
                phrase_text = self.getText(matches[0])
        finally:
            # release the parsed tree once the text has been copied out
            dom.unlink()

        if phrase_text:
            self.detected_phrases.append(str(phrase_text))
        # do we want to return stop?  what should we return?
        return "stop"

    def getText(self, elt):
        """ given an element, get its text.  if there is more than
        one text node child, just append all the text together.
        only direct children are considered.
        """
        return "".join([str(child.nodeValue)
                        for child in elt.childNodes
                        if child.nodeType == child.TEXT_NODE])
diff --git a/scripts/recipewizard.py b/scripts/recipewizard.py
new file mode 100644
index 0000000000..797a381f3f
--- /dev/null
+++ b/scripts/recipewizard.py
@@ -0,0 +1,87 @@
+from freeswitch import *
+from py_modules.speechtools import Grammar, SpeechDetect
+from py_modules.speechtools import SpeechObtainer
+import time, os
+# TTS engine and voice that RecipeWizard passes to session.set_tts_parms().
+VOICE_ENGINE = "cepstral"
+VOICE = "William"
+# Directory that RecipeWizard.main() joins with "mainmenu.xml" to locate
+# the grammar file.
+GRAMMAR_ROOT = "/usr/src/freeswitch_trunk/scripts"
+"""
+Example speech recognition application in Python.
+How to make this work:
+* Get mod_openmrcp working along with an MRCP ASR server
+* Add /usr/src/freeswitch/scripts or equivalent to your PYTHONPATH
+* Restart FreeSWITCH
+* Create $GRAMMAR_ROOT/mainmenu.xml from the contents of the mainmenu() comments
+"""
class RecipeWizard:
    """
    Example speech application: greets the caller, listens for one
    phrase from the "mainmenu" grammar and reads the result back.
    The whole dialogue runs from the constructor.
    """

    def __init__(self, session):
        """
        @param session - call session used for tts and speech detection
        """
        self.session = session
        session.set_tts_parms(VOICE_ENGINE, VOICE)
        self.main()

    def main(self):
        """Run the dialogue once: prompt, listen, report what was heard."""
        console_log("debug", "recipe wizard main()\n")

        # detector bound to this call, and an obtainer that wants a
        # single phrase, waiting 5000 per try for at most 3 tries
        detector = SpeechDetect(self.session, "openmrcp", "")
        self.speechdetect = detector
        obtainer = SpeechObtainer(detector, 1, 5000, 3)
        self.speechobtainer = obtainer

        grammar_file = os.path.join(GRAMMAR_ROOT, "mainmenu.xml")
        self.grammar = Grammar("mainmenu", grammar_file, "input",
                               min_score=80, confirm_score=90)
        obtainer.setGrammar(self.grammar)

        console_log("debug", "calling speechobtainer.run()\n")
        # detection is started before the prompt is spoken
        obtainer.detectSpeech()
        self.session.speak("Hello. Welcome to the recipe wizard. Drinks or food?")
        phrases = obtainer.run()
        console_log("debug", "speechobtainer.run() result: %s\n" % phrases)

        if not phrases:
            self.session.speak("Sorry, I did not hear you")
        else:
            self.session.speak("Received result. Result is: %s" % phrases[0])
        console_log("debug", "speechobtainer.run() finished\n")
def mainmenu():
    """
    Placeholder that only documents the grammar expected in
    $GRAMMAR_ROOT/mainmenu.xml; it performs no work.

    The grammar offers two choices:

        drinks
        food

    NOTE(review): only the two choice words are present here; the
    surrounding xml grammar markup is not, so it must be written in
    whatever grammar format the asr server expects.
    """
    return None
def handler(uuid):
    """
    Entry point for a call: answer it, run the recipe wizard and hang up.

    @param uuid - identifier used to construct the PySession for the call
    """
    session = PySession(uuid)
    session.answer()
    try:
        # the wizard runs its whole dialogue from its constructor
        RecipeWizard(session)
    finally:
        # request the hangup even if the wizard raised, so an error in
        # the dialogue does not skip it
        session.hangup("1")