From e3c2fac4f49859268d2f337ecaa64c41e3a6bd1d Mon Sep 17 00:00:00 2001 From: navanchauhan Date: Wed, 8 Feb 2023 17:40:27 -0700 Subject: new post --- ...02-08-Interact-with-siri-from-the-terminal.html | 300 +++++++++++++++++++++ docs/posts/index.html | 21 ++ 2 files changed, 321 insertions(+) create mode 100644 docs/posts/2023-02-08-Interact-with-siri-from-the-terminal.html (limited to 'docs/posts') diff --git a/docs/posts/2023-02-08-Interact-with-siri-from-the-terminal.html b/docs/posts/2023-02-08-Interact-with-siri-from-the-terminal.html new file mode 100644 index 0000000..2db536b --- /dev/null +++ b/docs/posts/2023-02-08-Interact-with-siri-from-the-terminal.html @@ -0,0 +1,300 @@ + + + + + + + + + Interacting with Siri using the command line + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +

Interacting with Siri using the command line

+ +

My main objective was to see if I could issue multi-intent commands in one go. Obviously, Siri cannot do that (neither can Alexa, Cortana, or Google Assistant). The script here can either issue a single command, or use OpenAI's DaVinci model to extract multiple commands and pass them on to Siri.

+ +

Prerequisites

+ + + +

Show me ze code

+ +

If you are here just for the code:

+ +
+
import argparse
+import applescript
+import openai
+
+from os import getenv
+
+# API key is read from the OPENAI_KEY environment variable (None when unset).
+openai.api_key = getenv("OPENAI_KEY")
+# Completion model name passed to openai.Completion.create in execute_with_llm.
+engine = "text-davinci-003"
+
+def execute_with_llm(command_text: str) -> None:
+    llm_prompt = f"""You are provided with multiple commands as a single command. Break down all the commands and return them in a list of strings. If you are provided with a single command, return a list with a single string, trying your best to understand the command.
+
+    Example:
+    Q: "Turn on the lights and turn off the lights"
+    A: ["Turn on the lights", "Turn off the lights"]
+
+    Q: "Switch off the lights and then play some music"
+    A: ["Switch off the lights", "Play some music"]
+
+    Q: "I am feeling sad today, play some music"
+    A: ["Play some cheerful music"]
+
+    Q: "{command_text}"
+    A: 
+    """
+
+    completion = openai.Completion.create(engine=engine, prompt=llm_prompt, max_tokens=len(command_text.split(" "))*2)
+
+    for task in eval(completion.choices[0].text):
+        execute_command(task)
+
+
+def execute_command(command_text: str) -> None:
+    """Execute a Siri command."""
+
+    script = applescript.AppleScript(f"""
+        tell application "System Events" to tell the front menu bar of process "SystemUIServer"
+            tell (first menu bar item whose description is "Siri")
+                perform action "AXPress"
+            end tell
+        end tell
+
+        delay 2
+
+        tell application "System Events"
+            set textToType to "{command_text}"
+            keystroke textToType
+            key code 36
+        end tell
+    """)
+
+    script.run()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("command", nargs="?", type=str, help="The command to pass to Siri", default="What time is it?")
+    parser.add_argument('--openai', action=argparse.BooleanOptionalAction, help="Use OpenAI to detect multiple intents", default=False)
+    args = parser.parse_args()
+
+    if args.openai:
+        execute_with_llm(args.command)
+    else:
+        execute_command(args.command)
+
+
+ +

Usage:

+ +
+
python3 main.py "play some taylor swift"
+python3 main.py "turn off the lights and play some music" --openai
+
+
+ +

ELI5

+ +

I am not actually going to explain it as if I am explaining to a five-year-old kid.

+ +

AppleScript

+ +

In the age of Siri Shortcuts, AppleScript can still do more. It is a scripting language created by Apple that can help you automate pretty much anything you see on your screen.

+ +

We use the following AppleScript to trigger Siri and then type in our command:

+ +
+
tell application "System Events" to tell the front menu bar of process "SystemUIServer"
+    -- Press the menu bar item whose description is "Siri".
+    tell (first menu bar item whose description is "Siri")
+        perform action "AXPress"
+    end tell
+end tell
+
+-- Pause for 2 seconds before typing (presumably to give Siri time to open).
+delay 2
+
+tell application "System Events"
+    set textToType to "Play some rock music"
+    -- Type the text, then send key code 36.
+    keystroke textToType
+    key code 36
+end tell
+
+
+ +

This first triggers Siri, waits for a couple of seconds, and then types in our command. In the script, this functionality is handled by the execute_command function.

+ +
+
import applescript
+
+def execute_command(command_text: str) -> None:
+    """Execute a Siri command."""
+
+    script = applescript.AppleScript(f"""
+        tell application "System Events" to tell the front menu bar of process "SystemUIServer"
+            tell (first menu bar item whose description is "Siri")
+                perform action "AXPress"
+            end tell
+        end tell
+
+        delay 2
+
+        tell application "System Events"
+            set textToType to "{command_text}"
+            keystroke textToType
+            key code 36
+        end tell
+    """)
+
+    script.run()
+
+
+ +

Multi-Intent Commands

+ +

We can call OpenAI's API to autocomplete our prompt and extract multiple commands. We don't have to use OpenAI's API; we could just as well use Google's Flan-T5 model through HuggingFace's transformers library.

+ +

Ze Prompt

+ +
+
You are provided with multiple commands as a single command. Break down all the commands and return them in a list of strings. If you are provided with a single command, return a list with a single string, trying your best to understand the command.
+
+    Example:
+    Q: "Turn on the lights and turn off the lights"
+    A: ["Turn on the lights", "Turn off the lights"]
+
+    Q: "Switch off the lights and then play some music"
+    A: ["Switch off the lights", "Play some music"]
+
+    Q: "I am feeling sad today, play some music"
+    A: ["Play some cheerful music"]
+
+    Q: "{command_text}"
+    A:
+
+
+ +

This prompt gives the model a few examples to increase the generation accuracy, along with instructing it to return a Python list.

+ +

Ze Code

+ +
+
import openai
+
+from os import getenv
+
+# API key is read from the OPENAI_KEY environment variable (None when unset).
+openai.api_key = getenv("OPENAI_KEY")
+# Completion model name passed to openai.Completion.create in execute_with_llm.
+engine = "text-davinci-003"
+
+def execute_with_llm(command_text: str) -> None:
+    llm_prompt = f"""You are provided with multiple commands as a single command. Break down all the commands and return them in a list of strings. If you are provided with a single command, return a list with a single string, trying your best to understand the command.
+
+    Example:
+    Q: "Turn on the lights and turn off the lights"
+    A: ["Turn on the lights", "Turn off the lights"]
+
+    Q: "Switch off the lights and then play some music"
+    A: ["Switch off the lights", "Play some music"]
+
+    Q: "I am feeling sad today, play some music"
+    A: ["Play some cheerful music"]
+
+    Q: "{command_text}"
+    A: 
+    """
+
+    completion = openai.Completion.create(engine=engine, prompt=llm_prompt, max_tokens=len(command_text.split(" "))*2)
+
+    for task in eval(completion.choices[0].text): # NEVER EVAL IN PROD RIGHT LIKE THIS
+        execute_command(task)
+
+
+ +

Gluing together code

+ +

To finish it all off, we can use argparse to only send the input command to OpenAI when asked to do so.

+ +
+
import argparse
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("command", nargs="?", type=str, help="The command to pass to Siri", default="What time is it?")
+    parser.add_argument('--openai', action=argparse.BooleanOptionalAction, help="Use OpenAI to detect multiple intents", default=False)
+    args = parser.parse_args()
+
+    if args.openai:
+        execute_with_llm(args.command)
+    else:
+        execute_command(args.command)
+
+
+ +

Conclusion

+ +

Siri is still dumb. When I ask it to Switch off the lights, it defaults to the home thousands of miles away. But, this code snippet definitely does work!

+ +
If you have scrolled this far, consider subscribing to my mailing list here. You can subscribe to either a specific type of post you are interested in, or subscribe to everything with the "Everything" list.
+ +
+ +
+
+ + + + + + \ No newline at end of file diff --git a/docs/posts/index.html b/docs/posts/index.html index ace6ce7..5fed354 100644 --- a/docs/posts/index.html +++ b/docs/posts/index.html @@ -62,6 +62,27 @@