Flatten scope type list (#2628)

Brings our DFA compilation time from around 700ms down to about 320ms. Fixes #2614. I had forgotten, but we actually do parse `<user.any_alphanumeric_key>` so we can show the user's spoken form in the tutorial. I've now utilized this further and actually add the spoken forms for the glyph scope type to the flattened list. Note that this implementation is somewhat hacky on purpose. Basically, I didn't want to touch our CSV parser without first talking to pokey, since a lot of these lists and spoken forms are used in places like the cheat sheet and the tutorial. What I'm doing instead is keeping all the existing lists and then creating a new list that is a flattened version of them. That way the cheat sheet and other places can still use the individual lists, and we are only using this larger flattened list for the actual scope type capture. This is probably something we want to revisit later, but for now we're getting a huge boost in DFA compilation time with no changes to the speakable grammar, and that I think is a clear win. ## Checklist - [/] I have added [tests](https://www.cursorless.org/docs/contributing/test-case-recorder/) - [/] I have updated the [docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and [cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet) - [x] I have not broken the cheatsheet - [x] Run Talon grammar tests --------- Co-authored-by: Phil Cohen <[email protected]>
cursorless-dev · Aug 4, 2024 · 19849e8 · 19849e8
1 parent 0e01381
commit 19849e8
Show file tree

Hide file tree

Showing 11 changed files with 149 additions and 236 deletions.
diff --git a/cursorless-talon/src/csv_overrides.py b/cursorless-talon/src/csv_overrides.py
@@ -49,6 +49,14 @@ class SpokenFormEntry:
     spoken_forms: list[str]
 
 
+def csv_get_ctx():
+    return ctx
+
+
+def csv_get_normalized_ctx():
+    return normalized_ctx
+
+
 def init_csv_and_watch_changes(
     filename: str,
     default_values: ListToSpokenForms,

diff --git a/cursorless-talon/src/get_grapheme_spoken_form_entries.py b/cursorless-talon/src/get_grapheme_spoken_form_entries.py
@@ -11,7 +11,9 @@
 grapheme_capture_name = "user.any_alphanumeric_key"
 
 
-def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
+def get_grapheme_spoken_form_entries(
+    grapheme_talon_list: dict[str, str],
+) -> list[SpokenFormOutputEntry]:
     if grapheme_capture_name not in registry.captures:
         # We require this capture, and expect it to be defined. We want to show a user friendly error if it isn't present (usually indicating a problem with their community.git setup) and we think the user is going to use Cursorless.
         # However, sometimes users use different dictation engines (Vosk, Webspeech) with entirely different/smaller grammars that don't have the capture, and this code will run then, and falsely error. We don't want to show an error in that case because they don't plan to actually use Cursorless.
@@ -28,11 +30,20 @@ def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
             "id": id,
             "spokenForms": spoken_forms,
         }
-        for symbol_list in generate_lists_from_capture(grapheme_capture_name)
-        for id, spoken_forms in get_id_to_spoken_form_map(symbol_list).items()
+        for id, spoken_forms in talon_list_to_spoken_form_map(
+            grapheme_talon_list
+        ).items()
     ]
 
 
+def get_graphemes_talon_list() -> dict[str, str]:
+    return {
+        spoken_form: id
+        for symbol_list in generate_lists_from_capture(grapheme_capture_name)
+        for spoken_form, id in get_id_to_talon_list(symbol_list).items()
+    }
+
+
 def generate_lists_from_capture(capture_name) -> Iterator[str]:
     """
     Given the name of a capture, yield the names of each list that the capture
@@ -68,20 +79,27 @@ def generate_lists_from_capture(capture_name) -> Iterator[str]:
             )
 
 
-def get_id_to_spoken_form_map(list_name: str) -> Mapping[str, list[str]]:
+def get_id_to_talon_list(list_name: str) -> dict[str, str]:
     """
-    Given the name of a Talon list, return a mapping from the values in that
-    list to the list of spoken forms that map to the given value.
+    Given the name of a Talon list, return that list
     """
     try:
         # NB: [-1] because the last list is the active one
-        raw_list = typing.cast(dict[str, str], registry.lists[list_name][-1]).copy()
+        return typing.cast(dict[str, str], registry.lists[list_name][-1]).copy()
     except Error:
         app.notify(f"Error getting list {list_name}")
         return {}
 
+
+def talon_list_to_spoken_form_map(
+    talon_list: dict[str, str],
+) -> Mapping[str, list[str]]:
+    """
+    Given a Talon list, return a mapping from the values in that
+    list to the list of spoken forms that map to the given value.
+    """
     inverted_list: defaultdict[str, list[str]] = defaultdict(list)
-    for key, value in raw_list.items():
+    for key, value in talon_list.items():
         inverted_list[value].append(key)
 
     return inverted_list
diff --git a/cursorless-talon/src/modifiers/glyph_scope.py b/cursorless-talon/src/modifiers/glyph_scope.py
diff --git a/cursorless-talon/src/modifiers/modifiers.py b/cursorless-talon/src/modifiers/modifiers.py
@@ -27,7 +27,6 @@ def cursorless_simple_modifier(m) -> dict[str, str]:
     "<user.cursorless_simple_scope_modifier>",  # funk, state, class, every funk
     "<user.cursorless_ordinal_scope>",  # first past second word
     "<user.cursorless_relative_scope>",  # next funk, 3 funks
-    "<user.cursorless_surrounding_pair_force_direction>",  # DEPRECATED "left quad" / "right quad"
 ]
 
 modifiers = [

diff --git a/cursorless-talon/src/modifiers/scopes.py b/cursorless-talon/src/modifiers/scopes.py
@@ -4,6 +4,25 @@
 
 mod.list("cursorless_scope_type", desc="Supported scope types")
 mod.list("cursorless_scope_type_plural", desc="Supported plural scope types")
+
+mod.list(
+    "cursorless_glyph_scope_type",
+    desc="Cursorless glyph scope type",
+)
+mod.list(
+    "cursorless_glyph_scope_type_plural",
+    desc="Plural version of Cursorless glyph scope type",
+)
+
+mod.list(
+    "cursorless_surrounding_pair_scope_type",
+    desc="Scope types that can function as surrounding pairs",
+)
+mod.list(
+    "cursorless_surrounding_pair_scope_type_plural",
+    desc="Plural form of scope types that can function as surrounding pairs",
+)
+
 mod.list(
     "cursorless_custom_regex_scope_type",
     desc="Supported custom regular expression scope types",
@@ -13,60 +32,49 @@
     desc="Supported plural custom regular expression scope types",
 )
 
-
-@mod.capture(
-    rule="{user.cursorless_scope_type}"
-    " | <user.cursorless_surrounding_pair_scope_type>"
-    " | <user.cursorless_glyph_scope_type>"
-    " | {user.cursorless_custom_regex_scope_type}"
+mod.list(
+    "cursorless_scope_type_flattened",
+    desc="All supported scope types flattened",
+)
+mod.list(
+    "cursorless_scope_type_flattened_plural",
+    desc="All supported plural scope types flattened",
 )
-def cursorless_scope_type(m) -> dict[str, str]:
-    """Cursorless scope type singular"""
-    try:
-        return {"type": m.cursorless_scope_type}
-    except AttributeError:
-        pass
-
-    try:
-        return m.cursorless_surrounding_pair_scope_type
-    except AttributeError:
-        pass
 
-    try:
-        return m.cursorless_glyph_scope_type
-    except AttributeError:
-        pass
 
-    return {
-        "type": "customRegex",
-        "regex": m.cursorless_custom_regex_scope_type,
-    }
+@mod.capture(rule="{user.cursorless_scope_type_flattened}")
+def cursorless_scope_type(m) -> dict[str, str]:
+    """Cursorless scope type singular"""
+    return creates_scope_type(m.cursorless_scope_type_flattened)
 
 
-@mod.capture(
-    rule="{user.cursorless_scope_type_plural}"
-    " | <user.cursorless_surrounding_pair_scope_type_plural>"
-    " | <user.cursorless_glyph_scope_type_plural>"
-    " | {user.cursorless_custom_regex_scope_type_plural}"
-)
+@mod.capture(rule="{user.cursorless_scope_type_flattened_plural}")
 def cursorless_scope_type_plural(m) -> dict[str, str]:
     """Cursorless scope type plural"""
-    try:
-        return {"type": m.cursorless_scope_type_plural}
-    except AttributeError:
-        pass
-
-    try:
-        return m.cursorless_surrounding_pair_scope_type_plural
-    except AttributeError:
-        pass
+    return creates_scope_type(m.cursorless_scope_type_flattened_plural)
 
-    try:
-        return m.cursorless_glyph_scope_type_plural
-    except AttributeError:
-        pass
 
-    return {
-        "type": "customRegex",
-        "regex": m.cursorless_custom_regex_scope_type_plural,
-    }
+def creates_scope_type(id: str) -> dict[str, str]:
+    grouping, value = id.split(".", 1)
+    match grouping:
+        case "simple":
+            return {
+                "type": value,
+            }
+        case "surroundingPair":
+            return {
+                "type": "surroundingPair",
+                "delimiter": value,
+            }
+        case "customRegex":
+            return {
+                "type": "customRegex",
+                "regex": value,
+            }
+        case "glyph":
+            return {
+                "type": "glyph",
+                "character": value,
+            }
+        case _:
+            raise ValueError(f"Unsupported scope type grouping: {grouping}")
diff --git a/cursorless-talon/src/modifiers/surrounding_pair.py b/cursorless-talon/src/modifiers/surrounding_pair.py
diff --git a/cursorless-talon/src/paired_delimiter.py b/cursorless-talon/src/paired_delimiter.py
@@ -54,29 +54,3 @@ def cursorless_wrapper_paired_delimiter(m) -> list[str]:
     except AttributeError:
         id = m.cursorless_wrapper_selectable_paired_delimiter
     return paired_delimiters[id]
-
-
-@mod.capture(
-    rule=(
-        "{user.cursorless_selectable_only_paired_delimiter} |"
-        "{user.cursorless_wrapper_selectable_paired_delimiter}"
-    )
-)
-def cursorless_selectable_paired_delimiter(m) -> str:
-    try:
-        return m.cursorless_selectable_only_paired_delimiter
-    except AttributeError:
-        return m.cursorless_wrapper_selectable_paired_delimiter
-
-
-@mod.capture(
-    rule=(
-        "{user.cursorless_selectable_only_paired_delimiter_plural} |"
-        "{user.cursorless_wrapper_selectable_paired_delimiter_plural}"
-    )
-)
-def cursorless_selectable_paired_delimiter_plural(m) -> str:
-    try:
-        return m.cursorless_selectable_only_paired_delimiter_plural
-    except AttributeError:
-        return m.cursorless_wrapper_selectable_paired_delimiter_plural
diff --git a/cursorless-talon/src/spoken_forms.py b/cursorless-talon/src/spoken_forms.py
@@ -13,10 +13,12 @@
 )
 from .get_grapheme_spoken_form_entries import (
     get_grapheme_spoken_form_entries,
+    get_graphemes_talon_list,
     grapheme_capture_name,
 )
 from .marks.decorated_mark import init_hats
 from .spoken_forms_output import SpokenFormsOutput
+from .spoken_scope_forms import init_scope_spoken_forms
 
 JSON_FILE = Path(__file__).parent / "spoken_forms.json"
 disposables: list[Callable] = []
@@ -99,6 +101,7 @@ def update():
     custom_spoken_forms: dict[str, list[SpokenFormEntry]] = {}
     spoken_forms_output = SpokenFormsOutput()
     spoken_forms_output.init()
+    graphemes_talon_list = get_graphemes_talon_list()
 
     def update_spoken_forms_output():
         spoken_forms_output.write(
@@ -113,7 +116,7 @@ def update_spoken_forms_output():
                     for entry in spoken_form_list
                     if entry.list_name in LIST_TO_TYPE_MAP
                 ],
-                *get_grapheme_spoken_form_entries(),
+                *get_grapheme_spoken_form_entries(graphemes_talon_list),
             ]
         )
 
@@ -193,6 +196,7 @@ def handle_new_values(csv_name: str, values: list[SpokenFormEntry]):
         ),
     ]
 
+    init_scope_spoken_forms(graphemes_talon_list)
     update_spoken_forms_output()
     initialized = True