Skip to content

Commit

Permalink
Flatten scope type list (#2628)
Browse files Browse the repository at this point in the history
Brings our dfa compilation time from around 700ms down to about 320ms

Fixes #2614

I had forgotten, but we actually do parse `<user.any_alphanumeric_key>` so
we can show the user's spoken form in the tutorial. I've now utilized this
further and actually added the spoken forms for the glyph scope type in
the flattened list.

Note that this implementation is somewhat hacky on purpose. Basically I
didn't want to touch our csv parser without first talking to pokey since
a lot of these lists and spoken forms are used in places like the cheat
sheet and the tutorial. What I'm instead doing is keeping all the
existing lists and then creating a new list that is a flattened version
of them. That way the cheat sheet and other places can still use the
individual lists and we are only using this flattened larger list for the
actual scope type capture. This is probably something we want to revisit
later, but for now we're getting a huge boost in dfa compilation time
with no changes to the speakable grammar and that I think is a clear
win.



## Checklist

- [/] I have added
[tests](https://www.cursorless.org/docs/contributing/test-case-recorder/)
- [/] I have updated the
[docs](https://github.com/cursorless-dev/cursorless/tree/main/docs) and
[cheatsheet](https://github.com/cursorless-dev/cursorless/tree/main/cursorless-talon/src/cheatsheet)
- [x] I have not broken the cheatsheet
- [x] Run Talon grammar tests

---------

Co-authored-by: Phil Cohen <[email protected]>
  • Loading branch information
AndreasArvidsson and phillco committed Aug 4, 2024
1 parent 0e01381 commit 19849e8
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 236 deletions.
8 changes: 8 additions & 0 deletions cursorless-talon/src/csv_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ class SpokenFormEntry:
spoken_forms: list[str]


def csv_get_ctx():
    """Return this module's ``ctx`` object.

    NOTE(review): ``ctx`` is defined outside the visible hunk; presumably it
    is the Talon context that holds the CSV-backed lists -- confirm.
    """
    return ctx


def csv_get_normalized_ctx():
    """Return this module's ``normalized_ctx`` object.

    NOTE(review): ``normalized_ctx`` is defined outside the visible hunk;
    presumably it is the counterpart of ``ctx`` holding normalized spoken
    forms -- confirm.
    """
    return normalized_ctx


def init_csv_and_watch_changes(
filename: str,
default_values: ListToSpokenForms,
Expand Down
34 changes: 26 additions & 8 deletions cursorless-talon/src/get_grapheme_spoken_form_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
grapheme_capture_name = "user.any_alphanumeric_key"


def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
def get_grapheme_spoken_form_entries(
grapheme_talon_list: dict[str, str],
) -> list[SpokenFormOutputEntry]:
if grapheme_capture_name not in registry.captures:
# We require this capture, and expect it to be defined. We want to show a user friendly error if it isn't present (usually indicating a problem with their community.git setup) and we think the user is going to use Cursorless.
# However, sometimes users use different dictation engines (Vosk, Webspeech) with entirely different/smaller grammars that don't have the capture, and this code will run then, and falsely error. We don't want to show an error in that case because they don't plan to actually use Cursorless.
Expand All @@ -28,11 +30,20 @@ def get_grapheme_spoken_form_entries() -> list[SpokenFormOutputEntry]:
"id": id,
"spokenForms": spoken_forms,
}
for symbol_list in generate_lists_from_capture(grapheme_capture_name)
for id, spoken_forms in get_id_to_spoken_form_map(symbol_list).items()
for id, spoken_forms in talon_list_to_spoken_form_map(
grapheme_talon_list
).items()
]


def get_graphemes_talon_list() -> dict[str, str]:
    """
    Merge every Talon list referenced by the grapheme capture into one dict
    mapping spoken form to list value.

    Lists are merged in the order they are yielded; when the same spoken form
    occurs in more than one list, the value from the later list wins.
    """
    merged: dict[str, str] = {}
    for list_name in generate_lists_from_capture(grapheme_capture_name):
        merged.update(get_id_to_talon_list(list_name))
    return merged


def generate_lists_from_capture(capture_name) -> Iterator[str]:
"""
Given the name of a capture, yield the names of each list that the capture
Expand Down Expand Up @@ -68,20 +79,27 @@ def generate_lists_from_capture(capture_name) -> Iterator[str]:
)


def get_id_to_spoken_form_map(list_name: str) -> Mapping[str, list[str]]:
def get_id_to_talon_list(list_name: str) -> dict[str, str]:
"""
Given the name of a Talon list, return a mapping from the values in that
list to the list of spoken forms that map to the given value.
Given the name of a Talon list, return that list
"""
try:
# NB: [-1] because the last list is the active one
raw_list = typing.cast(dict[str, str], registry.lists[list_name][-1]).copy()
return typing.cast(dict[str, str], registry.lists[list_name][-1]).copy()
except Error:
app.notify(f"Error getting list {list_name}")
return {}


def talon_list_to_spoken_form_map(
    talon_list: dict[str, str],
) -> Mapping[str, list[str]]:
    """
    Given a Talon list, return a mapping from the values in that
    list to the list of spoken forms that map to the given value.

    Spoken forms are collected in the iteration order of ``talon_list``, so
    several spoken forms for the same value keep their relative order.
    """
    inverted_list: defaultdict[str, list[str]] = defaultdict(list)
    # Iterate the list passed in by the caller; this function no longer reads
    # the registry itself, so there is no local ``raw_list`` to loop over.
    for spoken_form, value in talon_list.items():
        inverted_list[value].append(spoken_form)

    return inverted_list
30 changes: 0 additions & 30 deletions cursorless-talon/src/modifiers/glyph_scope.py

This file was deleted.

1 change: 0 additions & 1 deletion cursorless-talon/src/modifiers/modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ def cursorless_simple_modifier(m) -> dict[str, str]:
"<user.cursorless_simple_scope_modifier>", # funk, state, class, every funk
"<user.cursorless_ordinal_scope>", # first past second word
"<user.cursorless_relative_scope>", # next funk, 3 funks
"<user.cursorless_surrounding_pair_force_direction>", # DEPRECATED "left quad" / "right quad"
]

modifiers = [
Expand Down
104 changes: 56 additions & 48 deletions cursorless-talon/src/modifiers/scopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,25 @@

# Declare the Talon lists for the individual scope type categories. Each
# category is declared twice: once for singular and once for plural spoken
# forms. These calls only declare the lists; nothing here fills them.

# Scope types (singular / plural).
mod.list("cursorless_scope_type", desc="Supported scope types")
mod.list("cursorless_scope_type_plural", desc="Supported plural scope types")

# Glyph scope types (singular / plural).
mod.list(
"cursorless_glyph_scope_type",
desc="Cursorless glyph scope type",
)
mod.list(
"cursorless_glyph_scope_type_plural",
desc="Plural version of Cursorless glyph scope type",
)

# Scope types that can act as surrounding pairs (singular / plural).
mod.list(
"cursorless_surrounding_pair_scope_type",
desc="Scope types that can function as surrounding pairs",
)
mod.list(
"cursorless_surrounding_pair_scope_type_plural",
desc="Plural form of scope types that can function as surrounding pairs",
)

mod.list(
"cursorless_custom_regex_scope_type",
desc="Supported custom regular expression scope types",
Expand All @@ -13,60 +32,49 @@
desc="Supported plural custom regular expression scope types",
)


@mod.capture(
rule="{user.cursorless_scope_type}"
" | <user.cursorless_surrounding_pair_scope_type>"
" | <user.cursorless_glyph_scope_type>"
" | {user.cursorless_custom_regex_scope_type}"
mod.list(
"cursorless_scope_type_flattened",
desc="All supported scope types flattened",
)
mod.list(
"cursorless_scope_type_flattened_plural",
desc="All supported plural scope types flattened",
)
def cursorless_scope_type(m) -> dict[str, str]:
"""Cursorless scope type singular"""
try:
return {"type": m.cursorless_scope_type}
except AttributeError:
pass

try:
return m.cursorless_surrounding_pair_scope_type
except AttributeError:
pass

try:
return m.cursorless_glyph_scope_type
except AttributeError:
pass

return {
"type": "customRegex",
"regex": m.cursorless_custom_regex_scope_type,
}
@mod.capture(rule="{user.cursorless_scope_type_flattened}")
def cursorless_scope_type(m) -> dict[str, str]:
"""Cursorless scope type singular"""
return creates_scope_type(m.cursorless_scope_type_flattened)


@mod.capture(
rule="{user.cursorless_scope_type_plural}"
" | <user.cursorless_surrounding_pair_scope_type_plural>"
" | <user.cursorless_glyph_scope_type_plural>"
" | {user.cursorless_custom_regex_scope_type_plural}"
)
@mod.capture(rule="{user.cursorless_scope_type_flattened_plural}")
def cursorless_scope_type_plural(m) -> dict[str, str]:
"""Cursorless scope type plural"""
try:
return {"type": m.cursorless_scope_type_plural}
except AttributeError:
pass

try:
return m.cursorless_surrounding_pair_scope_type_plural
except AttributeError:
pass
return creates_scope_type(m.cursorless_scope_type_flattened_plural)

try:
return m.cursorless_glyph_scope_type_plural
except AttributeError:
pass

return {
"type": "customRegex",
"regex": m.cursorless_custom_regex_scope_type_plural,
}
def creates_scope_type(id: str) -> dict[str, str]:
grouping, value = id.split(".", 1)
match grouping:
case "simple":
return {
"type": value,
}
case "surroundingPair":
return {
"type": "surroundingPair",
"delimiter": value,
}
case "customRegex":
return {
"type": "customRegex",
"regex": value,
}
case "glyph":
return {
"type": "glyph",
"character": value,
}
case _:
raise ValueError(f"Unsupported scope type grouping: {grouping}")
80 changes: 0 additions & 80 deletions cursorless-talon/src/modifiers/surrounding_pair.py

This file was deleted.

26 changes: 0 additions & 26 deletions cursorless-talon/src/paired_delimiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,29 +54,3 @@ def cursorless_wrapper_paired_delimiter(m) -> list[str]:
except AttributeError:
id = m.cursorless_wrapper_selectable_paired_delimiter
return paired_delimiters[id]


@mod.capture(
rule=(
"{user.cursorless_selectable_only_paired_delimiter} |"
"{user.cursorless_wrapper_selectable_paired_delimiter}"
)
)
def cursorless_selectable_paired_delimiter(m) -> str:
try:
return m.cursorless_selectable_only_paired_delimiter
except AttributeError:
return m.cursorless_wrapper_selectable_paired_delimiter


@mod.capture(
rule=(
"{user.cursorless_selectable_only_paired_delimiter_plural} |"
"{user.cursorless_wrapper_selectable_paired_delimiter_plural}"
)
)
def cursorless_selectable_paired_delimiter_plural(m) -> str:
try:
return m.cursorless_selectable_only_paired_delimiter_plural
except AttributeError:
return m.cursorless_wrapper_selectable_paired_delimiter_plural
6 changes: 5 additions & 1 deletion cursorless-talon/src/spoken_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
)
from .get_grapheme_spoken_form_entries import (
get_grapheme_spoken_form_entries,
get_graphemes_talon_list,
grapheme_capture_name,
)
from .marks.decorated_mark import init_hats
from .spoken_forms_output import SpokenFormsOutput
from .spoken_scope_forms import init_scope_spoken_forms

JSON_FILE = Path(__file__).parent / "spoken_forms.json"
disposables: list[Callable] = []
Expand Down Expand Up @@ -99,6 +101,7 @@ def update():
custom_spoken_forms: dict[str, list[SpokenFormEntry]] = {}
spoken_forms_output = SpokenFormsOutput()
spoken_forms_output.init()
graphemes_talon_list = get_graphemes_talon_list()

def update_spoken_forms_output():
spoken_forms_output.write(
Expand All @@ -113,7 +116,7 @@ def update_spoken_forms_output():
for entry in spoken_form_list
if entry.list_name in LIST_TO_TYPE_MAP
],
*get_grapheme_spoken_form_entries(),
*get_grapheme_spoken_form_entries(graphemes_talon_list),
]
)

Expand Down Expand Up @@ -193,6 +196,7 @@ def handle_new_values(csv_name: str, values: list[SpokenFormEntry]):
),
]

init_scope_spoken_forms(graphemes_talon_list)
update_spoken_forms_output()
initialized = True

Expand Down
Loading

0 comments on commit 19849e8

Please sign in to comment.