From 9af50e99597bd25ab080429c889ca0ec9681b839 Mon Sep 17 00:00:00 2001 From: Yrahcaz7 <74512479+Yrahcaz7@users.noreply.github.com> Date: Fri, 26 Jun 2026 20:09:20 -0400 Subject: [PATCH 1/5] formatting fixes & replace "scrub" with "clean" also fix missing links in `introduction.md` --- .../double-generator-expression/content.md | 6 +- .../double-generator-expression/snippet.txt | 6 +- .../.approaches/functools-reduce/content.md | 18 +++--- .../generator-expression/content.md | 22 +++---- .../generator-expression/snippet.txt | 6 +- .../acronym/.approaches/introduction.md | 61 ++++++++++--------- .../.approaches/list-comprehension/content.md | 18 +++--- .../list-comprehension/snippet.txt | 4 +- .../acronym/.approaches/loop/content.md | 12 ++-- .../acronym/.approaches/loop/snippet.txt | 4 +- .../.approaches/map-function/content.md | 22 +++---- .../.approaches/map-function/snippet.txt | 2 +- .../acronym/.approaches/regex-join/content.md | 42 ++++++------- .../.approaches/regex-join/snippet.txt | 2 +- .../acronym/.approaches/regex-sub/content.md | 26 ++++---- .../acronym/.approaches/regex-sub/snippet.txt | 2 +- 16 files changed, 129 insertions(+), 124 deletions(-) diff --git a/exercises/practice/acronym/.approaches/double-generator-expression/content.md b/exercises/practice/acronym/.approaches/double-generator-expression/content.md index 168d1354714..66b4dc6dff0 100644 --- a/exercises/practice/acronym/.approaches/double-generator-expression/content.md +++ b/exercises/practice/acronym/.approaches/double-generator-expression/content.md @@ -4,15 +4,15 @@ from string import ascii_letters -VALID_CHARS = {' ', '-'} | set(ascii_letters) +VALID_CHARS = {" ", "-"} | set(ascii_letters) def abbreviate(to_abbreviate): - to_abbreviate = ''.join(' ' if char == '-' else char + to_abbreviate = "".join(" " if char == "-" else char for char in to_abbreviate if char in VALID_CHARS) - return ''.join(word[0] for word in to_abbreviate.split()).upper() + return "".join(word[0] for word in 
to_abbreviate.split()).upper() ``` One way someone might try to increase performce is to use a single [generator expression][generator-expression] to clean the input, rather than using multiple calls to [`str.replace()`][str-replace]. diff --git a/exercises/practice/acronym/.approaches/double-generator-expression/snippet.txt b/exercises/practice/acronym/.approaches/double-generator-expression/snippet.txt index 5a56cfb20b3..6099d4ff15a 100644 --- a/exercises/practice/acronym/.approaches/double-generator-expression/snippet.txt +++ b/exercises/practice/acronym/.approaches/double-generator-expression/snippet.txt @@ -1,8 +1,8 @@ -VALID_CHARS = {' ', '-'} | set(ascii_letters) +VALID_CHARS = {" ", "-"} | set(ascii_letters) def abbreviate(to_abbreviate): - to_abbreviate = ''.join(' ' if char == '-' else char + to_abbreviate = "".join(" " if char == "-" else char for char in to_abbreviate if char in VALID_CHARS) - return ''.join(word[0] for word in to_abbreviate.split()).upper() \ No newline at end of file + return "".join(word[0] for word in to_abbreviate.split()).upper() \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/functools-reduce/content.md b/exercises/practice/acronym/.approaches/functools-reduce/content.md index 074db3fa284..cd0e10725d4 100644 --- a/exercises/practice/acronym/.approaches/functools-reduce/content.md +++ b/exercises/practice/acronym/.approaches/functools-reduce/content.md @@ -1,4 +1,4 @@ -# Scrub with `replace()` and join via `functools.reduce()` +# Clean with `replace()` and join via `functools.reduce()` ```python @@ -12,7 +12,7 @@ def abbreviate(to_abbreviate): ``` -- This approach begins by using [`str.replace()`][str-replace] to "scrub" (_remove_) non-letter characters such as `'`,`-`,`_`, and white space from `to_abbreviate`. +- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. 
- The phrase is then upper-cased by calling [`str.upper()`][str-upper], - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. @@ -21,23 +21,23 @@ This works because both `replace()` and `upper()` return strings, and both `uppe However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "scrub" `to_abbreviate`. +`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. -As of this writing, both of these methods benchmark slower than using `str.replace()` for scrubbing. +As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. ~~~~ -Once the phrase is scrubbed and turned into a word `list`, the acronym is created via `reduce()`. +Once the phrase is cleaned and turned into a word `list`, the acronym is created via `reduce()`. `reduce()` is a method from the [`functools`][functools] module, which provides support for higher-order functions and functional programming in Python. [`functools.reduce()`][reduce] applies an anonymous two-argument function (_the [lambda][python lambdas] in the code example_) to the items of an iterable. - The application of the function travels from left to right, so that the iterable becomes a single value (_it is "reduced" to a single value_). +The application of the function travels from left to right, so that the iterable becomes a single value (_it is "reduced" to a single value_). - Using code from the example above, `reduce(lambda start, word: start + word[0], ['GNU', 'IMAGE', 'MANIPULATION', 'PROGRAM'])` would calculate `((('GNU'[0] + 'IMAGE'[0])+'MANIPULATION'[0])+'PROGRAM'[0])`, or `GIMP`. 
- The left argument, `start`, is the _accumulated value_ and the right argument, `word`, is the value from the iterable that is used to update the accumulated 'total'. - The optional 'initializer' value '' is used here, and is placed ahead/before the items of the iterable in the calculation, and serves as a default if the iterable that is passed is empty. +Using code from the example above, `reduce(lambda start, word: start + word[0], ["GNU", "IMAGE", "MANIPULATION", "PROGRAM"])` would calculate `((("GNU"[0] + "IMAGE"[0]) + "MANIPULATION"[0]) + "PROGRAM"[0])`, or `GIMP`. +The left argument, `start`, is the _accumulated value_ and the right argument, `word`, is the value from the iterable that is used to update the accumulated 'total'. +The optional 'initializer' value `""` is used here, and is placed ahead/before the items of the iterable in the calculation, and serves as a default if the iterable that is passed is empty. Since using `reduce()` is fairly succinct, it is put directly on the `return` line to produce the acronym rather than assigning and returning an intermediate variable. diff --git a/exercises/practice/acronym/.approaches/generator-expression/content.md b/exercises/practice/acronym/.approaches/generator-expression/content.md index 47ec9aa8f89..92bcfb4bf2c 100644 --- a/exercises/practice/acronym/.approaches/generator-expression/content.md +++ b/exercises/practice/acronym/.approaches/generator-expression/content.md @@ -1,16 +1,16 @@ -# Scrub with `replace()` and join via `generator-expression` +# Clean with `replace()` and join via `generator-expression` ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() - # note the lack of square brackets around the comprehension. - return ''.join(word[0] for word in phrase) + # Note the lack of square brackets around the comprehension. 
+ return "".join(word[0] for word in phrase) ``` -- This approach begins by using [`str.replace()`][str-replace] to "scrub" (_remove_) non-letter characters such as `'`,`-`,`_`, and white space from `to_abbreviate`. +- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. - The phrase is then upper-cased by calling [`str.upper()`][str-upper], - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. @@ -19,25 +19,25 @@ This works because both `replace()` and `upper()` return strings, and both `uppe However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "scrub" `to_abbreviate`. +`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. -As of this writing, both of these methods benchmark slower than using `str.replace()` for scrubbing. +As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. ~~~~ A [`generator-expression`][generator-expression] is then used to iterate through the phrase and select the first letters of each word via [`bracket notation`][subscript notation]. -Generator expressions are short-form [generators][generators] - lazy iterators that produce their values _on demand_, instead of saving them to memory. +Generator expressions are short-form [generators][generators] — lazy iterators that produce their values _on demand_, instead of saving them to memory. This generator expression is consumed by [`str.join()`][str-join], which joins the generated letters together using an empty string. 
-Other "separator" strings can be used with `str.join()` - see [concept:python/string-methods]() for some additional examples. +Other "separator" strings can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. Since the generator expression and `join()` are fairly succinct, they are put directly on the `return` line rather than assigning and returning an intermediate variable for the acronym. In benchmarks, this solution was surprisingly slower than the `list comprehension` version. -[This article][Oscar Alsing] from Oscar Alsing briefly explains why. +[This article][Oscar-Alsing-article] from Oscar Alsing briefly explains why. -[Oscar Alsing]: https://www.oscaralsing.com/list-comprehension-vs-generator-expression/#:~:text=List%20comprehensions%20are%20usually%20faster,difference%20is%20often%20quite%20small. +[Oscar-Alsing-article]: https://www.oscaralsing.com/list-comprehension-vs-generator-expression/#:~:text=List%20comprehensions%20are%20usually%20faster,difference%20is%20often%20quite%20small. [chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html [generator-expression]: https://dbader.org/blog/python-generator-expressions [generators]: https://dbader.org/blog/python-generators diff --git a/exercises/practice/acronym/.approaches/generator-expression/snippet.txt b/exercises/practice/acronym/.approaches/generator-expression/snippet.txt index eb4a143df80..0bf11af6fb5 100644 --- a/exercises/practice/acronym/.approaches/generator-expression/snippet.txt +++ b/exercises/practice/acronym/.approaches/generator-expression/snippet.txt @@ -1,5 +1,5 @@ def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() - # note the lack of square brackets around the comprehension. 
- return ''.join(word[0] for word in phrase) \ No newline at end of file + # Note the lack of square brackets around the comprehension. + return "".join(word[0] for word in phrase) \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/introduction.md b/exercises/practice/acronym/.approaches/introduction.md index 52a72ff6cbf..086f248661e 100644 --- a/exercises/practice/acronym/.approaches/introduction.md +++ b/exercises/practice/acronym/.approaches/introduction.md @@ -3,26 +3,29 @@ There are multiple Pythonic ways to solve the Acronym exercise. Among them are: -- Using `str.replace()` to scrub the input, and: +- Using `str.replace()` to clean the input, and: + - joining with a `for loop` with string concatenation via the `+` operator. - joining via `str.join()`, passing a `list-comprehension` or `generator-expression`. - joining via `str.join()`, passing `map()`. - joining via `functools.reduce()`. -- Using `re.findall()`/`re.finditer()` to scrub the input, and: +- Using `re.findall()`/`re.finditer()` to clean the input, and: + - joining via `str.join()`, passing a `generator-expression`. - - Using `re.sub()` for both cleaning and joining (_using "only" regex for almost everything_)` +- Using `re.sub()` for both cleaning and joining (_using "only" regex for almost everything_). ## General Guidance The goal of the Acronym exercise is to collect the first letters of each word in the input phrase and return them as a single capitalized string (_the acronym_). -The challenge is to efficiently identify and capitalize the first letters while removing or ignoring non-letter characters such as `'`,`-`,`_`, and white space. +The challenge is to efficiently identify and capitalize the first letters while removing or ignoring non-letter characters such as `'`, `-`, `_`, and whitespace. There are two idiomatic strategies for non-letter character removal: + - Python's built-in [`str.replace()`][str-replace]. 
-- The [`re`][re] module, (_regular expressions_). +- The [`re`][re-module] module, (_regular expressions_). For all but the most complex scenarios, using `str.replace()` is generally more efficient than using a regular expression. @@ -34,12 +37,12 @@ Some `regex` methods can avoid looping altogether, although they can become very Strings are _immutable_, so any method to produce an acronym will be creating and returning a new `str`. -## Approach: scrub with `replace()` and join via `for` loop +## Approach: Clean with `replace()` and join via `for` loop ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() - acronym = '' + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() + acronym = "" for word in phrase: acronym += word[0] @@ -50,40 +53,39 @@ def abbreviate(to_abbreviate): For more information, take a look at the [loop approach][approach-loop]. -## Approach: scrub with `replace()` and join via `list comprehension` or `generator expression` - +## Approach: Clean with `replace()` and join via `list comprehension` or `generator expression` ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() - return ''.join([word[0] for word in phrase]) + return "".join([word[0] for word in phrase]) ###OR### def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() - # Note the parenthesis instead of square brackets. - return ''.join((word[0] for word in phrase)) + # Note the parentheses instead of square brackets. + return "".join((word[0] for word in phrase)) ``` -For more information, check out the [list-comprehension][approach-list-comprehension] approach or the [generator-expression][approach-generator-expression] approach. 
+For more information, check out the [list comprehension][approach-list-comprehension] approach or the [generator expression][approach-generator-expression] approach. -## Approach: scrub with `replace()` and join via `map()` +## Approach: Clean with `replace()` and join via `map()` ```python def abbreviate(to_abbreviate): phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() - return ''.join(map(lambda word: word[0], phrase)) + return "".join(map(lambda word: word[0], phrase)) ``` -For more information, read the [map][approach-map-function] approach. +For more information, read the [`map()`][approach-map-function] approach. -## Approach: scrub with `replace()` and join via `functools.reduce()` +## Approach: Clean with `replace()` and join via `functools.reduce()` ```python from functools import reduce @@ -98,7 +100,7 @@ def abbreviate(to_abbreviate): For more information, take a look at the [`functools.reduce()`][approach-functools-reduce] approach. -## Approach: filter with `re.findall()` and join via `str.join()` +## Approach: Clean with `re.findall()` and join via `str.join()` ```python import re @@ -107,13 +109,13 @@ import re def abbreviate(to_abbreviate): removed = re.findall(r"[a-zA-Z']+", to_abbreviate) - return ''.join(word[0] for word in removed).upper() + return "".join(word[0] for word in removed).upper() ``` For more information, take a look at the [regex-join][approach-regex-join] approach. -## Approach: use `re.sub()` +## Approach: Use `re.sub()` ```python import re @@ -125,24 +127,24 @@ def abbreviate(to_abbreviate): return re.sub(pattern, "", to_abbreviate.upper()) ``` -For more information, read the [regex-sub][approach-regex-sub] approach. +For more information, read the [`re.sub()`][approach-regex-sub] approach. 
-## Approach: use a `generator-expression` for both cleaning and joining +## Approach: Use a `generator-expression` for both cleaning and joining ```python from string import ascii_letters -VALID_CHARS = {' ', '-'} | set(ascii_letters) +VALID_CHARS = {" ", "-"} | set(ascii_letters) def abbreviate(to_abbreviate): - to_abbreviate = ''.join(' ' if char == '-' else char + to_abbreviate = "".join(" " if char == "-" else char for char in to_abbreviate if char in VALID_CHARS) - return ''.join(word[0] for word in to_abbreviate.split()).upper() + return "".join(word[0] for word in to_abbreviate.split()).upper() ``` For more information, take a look at the [double `generator-expression` approach][approach-double-generator-expression]. @@ -178,3 +180,6 @@ To compare performance of the approaches, take a look at the [Performance articl [approach-regex-join]: https://exercism.org/tracks/python/exercises/acronym/approaches/regex-join [approach-regex-sub]: https://exercism.org/tracks/python/exercises/acronym/approaches/regex-sub [article-performance]: https://exercism.org/tracks/python/exercises/acronym/articles/performance +[re-module]: https://docs.python.org/3/library/re.html +[str-replace]: https://docs.python.org/3/library/stdtypes.html#str.replace +[str-split]: https://docs.python.org/3/library/stdtypes.html#str.split diff --git a/exercises/practice/acronym/.approaches/list-comprehension/content.md b/exercises/practice/acronym/.approaches/list-comprehension/content.md index 7e98f45c74f..1c320171a1d 100644 --- a/exercises/practice/acronym/.approaches/list-comprehension/content.md +++ b/exercises/practice/acronym/.approaches/list-comprehension/content.md @@ -1,13 +1,13 @@ -# Scrub with `replace()` and join via `list comprehension` +# Clean with `replace()` and join via `list comprehension` ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " 
").upper().split() - return ''.join([word[0] for word in phrase]) + return "".join([word[0] for word in phrase]) ``` -- This approach begins by using [`str.replace()`][str-replace] to "scrub" (_remove_) non-letter characters such as `'`,`-`,`_`, and white space from `to_abbreviate`. +- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. - The phrase is then upper-cased by calling [`str.upper()`][str-upper], - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. @@ -17,19 +17,19 @@ However, if `split()` were called first, `replace()` and `upper()` would fail, s ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "scrub" `to_abbreviate`. +`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. -As of this writing, both of these methods benchmark slower than using `str.replace()` for scrubbing. +As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. ~~~~ A [`list comprehension`][list comprehension] is then used to iterate through the phrase and select the first letters of each word via [`bracket notation`][subscript notation]. -This comprehension is passed into [`str.join()`][str-join], which unpacks the `list` of first letters and joins them together using an empty string - the acronym. -Other "separator" strings besides an empty string can be used with `str.join()` - see [concept:python/string-methods]() for some additional examples. +This comprehension is passed into [`str.join()`][str-join], which unpacks the `list` of first letters and joins them together using an empty string — the acronym. 
+Other "separator" strings besides an empty string can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. Since the comprehension and `join()` are fairly succinct, they are put directly on the `return` line rather than assigning and returning an intermediate variable for the acronym. -The weakness of this solution is that it is taking up extra space with the `list comprehension`, which is creating and saving a `list` in memory - only to have that list immediately unpacked by the `str.join()` method. +The weakness of this solution is that it is taking up extra space with the `list comprehension`, which is creating and saving a `list` in memory — only to have that list immediately unpacked by the `str.join()` method. While this is trivial for the inputs this problem is tested against, it could become a problem if the inputs get longer. It could also be an issue if the code were deployed in a memory-constrained environment. A [generator expression][generator-expression] here would be more memory-efficient, though there are speed tradeoffs. 
diff --git a/exercises/practice/acronym/.approaches/list-comprehension/snippet.txt b/exercises/practice/acronym/.approaches/list-comprehension/snippet.txt index cf17c6ec676..aeaae10b1ef 100644 --- a/exercises/practice/acronym/.approaches/list-comprehension/snippet.txt +++ b/exercises/practice/acronym/.approaches/list-comprehension/snippet.txt @@ -1,4 +1,4 @@ def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() - return ''.join([word[0] for word in phrase]) \ No newline at end of file + return "".join([word[0] for word in phrase]) \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/loop/content.md b/exercises/practice/acronym/.approaches/loop/content.md index e89001d3810..341513b9570 100644 --- a/exercises/practice/acronym/.approaches/loop/content.md +++ b/exercises/practice/acronym/.approaches/loop/content.md @@ -1,10 +1,10 @@ -# Scrub with `replace()` and join via `for` loop +# Clean with `replace()` and join via `for` loop ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() - acronym = '' + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() + acronym = "" for word in phrase: acronym += word[0] @@ -13,7 +13,7 @@ def abbreviate(to_abbreviate): ``` -- This approach begins by using [`str.replace()`][str-replace] to "scrub" (_remove_) non-letter characters such as `'`,`-`,`_`, and white space from `to_abbreviate`. +- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. - The phrase is then upper-cased by calling [`str.upper()`][str-upper], - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. @@ -28,9 +28,9 @@ When the loop is complete, `acronym` is returned from the function. 
~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "scrub" `to_abbreviate`. +`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. -As of this writing, both of these methods benchmark slower than using `str.replace()` for scrubbing. +As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. ~~~~ [chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html diff --git a/exercises/practice/acronym/.approaches/loop/snippet.txt b/exercises/practice/acronym/.approaches/loop/snippet.txt index bdf60c6e78b..820d981c302 100644 --- a/exercises/practice/acronym/.approaches/loop/snippet.txt +++ b/exercises/practice/acronym/.approaches/loop/snippet.txt @@ -1,6 +1,6 @@ def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace('-', ' ').replace('_', ' ').upper().split() - acronym = '' + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() + acronym = "" for word in phrase: acronym += word[0] diff --git a/exercises/practice/acronym/.approaches/map-function/content.md b/exercises/practice/acronym/.approaches/map-function/content.md index f237bd823bd..d3514235403 100644 --- a/exercises/practice/acronym/.approaches/map-function/content.md +++ b/exercises/practice/acronym/.approaches/map-function/content.md @@ -1,14 +1,14 @@ -# Scrub with `replace()` and join via `map()` +# Clean with `replace()` and join via `map()` ```python def abbreviate(to_abbreviate): phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() - return ''.join(map(lambda word: word[0], phrase)) + return "".join(map(lambda word: word[0], phrase)) ``` -- This approach begins by using [`str.replace()`][str-replace] to "scrub" (_remove_) non-letter characters such as `'`,`-`,`_`, and white space from `to_abbreviate`. 
+- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. - The phrase is then upper-cased by calling [`str.upper()`][str-upper], - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. @@ -17,26 +17,26 @@ This works because both `replace()` and `upper()` return strings, and both `uppe However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "scrub" `to_abbreviate`. +`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. -As of this writing, both of these methods benchmark slower than using `str.replace()` for scrubbing. +As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. ~~~~ -Once the phrase is scrubbed and turned into a word `list`, the acronym is created via the [built-in][python-builtins] [`map()`][map] function. -`map()` applies an anonymous function (_the [lambda][python lambdas] in the code example_) to all the items of an iterable (_'mapping' the function 'onto' each item_), returning a [lazy iterator][lazy iterator] of results. +Once the phrase is cleaned and turned into a word `list`, the acronym is created via the [built-in][python-builtins] [`map()`][map] function. +`map()` applies an anonymous function (_the [`lambda`][python lambdas] in the code example_) to all the items of an iterable (_'mapping' the function 'onto' each item_), returning a [lazy iterator][lazy iterator] of results. The application of the function travels from left to right, and function results are produced as needed. 
-Using code from the example above, `map(lambda word: word[0], ['GNU', 'IMAGE', 'MANIPULATION', 'PROGRAM'])` would calculate `'GNU'[0], 'IMAGE'[0], 'MANIPULATION'[0]), 'PROGRAM'[0]` in order as a stream of data. - `word[0]` is the function, which extracts the letter at index zero for every word in the phrase list. -This stream of data can then be 'consumed' - either in a `loop`, or by being 'unpacked' by another function or process. +Using code from the example above, `map(lambda word: word[0], ["GNU", "IMAGE", "MANIPULATION", "PROGRAM"])` would calculate `"GNU"[0], "IMAGE"[0], "MANIPULATION"[0], "PROGRAM"[0]` in order as a stream of data. +`word[0]` is the function, which extracts the letter at index zero for every word in the phrase list. +This stream of data can then be 'consumed' — either in a `loop`, or by being 'unpacked' by another function or process. Here, the `iterator` from `map()` is immediately consumed/unpacked by [`join()`][str-join], which glues the results together with an empty string to produce the acronym. Since using `join()` with `map()` is fairly succinct, the combination is put directly on the `return` line to produce the acronym rather than assigning and returning an intermediate variable. -In benchmarks, this solution performed about as well as the `loops`, `reduce` and `list-comprehension` solutions. +In benchmarks, this solution performed about as well as the `loops`, `reduce` and `list-comprehension` solutions. 
[chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html [lazy iterator]: https://www.pythonmorsels.com/what-is-an-iterator/ diff --git a/exercises/practice/acronym/.approaches/map-function/snippet.txt b/exercises/practice/acronym/.approaches/map-function/snippet.txt index ea7b5b521ba..33216ad7ec0 100644 --- a/exercises/practice/acronym/.approaches/map-function/snippet.txt +++ b/exercises/practice/acronym/.approaches/map-function/snippet.txt @@ -1,4 +1,4 @@ def abbreviate(to_abbreviate): phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() - return ''.join(map(lambda word: word[0], phrase)) \ No newline at end of file + return "".join(map(lambda word: word[0], phrase)) \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/regex-join/content.md b/exercises/practice/acronym/.approaches/regex-join/content.md index 227ba06d5ea..b9f133b0bd5 100644 --- a/exercises/practice/acronym/.approaches/regex-join/content.md +++ b/exercises/practice/acronym/.approaches/regex-join/content.md @@ -7,39 +7,39 @@ import re ###re.findall### def abbreviate(to_abbreviate): - #Capitalize the input before cleaning. + # Capitalize the input before cleaning. removed = re.findall(r"[a-zA-Z']+", to_abbreviate.upper()) - return ''.join(word[0] for word in removed) + return "".join(word[0] for word in removed) #OR# def abbreviate(to_abbreviate): - #Capitalize the result after joining. - return ''.join(word[0] for word in + # Capitalize the result after joining. + return "".join(word[0] for word in re.findall(r"[a-zA-Z']+", to_abbreviate)).upper() ###re.finditer### def abbreviate(to_abbreviate): - #Capitalize the input before cleaning. + # Capitalize the input before cleaning. removed = re.finditer(r"[a-zA-Z']+", to_abbreviate.upper()) - #word.group(0)[0] (first letter of Matched word) can also be written as - #word[0][0], with the first bracketed number referring to Match group 0. 
- return ''.join(word.group(0)[0] for word in removed) + # word.group(0)[0] (first letter of Matched word) can also be written as + # word[0][0], with the first bracketed number referring to Match group 0. + return "".join(word.group(0)[0] for word in removed) #OR# def abbreviate(to_abbreviate): - #Capitalize the output after joining. - #Use bracket notation for Match group. - return ''.join(word[0][0] for word in - re.finditer(r"[a-zA-Z']+", to_abbreviate)).upper() + # Capitalize the output after joining. + # Use bracket notation for Match group. + return "".join(word[0][0] for word in + re.finditer(r"[a-zA-Z']+", to_abbreviate)).upper() ``` -This approach begins by using [`re.findall()`][re-findall] method from the [re][re] module to "scrub" (_remove_) non-letter characters such as `'`,`-`,`_`, and white space from `to_abbreviate`. +This approach begins by using the [`re.findall()`][re-findall] method from the [`re` module][re-module] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. Python's `re` module provides support for [regular expressions][regular expressions] within the language, and has many useful methods for searching, parsing, and modifying text. Regular expression matching starts at the left-hand side of the input and travels toward the right. @@ -48,7 +48,7 @@ Regular expression matching starts at the left-hand side of the input and travel The [`re.finditer()`][re-finditer] method works in the same fashion as `re.findall()`, but returns results as a _[lazy iterator][lazy iterator]_ over [Match objects][match objects]. - This means that `re.finditer()` produces matches _on demand_ instead of saving them to memory, but needs to have both the iterator and the Match objects _unpacked_. +This means that `re.finditer()` produces matches _on demand_ instead of saving them to memory, but needs to have both the iterator and the Match objects _unpacked_. 
The regular expression `r[a-zA-Z']+` in the code example looks for any single character in the range `a-z` lowercase and `A-Z` uppercase, plus the `'` (_apostrophe_) character. @@ -57,12 +57,12 @@ This means that the expression will match any collection or repeat of letters (_ For example, in `Complementary metal-oxide semiconductor`, the regex will match `Complementary`, `metal`, `oxide`, and `semiconductor`. The regex will not match on ` ` or `-`. -The result returned by `findall()` will then be `['Complementary', 'metal', 'oxide', 'semiconductor']`. +The result returned by `findall()` will then be `["Complementary", "metal", "oxide", "semiconductor"]`. ~~~~exercism/note -`to_abbreviate.replace("_", " ").replace("-", " ").upper().split()` can also be used to 'scrub' `to_abbreviate` and turn the results into a `list`. -The `.replace()` approach benchmarked faster than using `re.findall()`/`re.finditer()` to 'scrub', most likely due to overhead in importing the `re` module and in the [backtracking][backtracking] behavior of regex searching and matching. +`to_abbreviate.replace("_", " ").replace("-", " ").upper().split()` can also be used to clean `to_abbreviate` and turn the results into a `list`. +The `.replace()` approach benchmarked faster than using `re.findall()`/`re.finditer()` to clean, most likely due to overhead in importing the `re` module and in the [backtracking][backtracking] behavior of regex searching and matching. [backtracking]: https://stackoverflow.com/questions/9011592/in-regular-expressions-what-is-a-backtracking-back-referencing ~~~~ @@ -72,9 +72,9 @@ Once `findall()` or `finditer()` completes, a [`generator-expression`][generator Note that when using `finditer()`, the `Match object` has to be unpacked via `match.group(0)`/`match[0]` before the first letter can be selected. -Generator expressions are short-form [generators][generators] - lazy iterators that produce their values _on demand_, instead of saving them to memory. 
+Generator expressions are short-form [generators][generators] — lazy iterators that produce their values _on demand_, instead of saving them to memory. This generator expression is consumed by [`str.join()`][str-join], which joins the generated letters together using an empty string. -Other "separator" strings can be used with `str.join()` - see [concept:python/string-methods]() for some additional examples. +Other "separator" strings can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. Finally, the result of `.join()` is capitalized using the [chained][chaining] [`.upper()`][str-upper]. @@ -82,7 +82,7 @@ Alternatively, `.upper()` can be used on `to_abbreviate` within `findall()`/`fin Since the generator expression + join + upper is fairly succinct, they can be placed directly on the `return` line rather than assigning and returning an intermediate variable for the acronym. -This approach was less performant in benchmarks than those using `loop`, `map`, `list-comprehension`, and `reduce`. +This approach was less performant in benchmarks than those using `loop`, `map`, `list-comprehension`, and `reduce`. 
[chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html [generator-expression]: https://dbader.org/blog/python-generator-expressions @@ -90,7 +90,7 @@ This approach was less performant in benchmarks than those using `loop`, `map`, [lazy iterator]: https://www.pythonmorsels.com/what-is-an-iterator/ [re-findall]: https://docs.python.org/3/library/re.html#re.findall [re-finditer]: https://docs.python.org/3/library/re.html#re.finditer -[re]: https://docs.python.org/3/library/re.html +[re-module]: https://docs.python.org/3/library/re.html [regular expressions]: https://en.wikipedia.org/wiki/Regular_expression [str-join]: https://docs.python.org/3/library/stdtypes.html#str.join [str-upper]: https://docs.python.org/3/library/stdtypes.html#str.upper diff --git a/exercises/practice/acronym/.approaches/regex-join/snippet.txt b/exercises/practice/acronym/.approaches/regex-join/snippet.txt index 309665fdf22..a70bce6b54b 100644 --- a/exercises/practice/acronym/.approaches/regex-join/snippet.txt +++ b/exercises/practice/acronym/.approaches/regex-join/snippet.txt @@ -3,4 +3,4 @@ import re def abbreviate(phrase): removed = re.findall(r"[a-zA-Z']+", phrase) - return ''.join(word[0] for word in removed).upper() \ No newline at end of file + return "".join(word[0] for word in removed).upper() \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/regex-sub/content.md b/exercises/practice/acronym/.approaches/regex-sub/content.md index 4f1b447d81f..7e27a6d2ddd 100644 --- a/exercises/practice/acronym/.approaches/regex-sub/content.md +++ b/exercises/practice/acronym/.approaches/regex-sub/content.md @@ -1,4 +1,4 @@ -## Approach: use `re.sub` +# Approach: Use `re.sub()` ```python @@ -8,15 +8,15 @@ import re def abbreviate_regex_sub(to_abbreviate): pattern = re.compile(r"(? 
Date: Fri, 26 Jun 2026 20:10:28 -0400 Subject: [PATCH 2/5] update performance snippet to match article also correct a few statements about performance --- .../practice/acronym/.approaches/introduction.md | 5 +++-- .../acronym/.articles/performance/content.md | 12 +++++++----- .../acronym/.articles/performance/snippet.md | 16 ++++++++-------- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/exercises/practice/acronym/.approaches/introduction.md b/exercises/practice/acronym/.approaches/introduction.md index 086f248661e..f95ed44f829 100644 --- a/exercises/practice/acronym/.approaches/introduction.md +++ b/exercises/practice/acronym/.approaches/introduction.md @@ -167,9 +167,10 @@ Of these strategies, the `loop` approach is the fastest, although `list-comprehe All approaches are fairly succinct and readable, although the 'classic' loop is probably the easiest understood by those coming to Python from other programming languages. -The least performant for the test data was using `generator-expression`s (both one and two), `re.findall`, and `re.sub`. +The least performant for the test data were the `re.findall`, `re.sub`, and double `generator-expression` approaches. + +To compare performance of the approaches, take a look at the [performance article][article-performance]. -To compare performance of the approaches, take a look at the [Performance article][article-performance]. 
[approach-double-generator-expression]: https://exercism.org/tracks/python/exercises/acronym/approaches/double-generator-expression [approach-functools-reduce]: https://exercism.org/tracks/python/exercises/acronym/approaches/functools-reduce diff --git a/exercises/practice/acronym/.articles/performance/content.md b/exercises/practice/acronym/.articles/performance/content.md index b286f1926cf..5d6ca8a255b 100644 --- a/exercises/practice/acronym/.articles/performance/content.md +++ b/exercises/practice/acronym/.articles/performance/content.md @@ -17,6 +17,7 @@ The [approaches page][approaches] lists many idiomatic approaches to this exerci We will also include a tenth approach, which is a variant of the `re.findall()` approach. This variant uses a regex to select the _first letters_ of each word, instead of the _entirety_ of each word. + ## Benchmarks To benchmark these approaches, we wrote a [small benchmarking script][benchmark-application] using the [`timeit`][timeit] module along with the third party libraries [`numpy`][numpy] and [`pandas`][pandas]. @@ -24,7 +25,7 @@ To benchmark these approaches, we wrote a [small benchmarking script][benchmark- All approaches are `O(n)` — they require (_at minimum_) a loop through the entire input to create results, and the work scales in line with the length of the function input. That doesn't mean that all of these approaches take the _same amount of time to run_. Despite being `O(n)`, overhead such as number of function calls, module importing/loading, regex backtracking, generator tracking, string concatenation, and `lambda` evaluation can add significant time. -Some of the slowest strategies (_mostly regex solutions_) are _**10 (or more) times times slower**_ than the fastest methods (_straight looping and list comprehensions_). +Some of the slowest strategies (_mostly regex solutions_) are _**5 (or more) times slower**_ than the fastest methods (_straight looping and list comprehensions_). 
Of these variants, the `loop` approach is by far the fastest (and easiest to read) for inputs under length 45. Above length 45, repeated string creation and concatenation via `+` starts to slow things down, and the `list-comprehension` approach becomes more efficient due to its loop optimizations and use of `str.join()`. @@ -32,10 +33,10 @@ Above length 45, repeated string creation and concatenation via `+` starts to sl At the largest input sizes, `map()` and `generator expressions` become more efficient (_as does `functools.reduce()` for certain inputs_), as they are not saving intermediary results to memory in the same way `list comprehensions` or string concatenation do. -The least efficient and least readable are the regex solutions. +Among the least efficient and least readable are the regex solutions. While regex definitely has its place, the lack of readability and significant slowdown in this case become an issue. Of particular interest is the `re.sub()` vs `re.findall()` (_first letters_) solutions. -Even though the `re.sub()` solution takes only 652 steps in the regex engine, `re.sub()` and its unpacking is slow enough that the 1766 steps for the first letters `re.findall()` solution is faster. +Even though the `re.sub()` solution takes only 652 steps in the regex engine, `re.sub()` and its unpacking is slow enough that the 1766 steps for the first letters `re.findall()` solution is often faster. 
| **String Length >>>** | Length: 13 | Length: 14 | Length: 19 | Length: 20 | Length: 25 | Length: 30 | Length: 35 | Length: 39 | Length: 42 | Length: 45 | Length: 60 | Length: 63 | Length: 74 | Length: 78 | Length: 93 | Length: 108 | Length: 120 | Length: 140 | Length: 150 | Length: 200 | Length: 210 | Length: 225 | Length: 260 | Length: 310 | Length: 360 | Length: 400 | Length: 2940 | @@ -52,12 +53,13 @@ Even though the `re.sub()` solution takes only 652 steps in the regex engine, `r | two generator expressions | 1.14e-06 | 1.12e-06 | 1.47e-06 | 1.49e-06 | 1.79e-06 | 2.09e-06 | 2.48e-06 | 2.54e-06 | 2.90e-06 | 2.85e-06 | 3.87e-06 | 3.92e-06 | 4.79e-06 | 4.88e-06 | 5.81e-06 | 6.92e-06 | 7.14e-06 | 8.99e-06 | 8.80e-06 | 1.24e-05 | 1.26e-05 | 1.41e-05 | 1.53e-05 | 1.83e-05 | 2.21e-05 | 2.25e-05 | 1.65e-04 | -Keep in mind that all these approaches are very fast, and that [benchmarking at this granularity can be unstable, especially on modern CPUs][timeit-issue]. Note that there can also be [bias in benchmarking][biased-benchmarks]. +Keep in mind that all these approaches are very fast, and that [benchmarking at this granularity can be unstable, especially on modern CPUs][timeit-issue]. +Note that there can also be [bias in benchmarking][biased-benchmarks]. Measurements were taken on an M3 Mac running MacOS Sonoma. Tests used `timeit.Timer.autorange()`, repeated 3 times. Time is reported in seconds taken per string after calculating the 'best of' time. -The [timeit module][timeit] docs have more details, and [note.nkmk.me][note_nkmk_me] has a nice summary of methods. +The [`timeit` module][timeit] docs have more details, and [note.nkmk.me][note_nkmk_me] has a nice summary of methods. 
[approaches]: https://exercism.org/tracks/python/exercises/acronym/dig_deeper [approach-double-generator-expression]: https://exercism.org/tracks/python/exercises/acronym/approaches/double-generator-expression diff --git a/exercises/practice/acronym/.articles/performance/snippet.md b/exercises/practice/acronym/.articles/performance/snippet.md index 00e1067fd98..c9f18f2b0ab 100644 --- a/exercises/practice/acronym/.articles/performance/snippet.md +++ b/exercises/practice/acronym/.articles/performance/snippet.md @@ -1,8 +1,8 @@ -| | **Len: 13** | **Len: 30** | **Len: 74** | **Len: 210** | **Len: 2940** | -|------------------------------ |:-----------: |:-----------: |:-----------: |:------------: |:-------------: | -| **loop** | 5.79e-07 | 7.25e-07 | 1.83e-06 | 4.63e-06 | 5.94e-05 | -| **list_comprehension** | 7.28e-07 | 8.30e-07 | 1.76e-06 | 4.08e-06 | 5.42e-05 | -| **functools.reduce()** | 7.93e-07 | 9.56e-07 | 2.45e-06 | 6.03e-06 | 8.10e-05 | -| **map()** | 8.05e-07 | 9.16e-07 | 2.00e-06 | 4.81e-06 | 5.64e-05 | -| **re.findall() 1st letters** | 1.63e-06 | 2.50e-06 | 5.94e-06 | 1.54e-05 | 1.95e-04 | -| **re.sub()** | 2.35e-06 | 2.92e-06 | 6.90e-06 | 1.90e-05 | 2.03e-04 | \ No newline at end of file +| **String Length >>>** | Len: 13 | Len: 30 | Len: 74 | Len: 210 | Len: 2940 | +|:-----------------------------|------------:|------------:|------------:|------------:|------------:| +| **loop** | 2.78e-07 | 3.32e-07 | 8.26e-07 | 2.18e-06 | 2.51e-05 | +| **list comprehension** | 2.92e-07 | 3.29e-07 | 7.50e-07 | 1.85e-06 | 1.95e-05 | +| **map()** | 4.18e-07 | 4.88e-07 | 1.08e-06 | 2.65e-06 | 2.95e-05 | +| **functools.reduce()** | 4.35e-07 | 5.17e-07 | 1.40e-06 | 3.56e-06 | 4.30e-05 | +| **re.sub()** | 1.04e-06 | 1.29e-06 | 3.31e-06 | 9.03e-06 | 9.89e-05 | +| **re.findall() 1st letters** | 1.09e-06 | 1.61e-06 | 3.42e-06 | 9.05e-06 | 1.11e-04 | \ No newline at end of file From b50b857d4e18ddb3f7279cc2d7e9ade03f2c0e5f Mon Sep 17 00:00:00 2001 From: Yrahcaz7 
<74512479+Yrahcaz7@users.noreply.github.com> Date: Fri, 26 Jun 2026 20:43:57 -0400 Subject: [PATCH 3/5] fix the intro present in most approaches also fix a few incorrect things in the regex approaches --- .../.approaches/functools-reduce/content.md | 19 +++++++++-------- .../.approaches/functools-reduce/snippet.txt | 2 +- .../generator-expression/content.md | 17 ++++++++------- .../acronym/.approaches/introduction.md | 4 ++-- .../.approaches/list-comprehension/content.md | 17 ++++++++------- .../acronym/.approaches/loop/content.md | 21 ++++++++++--------- .../.approaches/map-function/content.md | 18 +++++++++------- .../.approaches/map-function/snippet.txt | 2 +- .../acronym/.approaches/regex-join/content.md | 17 ++++++++------- .../acronym/.approaches/regex-sub/content.md | 10 ++++----- 10 files changed, 67 insertions(+), 60 deletions(-) diff --git a/exercises/practice/acronym/.approaches/functools-reduce/content.md b/exercises/practice/acronym/.approaches/functools-reduce/content.md index cd0e10725d4..96ee36a1c78 100644 --- a/exercises/practice/acronym/.approaches/functools-reduce/content.md +++ b/exercises/practice/acronym/.approaches/functools-reduce/content.md @@ -6,24 +6,25 @@ from functools import reduce def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() return reduce(lambda start, word: start + word[0], phrase, "") ``` - -- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. -- The phrase is then upper-cased by calling [`str.upper()`][str-upper], +- This approach begins by using [`str.replace()`][str-replace] on `to_abbreviate` to convert non-letter characters such as `-` and `_` into spaces. +- The phrase is then upper-cased by calling [`str.upper()`][str-upper]. 
- Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. -The three methods above are all [chained][chaining] together, with the output of one method serving as the input to the next method in the "chain". -This works because both `replace()` and `upper()` return strings, and both `upper()` and `split()` take strings as arguments. -However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. +The three methods above are all [chained][chaining] together, with each method operating on the output of the method before it in the "chain". +This works because both `replace()` and `upper()` _operate on_ strings (as they are `str` methods) and _return_ strings. +If `split()` was called first, `replace()` and `upper()` would fail, since they cannot operate on the `list` returned by `split()`. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. -These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. +`re.findall()` or `re.finditer()` can also be used to clean `to_abbreviate`. +These two methods from the [`re` module][re-module] will return a `list` or a lazy `iterator` of results, respectively. As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. 
+ +[re-module]: https://docs.python.org/3/library/re.html ~~~~ diff --git a/exercises/practice/acronym/.approaches/functools-reduce/snippet.txt b/exercises/practice/acronym/.approaches/functools-reduce/snippet.txt index 190d5d4aeff..adda726b2ab 100644 --- a/exercises/practice/acronym/.approaches/functools-reduce/snippet.txt +++ b/exercises/practice/acronym/.approaches/functools-reduce/snippet.txt @@ -1,6 +1,6 @@ from functools import reduce def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() return reduce(lambda start, word: start + word[0], phrase, "") \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/generator-expression/content.md b/exercises/practice/acronym/.approaches/generator-expression/content.md index 92bcfb4bf2c..ad6ef3c43b0 100644 --- a/exercises/practice/acronym/.approaches/generator-expression/content.md +++ b/exercises/practice/acronym/.approaches/generator-expression/content.md @@ -9,19 +9,20 @@ def abbreviate(to_abbreviate): return "".join(word[0] for word in phrase) ``` - -- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. -- The phrase is then upper-cased by calling [`str.upper()`][str-upper], +- This approach begins by using [`str.replace()`][str-replace] on `to_abbreviate` to convert non-letter characters such as `-` and `_` into spaces. +- The phrase is then upper-cased by calling [`str.upper()`][str-upper]. - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. -The three methods above are all [chained][chaining] together, with the output of one method serving as the input to the next method in the "chain". -This works because both `replace()` and `upper()` return strings, and both `upper()` and `split()` take strings as arguments. 
-However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. +The three methods above are all [chained][chaining] together, with each method operating on the output of the method before it in the "chain". +This works because both `replace()` and `upper()` _operate on_ strings (as they are `str` methods) and _return_ strings. +If `split()` was called first, `replace()` and `upper()` would fail, since they cannot operate on the `list` returned by `split()`. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. -These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. +`re.findall()` or `re.finditer()` can also be used to clean `to_abbreviate`. +These two methods from the [`re` module][re-module] will return a `list` or a lazy `iterator` of results, respectively. As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. 
+ +[re-module]: https://docs.python.org/3/library/re.html ~~~~ diff --git a/exercises/practice/acronym/.approaches/introduction.md b/exercises/practice/acronym/.approaches/introduction.md index f95ed44f829..15746d0de64 100644 --- a/exercises/practice/acronym/.approaches/introduction.md +++ b/exercises/practice/acronym/.approaches/introduction.md @@ -77,7 +77,7 @@ For more information, check out the [list comprehension][approach-list-comprehen ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() return "".join(map(lambda word: word[0], phrase)) ``` @@ -92,7 +92,7 @@ from functools import reduce def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() return reduce(lambda start, word: start + word[0], phrase, "") ``` diff --git a/exercises/practice/acronym/.approaches/list-comprehension/content.md b/exercises/practice/acronym/.approaches/list-comprehension/content.md index 1c320171a1d..1f0b123ffb1 100644 --- a/exercises/practice/acronym/.approaches/list-comprehension/content.md +++ b/exercises/practice/acronym/.approaches/list-comprehension/content.md @@ -7,19 +7,20 @@ def abbreviate(to_abbreviate): return "".join([word[0] for word in phrase]) ``` -- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. -- The phrase is then upper-cased by calling [`str.upper()`][str-upper], +- This approach begins by using [`str.replace()`][str-replace] on `to_abbreviate` to convert non-letter characters such as `-` and `_` into spaces. +- The phrase is then upper-cased by calling [`str.upper()`][str-upper]. - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. 
-The three methods above are all [chained][chaining] together, with the output of one method serving as the input to the next method in the "chain". -This works because both `replace()` and `upper()` return strings, and both `upper()` and `split()` _take_ strings as arguments. -However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. - +The three methods above are all [chained][chaining] together, with each method operating on the output of the method before it in the "chain". +This works because both `replace()` and `upper()` _operate on_ strings (as they are `str` methods) and _return_ strings. +If `split()` was called first, `replace()` and `upper()` would fail, since they cannot operate on the `list` returned by `split()`. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. -These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. +`re.findall()` or `re.finditer()` can also be used to clean `to_abbreviate`. +These two methods from the [`re` module][re-module] will return a `list` or a lazy `iterator` of results, respectively. As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. + +[re-module]: https://docs.python.org/3/library/re.html ~~~~ diff --git a/exercises/practice/acronym/.approaches/loop/content.md b/exercises/practice/acronym/.approaches/loop/content.md index 341513b9570..8bcf47754cb 100644 --- a/exercises/practice/acronym/.approaches/loop/content.md +++ b/exercises/practice/acronym/.approaches/loop/content.md @@ -12,29 +12,30 @@ def abbreviate(to_abbreviate): return acronym ``` - -- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. 
-- The phrase is then upper-cased by calling [`str.upper()`][str-upper], +- This approach begins by using [`str.replace()`][str-replace] on `to_abbreviate` to convert non-letter characters such as `-` and `_` into spaces. +- The phrase is then upper-cased by calling [`str.upper()`][str-upper]. - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. -The three methods above are all [chained][chaining] together, with the output of one method serving as the input to the next method in the "chain". -This works because both `replace()` and `upper()` return strings, and both `upper()` and `split()` take strings as arguments. -However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. +The three methods above are all [chained][chaining] together, with each method operating on the output of the method before it in the "chain". +This works because both `replace()` and `upper()` _operate on_ strings (as they are `str` methods) and _return_ strings. +If `split()` was called first, `replace()` and `upper()` would fail, since they cannot operate on the `list` returned by `split()`. After the phrase is cleaned and split into a word list, we declare an empty `acronym` string to hold our final acronym. The phrase `list` is then looped over via `for word in phrase`. -The first letter of each word is selected via [`bracket notation`][subscript notation], and concatenated via `+` to the `acronym` string. +The first letter of each word is selected via [`bracket notation`][subscript-notation], and concatenated via `+` to the `acronym` string. When the loop is complete, `acronym` is returned from the function. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. -These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. 
+`re.findall()` or `re.finditer()` can also be used to clean `to_abbreviate`. +These two methods from the [`re` module][re-module] will return a `list` or a lazy `iterator` of results, respectively. As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. + +[re-module]: https://docs.python.org/3/library/re.html ~~~~ [chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html [str-replace]: https://docs.python.org/3/library/stdtypes.html#str.replace [str-split]: https://docs.python.org/3/library/stdtypes.html#str.split [str-upper]: https://docs.python.org/3/library/stdtypes.html#str.upper -[subscript notation]: https://docs.python.org/3/glossary.html#term-slice +[subscript-notation]: https://docs.python.org/3/glossary.html#term-slice diff --git a/exercises/practice/acronym/.approaches/map-function/content.md b/exercises/practice/acronym/.approaches/map-function/content.md index d3514235403..029f05f1758 100644 --- a/exercises/practice/acronym/.approaches/map-function/content.md +++ b/exercises/practice/acronym/.approaches/map-function/content.md @@ -3,23 +3,25 @@ ```python def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() return "".join(map(lambda word: word[0], phrase)) ``` -- This approach begins by using [`str.replace()`][str-replace] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. -- The phrase is then upper-cased by calling [`str.upper()`][str-upper], +- This approach begins by using [`str.replace()`][str-replace] on `to_abbreviate` to convert non-letter characters such as `-` and `_` into spaces. +- The phrase is then upper-cased by calling [`str.upper()`][str-upper]. - Finally, the phrase is turned into a `list` of words by calling [`str.split()`][str-split]. 
-The three methods above are all [chained][chaining] together, with the output of one method serving as the input to the next method in the "chain". -This works because both `replace()` and `upper()` return strings, and both `upper()` and `split()` take strings as arguments. -However, if `split()` were called first, `replace()` and `upper()` would fail, since neither method will take a `list` as input. +The three methods above are all [chained][chaining] together, with each method operating on the output of the method before it in the "chain". +This works because both `replace()` and `upper()` _operate on_ strings (as they are `str` methods) and _return_ strings. +If `split()` was called first, `replace()` and `upper()` would fail, since they cannot operate on the `list` returned by `split()`. ~~~~exercism/note -`re.findall()` or `re.finditer()` can also be used to "clean" `to_abbreviate`. -These two methods from the `re` module will return a `list` or a lazy `iterator` of results, respectively. +`re.findall()` or `re.finditer()` can also be used to clean `to_abbreviate`. +These two methods from the [`re` module][re-module] will return a `list` or a lazy `iterator` of results, respectively. As of this writing, both of these methods benchmark slower than using `str.replace()` for cleaning. 
+ +[re-module]: https://docs.python.org/3/library/re.html ~~~~ diff --git a/exercises/practice/acronym/.approaches/map-function/snippet.txt b/exercises/practice/acronym/.approaches/map-function/snippet.txt index 33216ad7ec0..cd910ee892c 100644 --- a/exercises/practice/acronym/.approaches/map-function/snippet.txt +++ b/exercises/practice/acronym/.approaches/map-function/snippet.txt @@ -1,4 +1,4 @@ def abbreviate(to_abbreviate): - phrase = to_abbreviate.replace("_", " ").replace("-", " ").upper().split() + phrase = to_abbreviate.replace("-", " ").replace("_", " ").upper().split() return "".join(map(lambda word: word[0], phrase)) \ No newline at end of file diff --git a/exercises/practice/acronym/.approaches/regex-join/content.md b/exercises/practice/acronym/.approaches/regex-join/content.md index b9f133b0bd5..d621c64f209 100644 --- a/exercises/practice/acronym/.approaches/regex-join/content.md +++ b/exercises/practice/acronym/.approaches/regex-join/content.md @@ -8,9 +8,9 @@ import re def abbreviate(to_abbreviate): # Capitalize the input before cleaning. - removed = re.findall(r"[a-zA-Z']+", to_abbreviate.upper()) + cleaned = re.findall(r"[a-zA-Z']+", to_abbreviate.upper()) - return "".join(word[0] for word in removed) + return "".join(word[0] for word in cleaned) #OR# @@ -23,11 +23,11 @@ def abbreviate(to_abbreviate): def abbreviate(to_abbreviate): # Capitalize the input before cleaning. - removed = re.finditer(r"[a-zA-Z']+", to_abbreviate.upper()) + cleaned = re.finditer(r"[a-zA-Z']+", to_abbreviate.upper()) # word.group(0)[0] (first letter of Matched word) can also be written as # word[0][0], with the first bracketed number referring to Match group 0. 
- return "".join(word.group(0)[0] for word in removed) + return "".join(word.group(0)[0] for word in cleaned) #OR# @@ -39,7 +39,8 @@ def abbreviate(to_abbreviate): ``` -This approach begins by using [`re.findall()`][re-findall] method from the [`re` module][re-module] to remove non-letter characters such as `'`, `-`, `_`, and whitespace from `to_abbreviate`. +This approach begins by using the [`re.findall()`][re-findall] method from the [`re` module][re-module] to clean `to_abbreviate` and split it into words. + Python's `re` module provides support for [regular expressions][regular expressions] within the language, and has many useful methods for searching, parsing, and modifying text. Regular expression matching starts at the left-hand side of the input and travels toward the right. @@ -47,8 +48,8 @@ Regular expression matching starts at the left-hand side of the input and travel `re.findall()` searches text for all matching patterns, returning results (_including 'empty' matches_) in a `list` of strings. -The [`re.finditer()`][re-finditer] method works in the same fashion as `re.findall()`, but returns results as a _[lazy iterator][lazy iterator]_ over [Match objects][match objects]. -This means that `re.finditer()` produces matches _on demand_ instead of saving them to memory, but needs to have both the iterator and the Match objects _unpacked_. +The [`re.finditer()`][re-finditer] method works in the same fashion as `re.findall()`, but returns results as a _[lazy iterator][lazy iterator]_ over [`Match` objects][match objects]. +This means that `re.finditer()` produces matches _on demand_ instead of saving them to memory, but needs to have both the iterator and the `Match` objects _unpacked_. The regular expression `r[a-zA-Z']+` in the code example looks for any single character in the range `a-z` lowercase and `A-Z` uppercase, plus the `'` (_apostrophe_) character. 
@@ -61,7 +62,7 @@ The result returned by `findall()` will then be `["Complementary", "metal", "oxi ~~~~exercism/note -`to_abbreviate.replace("_", " ").replace("-", " ").upper().split()` can also be used to clean `to_abbreviate` and turn the results into a `list`. +`to_abbreviate.replace("-", " ").replace("_", " ").upper().split()` can also be used to clean `to_abbreviate` and turn the results into a `list`. The `.replace()` approach benchmarked faster than using `re.findall()`/`re.finditer()` to clean, most likely due to overhead in importing the `re` module and in the [backtracking][backtracking] behavior of regex searching and matching. [backtracking]: https://stackoverflow.com/questions/9011592/in-regular-expressions-what-is-a-backtracking-back-referencing diff --git a/exercises/practice/acronym/.approaches/regex-sub/content.md b/exercises/practice/acronym/.approaches/regex-sub/content.md index 7e27a6d2ddd..0e843d0c7b8 100644 --- a/exercises/practice/acronym/.approaches/regex-sub/content.md +++ b/exercises/practice/acronym/.approaches/regex-sub/content.md @@ -16,7 +16,7 @@ def abbreviate_regex_sub(to_abbreviate): return re.sub(r"(? 
Date: Fri, 26 Jun 2026 21:29:21 -0400 Subject: [PATCH 4/5] improve clarity and fix rendering issues --- .../.approaches/functools-reduce/content.md | 4 ++-- .../generator-expression/content.md | 2 +- .../.approaches/list-comprehension/content.md | 2 +- .../.approaches/map-function/content.md | 8 +++---- .../acronym/.approaches/regex-join/content.md | 8 +++---- .../acronym/.approaches/regex-sub/content.md | 22 +++++++++---------- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/exercises/practice/acronym/.approaches/functools-reduce/content.md b/exercises/practice/acronym/.approaches/functools-reduce/content.md index 96ee36a1c78..7020424243d 100644 --- a/exercises/practice/acronym/.approaches/functools-reduce/content.md +++ b/exercises/practice/acronym/.approaches/functools-reduce/content.md @@ -38,10 +38,10 @@ The application of the function travels from left to right, so that the iterable Using code from the example above, `reduce(lambda start, word: start + word[0], ["GNU", "IMAGE", "MANIPULATION", "PROGRAM"])` would calculate `((("GNU"[0] + "IMAGE"[0]) + "MANIPULATION"[0]) + "PROGRAM"[0])`, or `GIMP`. The left argument, `start`, is the _accumulated value_ and the right argument, `word`, is the value from the iterable that is used to update the accumulated 'total'. -The optional 'initializer' value `""` is used here, and is placed ahead/before the items of the iterable in the calculation, and serves as a default if the iterable that is passed is empty. +The optional 'initializer' value `""` is used here, and is placed before the items of the iterable in the calculation, and serves as a default if the iterable that is passed is empty. -Since using `reduce()` is fairly succinct, it is put directly on the `return` line to produce the acronym rather than assigning and returning an intermediate variable. 
+Since using `reduce()` is fairly succinct, it is put directly on the `return` line to produce the acronym, rather than assigning and returning an intermediate variable. In benchmarks, this solution performed about as well as both the `loops` and the `list-comprehension` solutions. diff --git a/exercises/practice/acronym/.approaches/generator-expression/content.md b/exercises/practice/acronym/.approaches/generator-expression/content.md index ad6ef3c43b0..19a1e7e55a3 100644 --- a/exercises/practice/acronym/.approaches/generator-expression/content.md +++ b/exercises/practice/acronym/.approaches/generator-expression/content.md @@ -32,7 +32,7 @@ A [`generator-expression`][generator-expression] is then used to iterate through Generator expressions are short-form [generators][generators] — lazy iterators that produce their values _on demand_, instead of saving them to memory. This generator expression is consumed by [`str.join()`][str-join], which joins the generated letters together using an empty string. Other "separator" strings can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. -Since the generator expression and `join()` are fairly succinct, they are put directly on the `return` line rather than assigning and returning an intermediate variable for the acronym. +Since the generator expression and `join()` are fairly succinct, they are put directly on the `return` line, rather than assigning and returning an intermediate variable for the acronym. In benchmarks, this solution was surprisingly slower than the `list comprehension` version. 
diff --git a/exercises/practice/acronym/.approaches/list-comprehension/content.md b/exercises/practice/acronym/.approaches/list-comprehension/content.md index 1f0b123ffb1..7671d1c3262 100644 --- a/exercises/practice/acronym/.approaches/list-comprehension/content.md +++ b/exercises/practice/acronym/.approaches/list-comprehension/content.md @@ -27,7 +27,7 @@ As of this writing, both of these methods benchmark slower than using `str.repla A [`list comprehension`][list comprehension] is then used to iterate through the phrase and select the first letters of each word via [`bracket notation`][subscript notation]. This comprehension is passed into [`str.join()`][str-join], which unpacks the `list` of first letters and joins them together using an empty string — the acronym. Other "separator" strings besides an empty string can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. -Since the comprehension and `join()` are fairly succinct, they are put directly on the `return` line rather than assigning and returning an intermediate variable for the acronym. +Since the comprehension and `join()` are fairly succinct, they are put directly on the `return` line, rather than assigning and returning an intermediate variable for the acronym. The weakness of this solution is that it is taking up extra space with the `list comprehension`, which is creating and saving a `list` in memory — only to have that list immediately unpacked by the `str.join()` method. 
diff --git a/exercises/practice/acronym/.approaches/map-function/content.md b/exercises/practice/acronym/.approaches/map-function/content.md index 029f05f1758..dbc948ecc51 100644 --- a/exercises/practice/acronym/.approaches/map-function/content.md +++ b/exercises/practice/acronym/.approaches/map-function/content.md @@ -30,15 +30,15 @@ Once the phrase is cleaned and turned into a word `list`, the acronym is created The application of the function travels from left to right, and function results are produced as needed. -Using code from the example above, `map(lambda word: word[0], ["GNU", "IMAGE", "MANIPULATION", "PROGRAM"])` would calculate `"GNU"[0], "IMAGE"[0], "MANIPULATION"[0]), "PROGRAM"[0]` in order as a stream of data. +Using code from the example above, `map(lambda word: word[0], ["GNU", "IMAGE", "MANIPULATION", "PROGRAM"])` would calculate `"GNU"[0], "IMAGE"[0], "MANIPULATION"[0], "PROGRAM"[0]` in order as a stream of data. `word[0]` is the function, which extracts the letter at index zero for every word in the phrase list. This stream of data can then be 'consumed' — either in a `loop`, or by being 'unpacked' by another function or process. -Here, the `iterator` from `map()` is immediately consumed/unpacked by [`join()`][str-join], which glues the results together with an empty string to produce the acronym. +Here, the `iterator` from `map()` is immediately consumed/unpacked by [`str.join()`][str-join], which glues the results together with an empty string to produce the acronym. -Since using `join()` with `map()` is fairly succinct, the combination is put directly on the `return` line to produce the acronym rather than assigning and returning an intermediate variable. +Since using `join()` with `map()` is fairly succinct, the combination is put directly on the `return` line to produce the acronym, rather than assigning and returning an intermediate variable. 
-In benchmarks, this solution performed about as well as the `loops`, `reduce` and `list-comprehension` solutions. +In benchmarks, this solution performed about as well as the `loops`, `reduce` and `list-comprehension` approaches. [chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html [lazy iterator]: https://www.pythonmorsels.com/what-is-an-iterator/ diff --git a/exercises/practice/acronym/.approaches/regex-join/content.md b/exercises/practice/acronym/.approaches/regex-join/content.md index d621c64f209..b2ee28bb4bb 100644 --- a/exercises/practice/acronym/.approaches/regex-join/content.md +++ b/exercises/practice/acronym/.approaches/regex-join/content.md @@ -54,7 +54,7 @@ This means that `re.finditer()` produces matches _on demand_ instead of saving t The regular expression `r[a-zA-Z']+` in the code example looks for any single character in the range `a-z` lowercase and `A-Z` uppercase, plus the `'` (_apostrophe_) character. The `+` operator is a 'greedy' modifier that matches the previous range one to unlimited times. -This means that the expression will match any collection or repeat of letters (_word_), but will omit matching on any sort of space or 'non-letter' character, such as `\t`, `\n`, ` `, `_`, or `-`. +This means that the expression will match any collection or repeat of letters (_a word_), but will not match any sort of space or 'non-letter' character, such as a tab, space, hyphen, or underscore. For example, in `Complementary metal-oxide semiconductor`, the regex will match `Complementary`, `metal`, `oxide`, and `semiconductor`. The regex will not match on ` ` or `-`. @@ -78,9 +78,9 @@ This generator expression is consumed by [`str.join()`][str-join], which joins t Other "separator" strings can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. -Finally, the result of `.join()` is capitalized using the [chained][chaining] [`.upper()`][str-upper]. 
-Alternatively, `.upper()` can be used on `to_abbreviate` within `findall()`/`finditer()`, to uppercase the input before cleaning. -Since the generator expression + join + upper is fairly succinct, they can be placed directly on the `return` line rather than assigning and returning an intermediate variable for the acronym. +Finally, the result of `.join()` is capitalized using the [chained][chaining] [`str.upper()`][str-upper]. +Alternatively, `str.upper()` can be used on `to_abbreviate` within `findall()`/`finditer()`, to uppercase the input before cleaning. +Since the solution is fairly succinct, it can be condensed onto the `return` line, rather than assigning and returning an intermediate variable for the acronym. This approach was less performant in benchmarks than those using `loop`, `map`, `list-comprehension`, and `reduce`. diff --git a/exercises/practice/acronym/.approaches/regex-sub/content.md b/exercises/practice/acronym/.approaches/regex-sub/content.md index 0e843d0c7b8..92fa9469ed0 100644 --- a/exercises/practice/acronym/.approaches/regex-sub/content.md +++ b/exercises/practice/acronym/.approaches/regex-sub/content.md @@ -16,7 +16,7 @@ def abbreviate_regex_sub(to_abbreviate): return re.sub(r"(? 
Date: Fri, 26 Jun 2026 23:03:05 -0400 Subject: [PATCH 5/5] Separate `re.finditer()` into a variation also add username to contributors array --- .../practice/acronym/.approaches/config.json | 21 ++++-- .../acronym/.approaches/regex-join/content.md | 72 +++++++++++-------- 2 files changed, 55 insertions(+), 38 deletions(-) diff --git a/exercises/practice/acronym/.approaches/config.json b/exercises/practice/acronym/.approaches/config.json index 7959ea68868..5da9778478c 100644 --- a/exercises/practice/acronym/.approaches/config.json +++ b/exercises/practice/acronym/.approaches/config.json @@ -9,49 +9,56 @@ "slug": "functools-reduce", "title": "Functools Reduce", "blurb": "Use functools.reduce() to form an acronym from text cleaned using str.replace().", - "authors": ["bethanyg"] + "authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "d568ea30-b839-46ad-9c9b-73321a274325", "slug": "generator-expression", "title": "Generator Expression", "blurb": "Use a generator expression with str.join() to form an acronym from text cleaned using str.replace().", - "authors": ["bethanyg"] + "authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "da53b1bc-35c7-47a7-88d5-56ebb9d3658d", "slug": "list-comprehension", "title": "List Comprehension", "blurb": "Use a list comprehension with str.join() to form an acronym from text cleaned using str.replace().", - "authors": ["bethanyg"] + "authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "abd51d7d-3743-448d-b8f1-49f484ae6b30", "slug": "loop", "title": "Loop", "blurb": "Use str.replace() to clean the input string and a loop with string concatenation to form the acronym.", - "authors": ["bethanyg"] + "authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "9eee8db9-80f8-4ee4-aaaf-e55b78221283", "slug": "map-function", "title": "Map Built-in", "blurb": "Use the built-in map() function to form an acronym after cleaning the input string with str.replace().", - "authors": ["bethanyg"] + 
"authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "8f4dc8ba-fd1c-4c85-bcc3-8ef9dca34c7f", "slug": "regex-join", "title": "Regex join", "blurb": "Use regex to clean the input string and form the acronym with str.join().", - "authors": ["bethanyg"] + "authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "8830be43-44c3-45ab-8311-f588f60dfc5f", "slug": "regex-sub", "title": "Regex Sub", "blurb": "Use re.sub() to clean the input string and create the acronym in one step.", - "authors": ["bethanyg"] + "authors": ["bethanyg"], + "contributors": ["yrahcaz7"] }, { "uuid": "0ce3eaf7-da79-403d-a481-5dd8f476d286", diff --git a/exercises/practice/acronym/.approaches/regex-join/content.md b/exercises/practice/acronym/.approaches/regex-join/content.md index b2ee28bb4bb..45d4e5508f8 100644 --- a/exercises/practice/acronym/.approaches/regex-join/content.md +++ b/exercises/practice/acronym/.approaches/regex-join/content.md @@ -1,10 +1,9 @@ -# Approach: filter with `re.findall()` and join via `str.join()` +# Approach: Filter with `re.findall()` and join via `str.join()` ```python import re -###re.findall### def abbreviate(to_abbreviate): # Capitalize the input before cleaning. @@ -18,24 +17,6 @@ def abbreviate(to_abbreviate): # Capitalize the result after joining. return "".join(word[0] for word in re.findall(r"[a-zA-Z']+", to_abbreviate)).upper() - -###re.finditer### - -def abbreviate(to_abbreviate): - # Capitalize the input before cleaning. - cleaned = re.finditer(r"[a-zA-Z']+", to_abbreviate.upper()) - - # word.group(0)[0] (first letter of Matched word) can also be written as - # word[0][0], with the first bracketed number referring to Match group 0. - return "".join(word.group(0)[0] for word in cleaned) - -#OR# - -def abbreviate(to_abbreviate): - # Capitalize the output after joining. - # Use bracket notation for Match group. 
- return "".join(word[0][0] for word in - re.finditer(r"[a-zA-Z']+", to_abbreviate)).upper() ``` @@ -48,43 +29,72 @@ Regular expression matching starts at the left-hand side of the input and travel `re.findall()` searches text for all matching patterns, returning results (_including 'empty' matches_) in a `list` of strings. -The [`re.finditer()`][re-finditer] method works in the same fashion as `re.findall()`, but returns results as a _[lazy iterator][lazy iterator]_ over [`Match` objects][match objects]. -This means that `re.finditer()` produces matches _on demand_ instead of saving them to memory, but needs to have both the iterator and the `Match` objects _unpacked_. - - -The regular expression `r[a-zA-Z']+` in the code example looks for any single character in the range `a-z` lowercase and `A-Z` uppercase, plus the `'` (_apostrophe_) character. +The regular expression `[a-zA-Z']+` in the code example looks for any single character in the range `a-z` (_lowercase_) and `A-Z` (_uppercase_), plus the `'` (_apostrophe_) character. The `+` operator is a 'greedy' modifier that matches the previous range one to unlimited times. This means that the expression will match any collection or repeat of letters (_a word_), but will not match any sort of space or 'non-letter' character, such as a tab, space, hyphen, or underscore. For example, in `Complementary metal-oxide semiconductor`, the regex will match `Complementary`, `metal`, `oxide`, and `semiconductor`. -The regex will not match on ` ` or `-`. +The regex will not match any of the spaces or the hyphen (`-`). The result returned by `findall()` will then be `["Complementary", "metal", "oxide", "semiconductor"]`. ~~~~exercism/note `to_abbreviate.replace("-", " ").replace("_", " ").upper().split()` can also be used to clean `to_abbreviate` and turn the results into a `list`. 
-The `.replace()` approach benchmarked faster than using `re.findall()`/`re.finditer()` to clean, most likely due to overhead in importing the `re` module and in the [backtracking][backtracking] behavior of regex searching and matching. +The `.replace()` approach benchmarked faster than using `re.findall()` to clean, most likely due to overhead in importing the `re` module and in the [backtracking][backtracking] behavior of regex searching and matching. [backtracking]: https://stackoverflow.com/questions/9011592/in-regular-expressions-what-is-a-backtracking-back-referencing ~~~~ -Once `findall()` or `finditer()` completes, a [`generator-expression`][generator-expression] is used to iterate through the results and select the first letters of each word via [`bracket notation`][subscript notation]. -Note that when using `finditer()`, the `Match object` has to be unpacked via `match.group(0)`/`match[0]` before the first letter can be selected. +Once `findall()` completes, a [`generator-expression`][generator-expression] is used to iterate through the results and select the first letters of each word via [`bracket notation`][subscript notation]. -Generator expressions are short-form [generators][generators] — lazy iterators that produce their values _on demand_, instead of saving them to memory. +Generator expressions are short-form [generators][generators] — [lazy iterators][lazy iterator] that produce their values _on demand_, instead of saving them to memory. This generator expression is consumed by [`str.join()`][str-join], which joins the generated letters together using an empty string. Other "separator" strings can be used with `str.join()` — see [concept:python/string-methods]() for some additional examples. Finally, the result of `.join()` is capitalized using the [chained][chaining] [`str.upper()`][str-upper]. -Alternatively, `str.upper()` can be used on `to_abbreviate` within `findall()`/`finditer()`, to uppercase the input before cleaning. 
+Alternatively, `str.upper()` can be used on `to_abbreviate` within `findall()`, to uppercase the input before cleaning. Since the solution is fairly succinct, it can be condensed onto the `return` line, rather than assigning and returning an intermediate variable for the acronym. This approach was less performant in benchmarks than those using `loop`, `map`, `list-comprehension`, and `reduce`. + +## Variation 1: `re.finditer()` + + +```python +import re + + +def abbreviate(to_abbreviate): + # Capitalize the input before cleaning. + cleaned = re.finditer(r"[a-zA-Z']+", to_abbreviate.upper()) + + # word.group(0)[0] (first letter of Matched word) can also be written as + # word[0][0], with the first bracketed number referring to Match group 0. + return "".join(word.group(0)[0] for word in cleaned) + +#OR# + +def abbreviate(to_abbreviate): + # Capitalize the output after joining. + # Use bracket notation for Match group. + return "".join(word[0][0] for word in + re.finditer(r"[a-zA-Z']+", to_abbreviate)).upper() +``` + + +This variant uses [`re.finditer()`][re-finditer] for cleaning instead of `re.findall()`. + +The `re.finditer()` method works in the same fashion as `re.findall()`, but it returns results as a _[lazy iterator][lazy iterator]_ over [`Match` objects][match objects]. +This means that `re.finditer()` produces matches _on demand_ instead of saving them to memory, but needs to have both the iterator and the `Match` objects _unpacked_. + +Due to this, the generator expression was modified to unpack the `Match` objects via `word.group(0)` (or `word[0]`) before the first letter is selected. + + [chaining]: https://pyneng.readthedocs.io/en/latest/book/04_data_structures/method_chaining.html [generator-expression]: https://dbader.org/blog/python-generator-expressions [generators]: https://dbader.org/blog/python-generators