Back ported a lot of changes made to the Koans directory.
[ruby_koans.git] / src / about_regular_expressions.rb
1 # -*- coding: utf-8 -*-
2 require File.expand_path(File.dirname(__FILE__) + '/edgecase')
3
# Koans exploring Ruby regular expressions: pattern literals, repetition
# operators, character classes, anchors, grouping and captures, alternation,
# and the String#scan / String#sub / String#gsub helpers.
class AboutRegularExpressions < EdgeCase::Koan
  # A /.../ literal is an instance of Regexp.
  def test_a_pattern_is_a_regular_expression
    pattern = /pattern/
    assert_equal __(Regexp), pattern.class
  end

  # Indexing a string with a regexp yields the text that matched.
  def test_a_regexp_can_search_a_string_for_matching_content
    content = "some matching content"
    assert_equal __("match"), content[/match/]
  end

  # When nothing matches, the index expression evaluates to nil.
  def test_a_failed_match_returns_nil
    content = "some matching content"
    assert_equal __(nil), content[/missing/]
  end

  # ------------------------------------------------------------------

  # `?` lets the preceding element appear once or not at all.
  def test_question_mark_means_optional
    letters = "abbcccddddeeeee"
    assert_equal __("ab"), letters[/ab?/]
    assert_equal __("a"), letters[/az?/]
  end

  # `+` requires at least one occurrence of the preceding element.
  def test_plus_means_one_or_more
    letters = "abbcccddddeeeee"
    assert_equal __("bccc"), letters[/bc+/]
  end

  # `*` accepts any number of occurrences, including none.
  def test_asterisk_means_zero_or_more
    letters = "abbcccddddeeeee"
    assert_equal __("abb"), letters[/ab*/]
    assert_equal __("a"), letters[/az*/]
    assert_equal __(""), letters[/z*/]

    # THINK ABOUT IT:
    #
    # When would * fail to match?
  end

  # THINK ABOUT IT:
  #
  # We say that the repetition operators above are "greedy."
  #
  # Why?

  # ------------------------------------------------------------------

  # The earliest position that can match is the one reported, even though a
  # longer match ("az") exists later in the string.
  def test_the_left_most_match_wins
    text = "abbccc az"
    assert_equal __("a"), text[/az*/]
  end

  # ------------------------------------------------------------------

  # [...] matches any single character listed between the brackets.
  def test_character_classes_give_options_for_a_character
    animals = %w[cat bat rat zat]
    matching = animals.select { |animal| animal[/[cbr]at/] }
    assert_equal __(["cat", "bat", "rat"]), matching
  end

  # \d stands in for the explicit digit class [0123456789].
  def test_slash_d_is_a_shortcut_for_a_digit_character_class
    sentence = "the number is 42"
    assert_equal __("42"), sentence[/[0123456789]+/]
    assert_equal __("42"), sentence[/\d+/]
  end

  # A dash inside a character class denotes a range of characters.
  def test_character_classes_can_include_ranges
    sentence = "the number is 42"
    assert_equal __("42"), sentence[/[0-9]+/]
  end

  # \s matches whitespace; here a space, a tab and a newline.
  def test_slash_s_is_a_shortcut_for_a_whitespace_character_class
    text = "space: \t\n"
    assert_equal __(" \t\n"), text[/\s+/]
  end

  # \w stands in for the explicit class [a-zA-Z0-9_].
  def test_slash_w_is_a_shortcut_for_a_word_character_class
    assignment = "variable_1 = 42"
    # NOTE:  This is more like how a programmer might define a word.
    assert_equal __("variable_1"), assignment[/[a-zA-Z0-9_]+/]
    assert_equal __("variable_1"), assignment[/\w+/]
  end

  # `.` stops at the newline, so only the first line is matched.
  def test_period_is_a_shortcut_for_any_non_newline_character
    two_lines = "abc\n123"
    assert_equal __("abc"), two_lines[/a.+/]
  end

  # A leading ^ inside the brackets inverts the character class.
  def test_a_character_class_can_be_negated
    sentence = "the number is 42"
    assert_equal __("the number is "), sentence[/[^0-9]+/]
  end

  # The upper-case shortcuts \D, \S and \W invert \d, \s and \w.
  def test_shortcut_character_classes_are_negated_with_capitals
    sentence = "the number is 42"
    spaced = "space: \t\n"
    assignment = "variable_1 = 42"

    assert_equal __("the number is "), sentence[/\D+/]
    assert_equal __("space:"), spaced[/\S+/]
    # ... a programmer would most likely do
    assert_equal __(" = "), assignment[/[^a-zA-Z0-9_]+/]
    assert_equal __(" = "), assignment[/\W+/]
  end

  # ------------------------------------------------------------------

  # \A only matches at the very beginning of the string.
  def test_slash_a_anchors_to_the_start_of_the_string
    phrase = "start end"
    assert_equal __("start"), phrase[/\Astart/]
    assert_equal __(nil), phrase[/\Aend/]
  end

  # \z only matches at the very end of the string.
  def test_slash_z_anchors_to_the_end_of_the_string
    phrase = "start end"
    assert_equal __("end"), phrase[/end\z/]
    assert_equal __(nil), phrase[/start\z/]
  end

  # ^ matches at the start of any line, not just the first one.
  def test_caret_anchors_to_the_start_of_lines
    two_lines = "num 42\n2 lines"
    assert_equal __("2"), two_lines[/^\d+/]
  end

  # $ matches at the end of a line.
  def test_dollar_sign_anchors_to_the_end_of_lines
    two_lines = "2 lines\nnum 42"
    assert_equal __("42"), two_lines[/\d+$/]
  end

  # \b matches at a word boundary, so the "vine" inside "bovine" is skipped.
  def test_slash_b_anchors_to_a_word_boundary
    phrase = "bovine vines"
    assert_equal __("vines"), phrase[/\bvine./]
  end

  # ------------------------------------------------------------------

  # Parentheses let a repetition operator apply to a whole group.
  def test_parentheses_group_contents
    laughter = "ahahaha"
    assert_equal __("hahaha"), laughter[/(ha)+/]
  end

  # ------------------------------------------------------------------

  # A second index argument selects a numbered capture group.
  def test_parentheses_also_capture_matched_content_by_number
    full_name = "Gray, James"
    last_then_first = /(\w+), (\w+)/
    assert_equal __("Gray"), full_name[last_then_first, 1]
    assert_equal __("James"), full_name[last_then_first, 2]
  end

  # After a match, $1 and $2 hold the first and second captures.
  def test_variables_can_also_be_used_to_access_captures
    record = "Name:  Gray, James"
    assert_equal __("Gray, James"), record[/(\w+), (\w+)/]
    assert_equal __("Gray"), $1
    assert_equal __("James"), $2
  end

  # ------------------------------------------------------------------

  # `|` separates alternatives; the capture is nil when the match fails.
  def test_a_vertical_pipe_means_or
    family = /(James|Dana|Summer) Gray/
    assert_equal __("James Gray"), "James Gray"[family]
    assert_equal __("Summer"), "Summer Gray"[family, 1]
    assert_equal __(nil), "Jim Gray"[family, 1]
  end

  # THINK ABOUT IT:
  #
  # Explain the difference between a character class ([...]) and alternation (|).

  # ------------------------------------------------------------------

  # scan collects every match into an array.
  def test_scan_is_like_find_all
    words = "one two-three".scan(/\w+/)
    assert_equal __(["one", "two", "three"]), words
  end

  # sub replaces only the first match with the block's result.
  def test_sub_is_like_find_and_replace
    shortened = "one two-three".sub(/(t\w*)/) { $1[0, 1] }
    assert_equal __("one t-three"), shortened
  end

  # gsub replaces every match with the block's result.
  def test_gsub_is_like_find_and_replace_all
    shortened = "one two-three".gsub(/(t\w*)/) { $1[0, 1] }
    assert_equal __("one t-t"), shortened
  end
end