6 #include "constants.hpp"
10 using namespace Constants;
14 //####################################
15 // BASIC CHARACTER MATCHERS
16 //####################################
18 // Match standard control chars
19 const char* kwd_at(const char* src) { return exactly<'@'>(src); }
20 const char* kwd_dot(const char* src) { return exactly<'.'>(src); }
21 const char* kwd_comma(const char* src) { return exactly<','>(src); };
22 const char* kwd_colon(const char* src) { return exactly<':'>(src); };
23 const char* kwd_star(const char* src) { return exactly<'*'>(src); };
24 const char* kwd_plus(const char* src) { return exactly<'+'>(src); };
25 const char* kwd_minus(const char* src) { return exactly<'-'>(src); };
26 const char* kwd_slash(const char* src) { return exactly<'/'>(src); };
28 //####################################
// Re-implementations of classification functions that also exist in
// the standard library. The standard versions are locale aware, which
// caused trouble; these variants look at the plain char value only.
// Doing so even seems to improve performance by quite a bit.
32 //####################################
// Check for an ASCII letter ('A'-'Z' or 'a'-'z'), independent of locale.
bool is_alpha(const char& chr)
{
  // After converting the offset to unsigned, chars below the start of
  // a range wrap around to huge values, so one comparison per range
  // checks both the lower and the upper bound.
  const unsigned upper_offset = unsigned(chr - 'A');
  if (upper_offset <= unsigned('Z' - 'A')) return true;
  const unsigned lower_offset = unsigned(chr - 'a');
  return lower_offset <= unsigned('z' - 'a');
}
// Check for ASCII whitespace, independent of locale: either the blank
// itself or one of the chars from '\t' up to and including '\r'.
bool is_space(const char& chr)
{
  if (chr == ' ') return true;
  // unsigned wrap-around folds the two-sided range test into one compare
  const unsigned offset = unsigned(chr - '\t');
  return offset <= unsigned('\r' - '\t');
}
// Check for an ASCII decimal digit ('0'-'9'), independent of locale.
bool is_digit(const char& chr)
{
  // chars below '0' wrap around to huge unsigned values and are
  // rejected by the very same comparison as chars above '9'
  const unsigned offset = unsigned(chr - '0');
  return offset <= unsigned('9' - '0');
}
// Check for an ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F'),
// independent of locale.
bool is_xdigit(const char& chr)
{
  // each range is tested with a single unsigned comparison; offsets of
  // chars below the range start wrap around and therefore fail the test
  if (unsigned(chr - '0') <= unsigned('9' - '0')) return true;
  if (unsigned(chr - 'a') <= unsigned('f' - 'a')) return true;
  return unsigned(chr - 'A') <= unsigned('F' - 'A');
}
60 bool is_punct(const char& chr)
66 bool is_alnum(const char& chr)
68 return is_alpha(chr) || is_digit(chr);
// Check whether a char lies outside the 7-bit ASCII range (0..127).
bool is_unicode(const char& chr)
{
  // where plain char is signed, negative values convert to very large
  // unsigned numbers and are therefore reported as non-ASCII as well
  const unsigned code = unsigned(chr);
  return code >= 128;
}
// Check whether a char lies outside the ASCII range, restricted to
// specific numeric ranges (copied from Ruby Sass).
// NOTE(review): the bounds are far larger than any value a single
// 8-bit char can hold unless it is negative and converted to unsigned;
// confirm that testing one char against them is really intended.
bool is_nonascii(const char& chr)
{
  const unsigned code = unsigned(chr);
  if (code >= 128 && code <= 15572911) return true;
  if (code >= 15630464 && code <= 15712189) return true;
  return code >= 4036001920;
}
// Check whether a char belongs to the reduced ASCII range that is
// valid inside a uri (copied from Ruby Sass): any code strictly
// between 41 and 127, with ':' and '/' accepted explicitly.
bool is_uri_character(const char& chr)
{
  const unsigned code = unsigned(chr);
  if (code == ':' || code == '/') return true;
  return code > 41 && code < 127;
}
// Check whether a char belongs to the reduced ASCII range that is
// valid for escaping (copied from Ruby Sass): codes 32 up to 126.
bool is_escapable_character(const char& chr)
{
  const unsigned code = unsigned(chr);
  if (code <= 31) return false;
  return code < 127;
}
104 // Match word character (look ahead)
105 bool is_character(const char& chr)
107 // valid alpha, numeric or unicode char (plus hyphen)
108 return is_alnum(chr) || is_unicode(chr) || chr == '-';
111 //####################################
112 // BASIC CLASS MATCHERS
113 //####################################
115 // create matchers that advance the position
116 const char* space(const char* src) { return is_space(*src) ? src + 1 : 0; }
117 const char* alpha(const char* src) { return is_alpha(*src) ? src + 1 : 0; }
118 const char* unicode(const char* src) { return is_unicode(*src) ? src + 1 : 0; }
119 const char* nonascii(const char* src) { return is_nonascii(*src) ? src + 1 : 0; }
120 const char* digit(const char* src) { return is_digit(*src) ? src + 1 : 0; }
121 const char* xdigit(const char* src) { return is_xdigit(*src) ? src + 1 : 0; }
122 const char* alnum(const char* src) { return is_alnum(*src) ? src + 1 : 0; }
123 const char* punct(const char* src) { return is_punct(*src) ? src + 1 : 0; }
124 const char* hyphen(const char* src) { return *src && *src == '-' ? src + 1 : 0; }
125 const char* character(const char* src) { return is_character(*src) ? src + 1 : 0; }
126 const char* uri_character(const char* src) { return is_uri_character(*src) ? src + 1 : 0; }
127 const char* escapable_character(const char* src) { return is_escapable_character(*src) ? src + 1 : 0; }
129 // Match multiple ctype characters.
130 const char* spaces(const char* src) { return one_plus<space>(src); }
131 const char* digits(const char* src) { return one_plus<digit>(src); }
132 const char* hyphens(const char* src) { return one_plus<hyphen>(src); }
134 // Whitespace handling.
135 const char* no_spaces(const char* src) { return negate< space >(src); }
136 const char* optional_spaces(const char* src) { return zero_plus< space >(src); }
// Match any single character. At the terminating NUL nothing is
// consumed and `src` itself is returned, so this matcher never
// yields 0.
const char* any_char(const char* src)
{
  if (*src == 0) return src;
  return src + 1;
}
141 // Match word boundary (zero-width lookahead).
142 const char* word_boundary(const char* src) { return is_character(*src) || *src == '#' ? 0 : src; }
// Match linefeed /(?:\n|\r\n?)/
// Returns the position behind the line break, or 0 if `src` does not
// start with one. The end of input also counts as a match, but as a
// zero-width one: `src` is returned unchanged.
const char* re_linebreak(const char* src)
{
  // end of input: succeed without consuming anything, because stepping
  // over the terminating NUL would hand callers a position that lies
  // outside of the string
  if (*src == 0) return src;
  // unix linefeed
  if (*src == '\n') return src + 1;
  // a carriage return may optionally be followed by a linefeed
  if (*src == '\r') return *(src + 1) == '\n' ? src + 2 : src + 1;
  // no linefeed
  return 0;
}
// Assert the end of a line (zero-width positive lookahead).
// Succeeds by returning `src` unchanged when the current char is the
// terminating NUL, a linefeed or a carriage return; returns 0 for
// every other char.
const char* end_of_line(const char* src)
{
  switch (*src) {
    case 0:
    case '\n':
    case '\r':
      return src;
    default:
      return 0;
  }
}
// Assert end_of_file boundary (/\z/).
// Zero-width positive lookahead: returns `src` unchanged when it
// points at the terminating NUL, and 0 for any other char.
const char* end_of_file(const char* src)
{
  if (*src != 0) return 0;
  return src;
}