Phasor 2.2.0
Stack VM based Programming Language
Loading...
Searching...
No Matches
regex.cpp
Go to the documentation of this file.
1#include "StdLib.hpp"
2#include <regex>
3#include <vector>
4
5namespace Phasor
6{
7
8Value StdLib::registerRegexFunctions(const std::vector<Value> &args, VM *vm)
9{
10 checkArgCount(args, 0, "include_stdregex");
11
12 // Register regex functions
13 vm->registerNativeFunction("regex_match", regex_match);
14 vm->registerNativeFunction("regex_search", regex_search);
15 vm->registerNativeFunction("regex_findall", regex_findall);
16 vm->registerNativeFunction("regex_split", regex_split);
17 vm->registerNativeFunction("regex_replace", regex_replace);
18
19 return true;
20}
21
22Value StdLib::regex_match(const std::vector<Value> &args, VM *vm)
23{
24 // args[1] - pattern
25 // args[2] - text
26 checkArgCount(args, 2, "regex_match");
27
28 try
29 {
30 const std::string &pattern = args[1].asString();
31 const std::string &text = args[2].asString();
32
33 std::regex re(pattern);
34 bool match = std::regex_match(text, re);
35 return Value(match);
36 }
37 catch (const std::regex_error &e)
38 {
39 throw std::runtime_error("Regex error in regex_match: " + std::string(e.what()));
40 }
41}
42
43Value StdLib::regex_search(const std::vector<Value> &args, VM *vm)
44{
45 // args[1] - pattern
46 // args[2] - text
47 // args[3] - start (optional)
48 // args[4] - end (optional)
49 checkArgCount(args, 2, "regex_search", true);
50
51 try
52 {
53 const std::string &pattern = args[1].asString();
54 std::string text = args[2].asString();
55
56 // Handle optional start/end positions
57 size_t start_pos = 0;
58 size_t end_pos = text.length();
59
60 if (args.size() > 3 && !args[3].isNull())
61 {
62 start_pos = static_cast<size_t>(args[3].asInt());
63 }
64
65 if (args.size() > 4 && !args[4].isNull())
66 {
67 end_pos = static_cast<size_t>(args[4].asInt());
68 }
69
70 if (start_pos > end_pos || end_pos > text.length())
71 {
72 throw std::out_of_range("Invalid start/end positions in regex_search");
73 }
74
75 std::regex re(pattern);
76 std::smatch match;
77 std::string search_text = text.substr(start_pos, end_pos - start_pos);
78
79 bool found = std::regex_search(search_text, match, re);
80
81 // Return the first match if found, otherwise return empty string
82 return found ? Value(match[0].str()) : Value("");
83 }
84 catch (const std::regex_error &e)
85 {
86 throw std::runtime_error("Regex error in regex_search: " + std::string(e.what()));
87 }
88 catch (const std::out_of_range &e)
89 {
90 throw std::runtime_error("Position out of range in regex_search: " + std::string(e.what()));
91 }
92}
93
94Value StdLib::regex_findall(const std::vector<Value> &args, VM *vm)
95{
96 // args[1] - pattern
97 // args[2] - text
98 checkArgCount(args, 2, "regex_findall");
99
100 try
101 {
102 const std::string &pattern = args[1].asString();
103 const std::string &text = args[2].asString();
104
105 std::regex re(pattern);
106 std::sregex_iterator it(text.begin(), text.end(), re);
107 std::sregex_iterator end;
108
109 // Create a struct to hold the array of matches
110 Value result = Value::createStruct("RegexMatches");
111 int count = 0;
112
113 while (it != end)
114 {
115 // Create a struct for each match
116 Value match = Value::createStruct("RegexMatch");
117 match.setField("text", Value(it->str()));
118 match.setField("position", Value(static_cast<int64_t>(it->position())));
119
120 // Add match to results
121 result.setField(std::to_string(count++), match);
122 ++it;
123 }
124
125 // Add count field
126 result.setField("count", Value(static_cast<int64_t>(count)));
127
128 return result;
129 }
130 catch (const std::regex_error &e)
131 {
132 throw std::runtime_error("Regex error in regex_findall: " + std::string(e.what()));
133 }
134}
135
136Value StdLib::regex_split(const std::vector<Value> &args, VM *vm)
137{
138 // args[1] - pattern
139 // args[2] - text
140 // args[3] - max split (optional, -1 for no limit)
141 checkArgCount(args, 2, "regex_split", true);
142
143 try
144 {
145 const std::string &pattern = args[1].asString();
146 const std::string &text = args[2].asString();
147 int max_split = -1;
148
149 if (args.size() > 3 && !args[3].isNull())
150 {
151 max_split = static_cast<int>(args[3].asInt());
152 }
153
154 std::regex re(pattern);
155 std::sregex_token_iterator it(text.begin(), text.end(), re, -1);
156 std::sregex_token_iterator end;
157
158 // Create a struct to hold the split parts
159 Value result = Value::createStruct("SplitResult");
160 std::vector<std::string> parts;
161 int count = 0;
162
163 while (it != end && (max_split == -1 || count < max_split))
164 {
165 parts.push_back(it->str());
166 result.setField(std::to_string(count++), Value(it->str()));
167 ++it;
168 }
169
170 // Add the rest of the string if we hit max_split
171 if (it != end && max_split != -1)
172 {
173 std::string remaining = it->str();
174 while (++it != end)
175 {
176 remaining += it->str();
177 }
178 result.setField(std::to_string(count++), Value(remaining));
179 }
180
181 // Add count field
182 result.setField("count", Value(static_cast<int64_t>(count)));
183
184 return result;
185 }
186 catch (const std::regex_error &e)
187 {
188 throw std::runtime_error("Regex error in regex_split: " + std::string(e.what()));
189 }
190}
191
192Value StdLib::regex_replace(const std::vector<Value> &args, VM *vm)
193{
194 // args[1] - pattern
195 // args[2] - text
196 // args[3] - replacement
197 // args[4] - flags (optional, default to std::regex_constants::format_default)
198 checkArgCount(args, 3, "regex_replace", true);
199
200 try
201 {
202 const std::string &pattern = args[1].asString();
203 std::string text = args[2].asString();
204 const std::string &replacement = args[3].asString();
205
206 // Default flags (use explicit enum types)
207 std::regex_constants::syntax_option_type syntax = std::regex_constants::ECMAScript; // Default syntax
208 std::regex_constants::match_flag_type match_flags = std::regex_constants::match_default;
209 std::regex_constants::match_flag_type format_flags = std::regex_constants::match_default;
210
211 // Handle optional flags
212 if (args.size() > 4 && !args[4].isNull())
213 {
214 const std::string &flagsStr = args[4].asString();
215
216 // Parse syntax flags (these affect regex construction)
217 if (flagsStr.find('e') != std::string::npos)
218 {
219 syntax = std::regex_constants::ECMAScript;
220 }
221 if (flagsStr.find('a') != std::string::npos)
222 {
223 syntax = std::regex_constants::awk;
224 }
225 if (flagsStr.find('g') != std::string::npos)
226 {
227 syntax = std::regex_constants::grep;
228 }
229 if (flagsStr.find('p') != std::string::npos)
230 {
231 syntax = std::regex_constants::egrep;
232 }
233 // Flags that modify syntax options (case, multiline, nosubs, optimize, collate)
234 if (flagsStr.find('i') != std::string::npos)
235 {
236 syntax = syntax | std::regex_constants::icase;
237 }
238#if (defined(__GNUC__) && __cplusplus >= 201703L) || (defined(_MSC_VER) && _MSC_VER >= 1950)
239 if (flagsStr.find('m') != std::string::npos)
240 {
241 syntax = syntax | std::regex_constants::multiline;
242 }
243#endif
244 if (flagsStr.find('n') != std::string::npos)
245 {
246 syntax = syntax | std::regex_constants::nosubs;
247 }
248 if (flagsStr.find('o') != std::string::npos)
249 {
250 syntax = syntax | std::regex_constants::optimize;
251 }
252 if (flagsStr.find('c') != std::string::npos)
253 {
254 syntax = syntax | std::regex_constants::collate;
255 }
256
257 // Format flags for regex_replace
258 if (flagsStr.find('f') != std::string::npos)
259 {
260 format_flags = format_flags | std::regex_constants::format_sed;
261 }
262 if (flagsStr.find('r') != std::string::npos)
263 {
264 format_flags = format_flags | std::regex_constants::format_no_copy;
265 }
266 if (flagsStr.find('d') != std::string::npos)
267 {
268 format_flags = format_flags | std::regex_constants::format_first_only;
269 }
270 }
271
272 std::regex re(pattern, syntax);
273 // Combine match_flags and format_flags using bitwise OR on enum types
274 auto combined_flags = match_flags | format_flags;
275 std::string result = std::regex_replace(text, re, replacement, combined_flags);
276
277 return Value(result);
278 }
279 catch (const std::regex_error &e)
280 {
281 throw std::runtime_error("Regex error in regex_replace: " + std::string(e.what()));
282 }
283}
284} // namespace Phasor
static Value regex_search(const std::vector< Value > &args, VM *vm)
Check if regex is found.
Definition regex.cpp:43
static Value regex_findall(const std::vector< Value > &args, VM *vm)
Find all regex matches.
Definition regex.cpp:94
static void checkArgCount(const std::vector< Value > &args, size_t minimumArguments, const std::string &name, bool allowMoreArguments=false)
Definition StdLib.cpp:50
static Value regex_replace(const std::vector< Value > &args, VM *vm)
Replace regex matches.
Definition regex.cpp:192
static Value regex_match(const std::vector< Value > &args, VM *vm)
Check if regex matches.
Definition regex.cpp:22
static Value regex_split(const std::vector< Value > &args, VM *vm)
Split string by regex matches.
Definition regex.cpp:136
static Value registerRegexFunctions(const std::vector< Value > &args, VM *vm)
Definition regex.cpp:8
Virtual Machine.
Definition VM.hpp:18
void registerNativeFunction(const std::string &name, NativeFunction fn)
Register a native function.
Definition VM.cpp:869
A value in the Phasor VM.
Definition Value.hpp:33
void setField(const std::string &name, Value value)
Definition Value.hpp:443
static Value createStruct(const std::string &name)
Definition Value.hpp:422
The Phasor Programming Language and Runtime.
Definition AST.hpp:8