Skip to content

Commit

Permalink
Add path parser (not used yet)
Browse files Browse the repository at this point in the history
  • Loading branch information
adamritter committed Jun 26, 2023
1 parent 39a0325 commit 81e1578
Show file tree
Hide file tree
Showing 2 changed files with 323 additions and 0 deletions.
231 changes: 231 additions & 0 deletions src/parse_path.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
#include <cctype>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <variant>
#include <vector>
#include "parse_path.hpp"

// Returns true when `c` may appear in an identifier: an alphanumeric
// character (as classified by std::isalnum) or an underscore.
bool isIdentifierChar(char c)
{
    // The <cctype> classifiers are only defined for values representable as
    // unsigned char (or EOF). Plain char may be signed, so a byte >= 0x80
    // would be passed as a negative int without this cast.
    return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
}

// Recursive-descent parser that turns a textual path expression into the
// ObjectAccessors / Slice / ValueAccessor structures.
//
// Accepted value accessors (see parseValueAccessor):
//   ["key"]          object key given as a quoted string
//   .key             object key given as an identifier
//   .N               single index N (index is appended to the path)
//   [N]              single index N (index is appended to the path)
//   [[N]]            single index N (index is NOT appended to the path)
//   [a:b]            slice with start a, end b, step 1
//   [a:s:b]          slice with start a, step s, end b
//   {k1,k2:n2,...}   nested list of object accessors
//
// Syntax errors are reported with std::runtime_error; malformed or
// out-of-range integers surface as the exceptions thrown by std::stoi.
class Parser
{
public:
    // Creates a parser over a private copy of `input`. The text is copied so
    // the parser stays valid even when built from a temporary string.
    Parser(const std::string &input) : input_(input), index_(0) {}

    // Parses the input as a comma-separated list of object accessors.
    ObjectAccessors parse()
    {
        return parseObjectAccessors();
    }

private:
    // Returns a view of up to N upcoming characters without advancing.
    // The view is shorter than N when fewer characters remain, so probing
    // for a multi-character token near the end of input never throws.
    std::string_view lookAhead(std::size_t N) const
    {
        return std::string_view(input_).substr(index_, N);
    }

    // Advances the cursor by N characters.
    // Throws std::runtime_error if fewer than N characters remain.
    void consume(std::size_t N)
    {
        if (N > input_.size() - index_)
        {
            throw std::runtime_error("Attempted to consume beyond the end of input.");
        }
        index_ += N;
    }

    // Parses `accessor (',' accessor)* (',' '...')?`.
    // A trailing "..." sets echo_others and ends the list.
    ObjectAccessors parseObjectAccessors()
    {
        // Value-initialise so echo_others starts out false.
        ObjectAccessors accessors{};
        parseObjectAccessor(accessors.object_accessors.emplace_back());
        while (match(','))
        {
            skipWhitespace();
            if (lookAhead(3) == "...")
            {
                accessors.echo_others = true;
                consume(3);
                break;
            }
            parseObjectAccessor(accessors.object_accessors.emplace_back());
        }
        return accessors;
    }

    // Parses `key`, `key:new_key` or `key:new_key:value_accessor` into
    // `accessor`.
    void parseObjectAccessor(ObjectAccessor &accessor)
    {
        parseIdentifier(accessor.key);
        if (match(':'))
        {
            // Create the optional's string first, then fill it in place.
            accessor.new_key.emplace();
            parseIdentifier(*accessor.new_key);
            if (match(':'))
            {
                accessor.value_accessor = parseValueAccessor();
            }
        }
        else
        {
            accessor.value_accessor = std::monostate{};
        }
    }

    // Reads the remainder of a quoted string; the opening '"' has already
    // been consumed. A backslash makes the following character literal.
    // Characters are compared raw (no whitespace skipping) so spaces inside
    // the string are preserved.
    // Throws std::runtime_error when the closing quote is missing.
    std::string parseJSONString()
    {
        std::string text;
        while (index_ < input_.size())
        {
            char c = input_[index_++];
            if (c == '"')
            {
                return text;
            }
            if (c == '\\')
            {
                if (index_ >= input_.size())
                {
                    break; // dangling backslash at end of input
                }
                c = input_[index_++];
            }
            text += c;
        }
        throw std::runtime_error("Unterminated string in path.");
    }

    // Wraps a single key lookup, followed by whatever accessor comes next,
    // in an ObjectAccessors value.
    ValueAccessor parseKeyAccess(const std::string &key)
    {
        ObjectAccessors wrapper{};
        wrapper.object_accessors.emplace_back(ObjectAccessor{key, std::nullopt, parseValueAccessor()});
        return wrapper;
    }

    // Parses one (possibly chained) value accessor. Returns std::monostate
    // when the upcoming input does not start an accessor.
    ValueAccessor parseValueAccessor()
    {
        if (match('['))
        {
            if (match('"'))
            {
                // ["key"] : object accessor with a quoted key.
                const std::string key = parseJSONString();
                expect(']');
                return parseKeyAccess(key);
            }
            return parseSlice();
        }
        if (match('{'))
        {
            ObjectAccessors objectAccessors = parseObjectAccessors();
            expect('}');
            return objectAccessors;
        }
        if (match('.'))
        {
            const bool hasChar = index_ < input_.size();
            const bool startsWithDigit =
                hasChar && std::isdigit(static_cast<unsigned char>(input_[index_])) != 0;
            // Digits are identifier characters too, so they must be ruled out
            // first; otherwise ".2" could never reach the index branch.
            if (hasChar && !startsWithDigit && isIdentifierChar(input_[index_]))
            {
                std::string key;
                parseIdentifier(key);
                return parseKeyAccess(key);
            }

            // .N : single index whose value is appended to the path.
            Slice slice;
            slice.append_index = true;
            parseInt(slice.start);
            slice.end = slice.start;
            slice.step = 1;
            slice.value_accessor = parseValueAccessor();
            return slice;
        }
        return std::monostate{};
    }

    // Parses the inside of a bracket accessor. The caller has already
    // consumed the opening '['.
    Slice parseSlice()
    {
        Slice slice;
        if (match('['))
        {
            // [[N]] : single index that is not appended to the path.
            slice.append_index = false;
            parseInt(slice.start);
            expect(']');
            expect(']');
            slice.end = slice.start;
            slice.step = 1;
            slice.value_accessor = parseValueAccessor();
            return slice;
        }

        slice.append_index = true;
        parseInt(slice.start);
        if (match(']'))
        {
            // [N] : single index, same meaning as .N
            slice.end = slice.start;
            slice.step = 1;
            slice.value_accessor = parseValueAccessor();
            return slice;
        }

        expect(':');
        parseInt(slice.step);
        if (match(':'))
        {
            // [start:step:end]
            parseInt(slice.end);
        }
        else
        {
            // [start:end] : the second number was the end, not the step.
            slice.end = slice.step;
            slice.step = 1;
        }
        expect(']');
        slice.value_accessor = parseValueAccessor();
        return slice;
    }

    // Skips whitespace, then stores the longest run of identifier characters
    // (possibly empty) in `identifier`.
    void parseIdentifier(std::string &identifier)
    {
        skipWhitespace();
        const std::size_t start = index_;
        while (index_ < input_.size() && isIdentifierChar(input_[index_]))
        {
            ++index_;
        }
        identifier = input_.substr(start, index_ - start);
    }

    // Skips whitespace, then parses an optionally negative decimal integer
    // into `value`. std::stoi throws std::invalid_argument when no digits are
    // present and std::out_of_range when the number does not fit in an int.
    void parseInt(int &value)
    {
        skipWhitespace();
        const std::size_t start = index_;
        if (index_ < input_.size() && input_[index_] == '-')
        {
            ++index_;
        }
        while (index_ < input_.size() && std::isdigit(static_cast<unsigned char>(input_[index_])) != 0)
        {
            ++index_;
        }
        value = std::stoi(input_.substr(start, index_ - start));
    }

    // Skips whitespace and consumes `c`.
    // Throws std::runtime_error when the next character is not `c`.
    void expect(char c)
    {
        skipWhitespace();
        if (index_ >= input_.size() || input_[index_] != c)
        {
            throw std::runtime_error("Expected character '" + std::string(1, c) + "' not found.");
        }
        ++index_;
    }

    // Skips whitespace and consumes `c` if it is the next character.
    // Returns whether it was consumed.
    bool match(char c)
    {
        skipWhitespace();
        if (index_ < input_.size() && input_[index_] == c)
        {
            ++index_;
            return true;
        }
        return false;
    }

    // Advances the cursor past any whitespace.
    void skipWhitespace()
    {
        // Cast to unsigned char: <cctype> is undefined for negative values.
        while (index_ < input_.size() && std::isspace(static_cast<unsigned char>(input_[index_])) != 0)
        {
            ++index_;
        }
    }

    std::string input_; // owned copy of the text being parsed
    std::size_t index_; // cursor into input_; never exceeds input_.size()
};
92 changes: 92 additions & 0 deletions src/parse_path.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// This file contains the data structure for storing path grammars.
// Example paths are:
// .a[1].b.2[4:][-3:] that contain slices and object accessors.
// Multiple object accessors can be created like this .{id,users[1].{name,address}}
// It's also equivalent to this: .{id,users[1].name,users[1].address}
// Renaming can be supported multiple ways:
// .{id,user:users[1]:.{name,address}} or .{id,name:users[1].name,address:users[1].address}
// or {.id,user.name:users[1].name,user.address:users[1].address}
// Rewinding or multipath support is needed to support this:
// .{id,user:users[1],name:users[1].name,address:users[1].address}
// Let's go with multipath. rewriting the last expression looks like this:
// .{id,user:users[1]:{name,address}}
// [[1]] is an index accessor that does not append the index to the output path.

#pragma once
#include <optional>
#include <cctype>
#include <variant>
using namespace std;

// Forward declarations: the accessor types below refer to one another.
struct Slice;
struct ObjectAccessor;
struct ObjectAccessors;

// What to apply to a value: nothing further (std::monostate), an index/slice
// selection (Slice), or a selection of object keys (ObjectAccessors).
using ValueAccessor = std::variant<std::monostate,
Slice,
ObjectAccessors>;

// An index or range selection, followed by the accessor applied to each
// selected element.
//
// NOTE(review): value_accessor is a ValueAccessor held by value, and Slice is
// itself one of ValueAccessor's alternatives. std::variant stores its
// alternatives in place and needs complete types, so this by-value recursion
// needs an indirection (e.g. a pointer or container) before it can compile.
struct Slice
{
    int start = 0; // first index selected
    int step = 1;  // stride between selected indices
    int end = -1;  // last bound of the selection; a single index uses end == start
    // Whether the index is appended to the output path. The parser sets this
    // to false for the [[N]] form and true otherwise. Initialised so that a
    // default-constructed Slice never carries an indeterminate value.
    bool append_index = false;
    ValueAccessor value_accessor; // accessor applied after this selection
};

// A list of key selections applied to one object.
struct ObjectAccessors
{
    std::vector<ObjectAccessor> object_accessors; // one entry per selected key
    // Set by the parser when the list ends with "...". Initialised so that a
    // default-constructed ObjectAccessors never carries an indeterminate
    // value that is later read.
    bool echo_others = false;
};

// Selection of a single object key, with an optional rename and an optional
// accessor applied to the selected value.
struct ObjectAccessor
{
// Key name; the parser stores the first identifier of an accessor here.
std::string key;
// Optional second name; the parser stores the identifier following ':' here.
// NOTE(review): presumably the renamed output key - confirm which of
// key/new_key is the source and which is the output name.
std::optional<std::string> new_key;
// Accessor applied to the selected value; std::monostate when there is none.
ValueAccessor value_accessor;
};

/*
// encode the example paths in the given data structure. Don't use helper functions.
// For the path ".a[1].b.2[4:][-3:]"
vector<ValueAccessor> getPath1()
{
Slice slice2 = {-3, -1, 1, true, std::monostate{}};
Slice slice1 = {4, -1, 1, true, slice2};
// .2 is equivalent to [2]
Slice slice0 = {2, 2, 1, true, slice1};
ObjectAccessor accessorB = {"b", "", false, slice0};
Slice sliceForA = {1, 1, 1, true, accessorB};
ObjectAccessor rootAccessor1 = {"a", "", false, sliceForA};
std::vector<ValueAccessor> rootAccessors1 = {rootAccessor1};
}
// For the path ".{id,users[1].name,users[1].address}"
vector<ValueAccessor> getPath2()
{
Slice sliceForName = {1, 1, 1, true, ObjectAccessor{"name", "name", std::monostate{}}};
Slice sliceForAddress = {1, 1, 1, true, ObjectAccessor{"address", "address", std::monostate{}}};
std::vector<ValueAccessor> multipleAccessorsForUsers = {sliceForName, sliceForAddress};
ObjectAccessor accessorUsers = {"users", "users", multipleAccessorsForUsers};
std::vector<ValueAccessor> rootAccessors2 = {ObjectAccessor{"id", "id", std::monostate{}}, accessorUsers};
return rootAccessors2;
}
// For renaming ".{id,user:users[1]:.{name,address}}"
vector<ValueAccessor> getPath3()
{
ValueAccessor inner = {{ObjectAccessor{"name", "name", std::monostate{}},
ObjectAccessor{"address", "address", std::monostate{}}},
false};
Slice sliceForAddressWithRename = {1, 1, 1, false, inner};
std::vector<ValueAccessor> renamedAccessorsForUsers = {sliceForNameWithRename, sliceForAddressWithRename};
ObjectAccessor accessorUsersWithRename = {"users", "user", renamedAccessorsForUsers};
std::vector<ValueAccessor> rootAccessors3 = {
{ObjectAccessor{"id", "id", std::monostate{}}, accessorUsersWithRename},
false};
return rootAccessors3;
}
*/

0 comments on commit 81e1578

Please sign in to comment.