Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interface design of simple unified access to json, xml, yaml, ini, etc. #259

Merged
merged 12 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions common/simple_dom.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "simple_dom.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <photon/common/alog.h>
#include <photon/common/utility.h>
#include <photon/common/stream.h>
#include <photon/fs/localfs.h>
#include <photon/fs/filesystem.h>


namespace photon {
namespace SimpleDOM {

// JSON back-end placeholder: no parser is wired in yet, so every call
// reports failure by returning a null node implementation.
static NodeImpl* parse_json(char* text, size_t size, int flags) {
    // Sketch of the intended rapidjson-based implementation:
    //
    //   using namespace rapidjson;
    //   auto flags = kParseInsituFlag | kParseNumbersAsStringsFlag |
    //                kParseCommentsFlag | kParseTrailingCommasFlag |
    //                kParseNanAndInfFlag;
    //   Reader reader;
    //   reader.Parse<flags>(stream(text), handler);
    NodeImpl* const no_document = nullptr;
    return no_document;
}

// XML back-end placeholder: not implemented, always yields a null result.
static NodeImpl* parse_xml(char* text, size_t size, int flags) {
    NodeImpl* const no_document = nullptr;
    return no_document;
}

// YAML back-end placeholder: not implemented, always yields a null result.
static NodeImpl* parse_yaml(char* text, size_t size, int flags) {
    NodeImpl* const no_document = nullptr;
    return no_document;
}

// INI back-end placeholder: not implemented, always yields a null result.
static NodeImpl* parse_ini(char* text, size_t size, int flags) {
    NodeImpl* const no_document = nullptr;
    return no_document;
}

// Parses `text` (of `size` bytes) as the document type selected by the low
// bits of `flags` (flags & DOC_TYPE_MASK) and returns the resulting node.
//
// Returns a null Node with errno = EINVAL when `text` is null, `size` is 0,
// or the document type does not name one of the known parsers.
// When the document type is invalid and FLAG_FREE_TEXT_IF_PARSING_FAILED is
// set, `text` is released with free() before returning.
Node parse(char* text, size_t size, int flags) {
    // NOTE(review): on this path `text` is not freed even when
    // FLAG_FREE_TEXT_IF_PARSING_FAILED is set — confirm whether callers
    // passing a non-null text with size 0 expect it to be released.
    if (!text || !size)
        LOG_ERROR_RETURN(EINVAL, nullptr, "invalid argument:", VALUE(text), VALUE(size));

    using Parser = NodeImpl* (*) (char* text, size_t size, int flags);
    // Indexed by document type: DOC_JSON, DOC_XML, DOC_YAML, DOC_INI.
    constexpr static Parser parsers[] = {&parse_json, &parse_xml,
                                         &parse_yaml, &parse_ini};

    auto i = flags & DOC_TYPE_MASK;
    // Valid indices are [0, LEN(parsers)); `i == LEN(parsers)` must be
    // rejected too, otherwise the table is read one element past its end.
    if ((size_t)i >= LEN(parsers)) {
        if (flags & FLAG_FREE_TEXT_IF_PARSING_FAILED) free(text);
        LOG_ERROR_RETURN(EINVAL, nullptr, "invalid document type ", HEX(i));
    }
    return parsers[i](text, size, flags);
}

// Reads the whole content of `file` and parses it. The buffer belongs to
// this call, so the parser is told to free it if parsing fails.
Node parse_file(fs::IFile* file, int flags) {
    const int owning_flags = flags | FLAG_FREE_TEXT_IF_PARSING_FAILED;
    return parse(file->readall(), owning_flags);
}

// Opens `filename` read-only — through `fs` when one is given, otherwise
// through the local-file adaptor — and parses its content.
// Returns a null Node (errno left as set by the failed open) when the file
// cannot be opened. The file object is deleted before returning.
Node parse_file(const char* filename, int flags, fs::IFileSystem* fs) {
    using namespace fs;
    IFile* file = nullptr;
    if (fs) {
        file = fs->open(filename, O_RDONLY);
    } else {
        file = open_localfile_adaptor(filename, O_RDONLY);
    }
    if (file == nullptr) {
        LOG_ERRNO_RETURN(0, nullptr, "failed to open file ", filename);
    }
    DEFER(delete file);
    return parse_file(file, flags);
}

}
}
199 changes: 199 additions & 0 deletions common/simple_dom.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
/*
Copyright 2022 The Photon Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once
#include "simple_dom_impl.h"

namespace photon {

namespace fs {
class IFileSystem;
class IFile;
}

// SimpleDOM emphasize on:
// 1. a simple & convenient interface for JSON, XML, YAML, INI, etc;
// 2. fast compilation, efficient accessing;
// 3. common needs;
namespace SimpleDOM {

using str = estring_view;

// the interface for users
// A Node is a lightweight handle to a NodeImpl. Every non-null Node holds one
// reference on the root of its document (add_doc_ref / del_doc_ref); a null
// Node (default-constructed, built from nullptr, or moved-from) holds nothing.
// All accessors are null-safe: on a null Node they return a value-initialized
// result instead of dereferencing.
class Node {
    NodeImpl* _impl = nullptr;
public:
    Node() = default;

    // Wraps `node` (may be null) and takes a reference on its document.
    Node(NodeImpl* node) {
        _impl = node;
        if (_impl)
            _impl->_root->add_doc_ref();
    }
    Node(const Node& rhs) :
        Node(rhs._impl) { }

    // Steals the reference held by `rhs`, leaving it null.
    Node(Node&& rhs) {
        _impl = rhs._impl;
        rhs._impl = nullptr;
    }

    // Reference counts only change when the two nodes belong to different
    // documents; within one document the held reference is simply reused.
    Node& operator = (const Node& rhs) {
        auto rt = root_impl();
        auto rrt = rhs.root_impl();
        if (rt != rrt) {
            if (rt) rt->del_doc_ref();
            if (rrt) rrt->add_doc_ref();
        }
        _impl = rhs._impl;
        return *this;
    }
    Node& operator = (Node&& rhs) {
        // Self-move must be a no-op; otherwise the reference would be
        // dropped and the handle nulled out.
        if (this == &rhs)
            return *this;
        if (_impl)
            _impl->_root->del_doc_ref();
        _impl = rhs._impl;
        rhs._impl = nullptr;
        return *this;
    }
    ~Node() {
        // A null Node owns no reference, so there is nothing to release.
        if (_impl)
            _impl->_root->del_doc_ref();
    }

    // Evaluates to `return e;` for a non-null node, `return {};` otherwise.
    #define IF_RET(e) if (_impl) return e; else return {};
    Node next() const               { IF_RET(_impl->_next); }
    bool is_root() const            { IF_RET(_impl->_root == _impl); }
    Node root() const               { IF_RET(_impl->_root); }
    NodeImpl* root_impl() const     { IF_RET(_impl->_root); }
    // The root node has no valid key (its storage holds the refcount).
    rstring_view32 rkey() const     { assert(!is_root()); IF_RET(_impl->_key); }
    rstring_view32 rvalue() const   { IF_RET(_impl->_value); }
    // Key / value resolved against an explicitly supplied text base `b`.
    str key(const char* b) const    { IF_RET(b | rkey()); }
    str value(const char* b) const  { IF_RET(b | rvalue()); }
    // Read straight from the root implementation; no temporary Node (and
    // thus no reference-count round trip) is needed for a plain field read.
    const char* text_begin() const  { IF_RET(_impl->_root->_text_begin); }
    str key() const                 { IF_RET(text_begin() | rkey()); }
    str value() const               { IF_RET(text_begin() | rvalue()); }
    bool valid() const              { return _impl; }
    operator bool() const           { return _impl; }
    size_t num_children() const     { IF_RET(_impl->num_children()); }
    Node get(size_t i) const        { IF_RET({_impl->get(i)}); }
    Node get(str key) const         { IF_RET({_impl->get(key)}); }
    Node operator[](str key) const  { return get(key); }
    Node operator[](size_t i) const { return get(i); }
    // XML attributes are exposed as a child with this reserved key.
    Node get_attributes() const     { return get("__attributes__"); }
    str to_string() const           { return value(); }
    #undef IF_RET

    // Lets string literals select the key-based lookup.
    template<size_t N>
    Node operator[](const char (&key)[N]) const {
        return get(key);
    }

    // NOTE(review): the value is converted with to_uint64(); presumably
    // negative numbers fall back to `def_val` — confirm against estring_view.
    int64_t to_integer(int64_t def_val = 0) const {
        return value().to_uint64(def_val);
    }
    double to_number(double def_val = NAN) const {
        return value().to_double(def_val);
    }

    struct SameKeyEnumerator;
    Enumerable<SameKeyEnumerator> enumerable_same_key_siblings() const;

    struct ChildrenEnumerator;
    Enumerable<ChildrenEnumerator> enumerable_children() const;

    // Enumerates the children that share `key`, starting from the first one.
    Enumerable<SameKeyEnumerator> enumerable_children(str key) const {
        return get(key).enumerable_same_key_siblings();
    }
};

// Document type selectors; they occupy the low 8 bits of the `flags` word.
constexpr int DOC_JSON      = 0x00;
constexpr int DOC_XML       = 0x01;
constexpr int DOC_YAML      = 0x02;
constexpr int DOC_INI       = 0x03;
constexpr int DOC_TYPE_MASK = 0xff;

// Option bits live above the document-type byte.
constexpr int FLAG_FREE_TEXT_IF_PARSING_FAILED = 0x100;

using Document = Node;

// 1. text is handed over to the simple_dom object, and gets freed during destruction
// 2. the content of text may be modified in-place to un-escape strings.
// 3. returning a pointer (of NodeImpl) is more efficient than an object (of Document),
// even if they are equivalent in binary form.
Node parse(char* text, size_t size, int flags);

// Parses the content held by `buf`.
// Returns a null Node without touching `buf` when it is empty or carries an
// error (null pointer or non-positive size).
//
// Ownership: when parsing succeeds the text now belongs to the document;
// when it fails with FLAG_FREE_TEXT_IF_PARSING_FAILED set, the parser has
// already freed it. In both cases `buf` must give the pointer up WITHOUT
// freeing it, hence release() rather than reset() (which would free the
// buffer a second time / behind the document's back).
// If parsing fails without the flag, `buf` keeps owning the text.
inline Node parse(IStream::ReadAll&& buf, int flags) {
    if (!buf.ptr || buf.size <= 0) return nullptr;
    auto node = parse((char*)buf.ptr.get(), (size_t)buf.size, flags);
    if (node || (flags & FLAG_FREE_TEXT_IF_PARSING_FAILED)) {
        buf.ptr.release();
        buf.size = 0;
    }
    return node;
}

// Parses a private duplicate of `text`, leaving the caller's buffer intact.
// The duplicate is owned by this call, so the parser is asked to free it
// when parsing fails.
inline Node parse_copy(const char* text, size_t size, int flags) {
    char* duplicate = strndup(text, size);
    const int owning_flags = flags | FLAG_FREE_TEXT_IF_PARSING_FAILED;
    return parse(duplicate, size, owning_flags);
}

// Parses a private duplicate of the content held by `buf`; `buf` itself is
// left untouched. An empty or failed buffer yields a null Node.
inline Node parse_copy(const IStream::ReadAll& buf, int flags) {
    const bool has_content = buf.ptr && buf.size > 0;
    if (!has_content) {
        return nullptr;
    }
    auto text = (char*)buf.ptr.get();
    auto length = (size_t)buf.size;
    return parse_copy(text, length, flags);
}

Node parse_file(fs::IFile* file, int flags);

// assuming localfs by default
Node parse_file(const char* filename, int flags, fs::IFileSystem* fs = nullptr);

Node make_overlay(Node* nodes, int n);

// Enumerator that walks a sibling chain: get() yields the current node and
// next() advances to the following sibling.
struct Node::ChildrenEnumerator {
    Node _impl;     // current position in the chain

    Node get() const { return _impl; }

    // Returns 0 while a node is available, -1 once the chain is exhausted.
    int next() {
        _impl = _impl.next();
        if (_impl.valid()) {
            return 0;
        }
        return -1;
    }
};

// Enumerates the children of this node, starting from child 0.
// Uses the null-safe get(0) so that a null Node is handled like every other
// accessor does (the enumeration starts from a null node) instead of
// dereferencing a null implementation pointer.
inline Enumerable<Node::ChildrenEnumerator>
Node::enumerable_children() const {
    return enumerable(Node::ChildrenEnumerator{get(0)});
}

// Enumerator over consecutive siblings that carry the same key as the node
// it was started from.
struct Node::SameKeyEnumerator {
    Node _impl;         // current node
    const char* _base;  // text base used to resolve keys
    str _key;           // key of the starting node

    // Members are initialized in declaration order, so `_base` is ready
    // before it is used to resolve `_key`.
    SameKeyEnumerator(Node node)
        : _impl(node),
          _base(_impl.text_begin()),
          _key(_impl.key(_base)) { }

    Node get() const { return _impl; }

    // Advances to the next sibling; returns 0 if it exists and still has the
    // starting key, -1 otherwise.
    int next() {
        _impl = _impl.next();
        if (!_impl.valid()) {
            return -1;
        }
        if (_impl.key(_base) == _key) {
            return 0;
        }
        return -1;
    }
};

// Enumerates this node followed by its consecutive siblings that share its
// key.
inline Enumerable<Node::SameKeyEnumerator>
Node::enumerable_same_key_siblings() const {
    return enumerable(Node::SameKeyEnumerator(Node(_impl)));
}

}
}
79 changes: 79 additions & 0 deletions common/simple_dom_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
Copyright 2022 The Photon Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

#pragma once
#include <inttypes.h>
#include <assert.h>
#include <memory>
#include <math.h>
#include <atomic>
#include <photon/common/object.h>
#include <photon/common/estring.h>
#include <photon/common/enumerable.h>
#include <photon/common/stream.h>


namespace photon {

namespace SimpleDOM {

using str = estring_view;

struct Node;

// The interface for internal implementations.
// Concrete document back-ends derive from NodeImpl; users access nodes
// through the Node wrapper, which is granted access via the friend
// declaration below.
class NodeImpl : public Object {
protected:
NodeImpl() = default;
// The root node of the document; the root is the node whose _root is itself.
NodeImpl* _root;
// Storage shared between two roles, depending on whether this is the root:
union {
NodeImpl* _next; // the node returned by Node::next()
const char* _text_begin; // root node only: beginning of the text, i.e. the
}; // base that _key and _value (rstring_view) are resolved against
// Storage shared between two roles, depending on whether this is the root:
union {
rstring_view32 _key; // root node doesn't have a valid key, do not try to get it
std::atomic<uint32_t> _refcnt{0}; // counter used by add_doc_ref() / del_doc_ref()
};
rstring_view32 _value;

// Adds one reference to the document; must be called on the root node.
void add_doc_ref() {
assert(this == _root);
++_refcnt;
}

// Drops one reference to the document; must be called on the root node.
// The root deletes itself when the counter reaches zero.
void del_doc_ref() {
assert(this == _root);
if (--_refcnt == 0)
delete this;
}

// Node manipulates the protected members above on behalf of users.
friend struct Node;

public:
// get the number of child nodes
virtual size_t num_children() const __attribute__((pure)) = 0;

// get the i-th child node
// for an array object, it gets the i-th element (doc type determines the starting value)
// for an object, it gets the i-th element in implementation defined order
virtual NodeImpl* get(size_t i) const __attribute__((pure)) = 0;

// get the first child node with a specified `key`
// XML attributes are treated as a special child node with key "__attributes__"
virtual NodeImpl* get(str key) const __attribute__((pure)) = 0;
};

}
}
39 changes: 39 additions & 0 deletions common/stream.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#include "stream.h"
#include <stdlib.h>
#include "alog.h"


// Reads the stream until EOF into a malloc()-ed buffer that starts at
// `min_buf` bytes and doubles whenever it fills up.
//
// Returns a ReadAll whose `ptr` owns the buffer and whose `size` is the
// number of bytes read. On failure `size` is negated (the bytes read so far
// stay in the buffer) and the error is logged:
//   - read() failure: errno is left as set by read();
//   - buffer already at/over `max_buf` when full, or (re)allocation
//     failure: errno = ENOBUFS.
IStream::ReadAll IStream::readall(size_t max_buf, size_t min_buf) {
    ReadAll buf;
    buf.size = 0;
    ssize_t capacity = min_buf;
    auto ptr = (char*)malloc(capacity);
    if (!ptr)
        LOG_ERROR_RETURN(ENOBUFS, buf, "failed to malloc(`)", capacity);
    buf.ptr.reset(ptr);
    while(true) {
        ssize_t ret = this->read((char*)buf.ptr.get() + buf.size, capacity - buf.size);
        if (ret < 0) {
            buf.size = -buf.size;
            LOG_ERRNO_RETURN(0, buf, "failed to read from stream");
        }
        if (ret == 0) { // EOF
            return buf;
        }
        buf.size += ret;
        assert(buf.size <= capacity);
        if (unlikely(buf.size == capacity)) {
            if ((size_t)capacity >= max_buf) {
                buf.size = -buf.size;
                LOG_ERROR_RETURN(ENOBUFS, buf, "content size in stream exceeds upper limit ", max_buf);
            }
            capacity *= 2;
            auto grown = realloc(buf.ptr.get(), capacity);
            if (!grown) {
                // realloc() failed: the old block is still valid and still
                // owned by buf.ptr, so the partial content is preserved.
                buf.size = -buf.size;
                LOG_ERROR_RETURN(ENOBUFS, buf, "failed to realloc(`)", capacity);
            }
            // realloc() has already disposed of (or reused) the old block;
            // give it up without freeing before adopting the new pointer,
            // otherwise reset() would free memory that is gone or in use.
            buf.ptr.release();
            buf.ptr.reset(grown);
        }
    }
}

Loading