Author: Mason Wright
Email:
[email protected]
Date: Wed, 18 Jun 2025 16:42:10 -0600
src/parser.cc
09c195df02536b6a796bd648fce9669397b96109
Moved things around and made .h files
Clone
A
include/element.h
A
include/parser.h
-
index.html
M
main
M
main.cc
A
make.sh
-
src/adapter.cc
A
src/element.cc
-
src/events.cc
A
src/parser.cc
Commits
b966b2a517365074e5c381dbdea05b3221dc0198
e840f1eeb0ae26af69e1ae146ea9938e28e9f1af
e4e05418a640eaed08cd1ec7cd8644eb1dbcca50
4e01ba8ad2c3361fa4be3d896288020948b58b5e
aae562ac1350480e4889aabb35899f776c5b59e9
6c3ae0e31eb0893f20e3872117f92cc6b9a942af
350e7d88bb2feb9db00c6e032cc6623f215b7adf
95e6c70d23e99ffcf70e5bbe12503496e5d8f232
e188783659b9bc3b9993a647e93ed110e7f41db6
5e4c38ff3c212cdd9881427ef3f8c2706539a190
e50ea9e1356a74af18fdd171337ef9dc931e1f4e
8f2e83556d12aaebe8e8597ea6923804b0eb7a43
1627c585128af263181053ab2cf1a4cdcd14ee21
def3513f75b325464ad88a33c741c4ca80572b77
a21501590980a905fa9b902897d700a42a08b7f0
56074a6bfe4498d092f3a227297c8c20e2bb962c
d9cf1485b7ae0614130494f0e73237921323b9a1
80f04b134ae32ad8a9d526007b33dd02f6600f05
23d6c65f9368d3c622a55a3068a6b2f1efa0c8d4
09c195df02536b6a796bd648fce9669397b96109
f2b5c8202fbc904e2ed78260e3fdbd55164799d2
4bfba076120f389994fc46a98e8b7a2622314400
e36ac5417e10ee9b9f94f340e1ccf28afc5705ea
d00dc89a86dd7e2fcfd4618bc3a1c8cfba9e3c3d
d9eef16adaf292f3748db5fb5aa98463de10d712
18ff2ec1bfc1cf9fcd17c1acb05c3b41f8f0ed83
9e7fd2980d723437ea621b78d395fa72ca3f4922
Diff
diff --git a/src/parser.cc b/src/parser.cc deleted file mode 100644 index 784fbdd..0000000 --- a/src/parser.cc +++ /dev/null @@ -1,294 +0,0 @@ -#include "element.h" -#include
-#include
-#include
-#include
-#include
-#include
- -std::unordered_map
parseAttributes(std::string token) { - std::unordered_map
attrs; - - bool inQuote = false; - bool escaped = false; - char quoteType = '"'; - - - std::string word; - for (unsigned short i = 0; i < token.length(); i++) { - char current = token[i]; - if ((current == '"' || current == '\'') && !escaped) { - if (inQuote && current == quoteType) { - inQuote = false; - } else if (!inQuote) { - inQuote = true; - quoteType = current; - } - } - - if (current == '\\') { //' - escaped = true; - continue; - } - - if ((!std::isspace(current) || inQuote) && i != token.length()-1) { - word += current; - } else { - if (i == token.length()-1) { - word += current; - } - - std::string name = ""; - std::string value = ""; - bool setValue = false; - - for (auto c : word) { - if (c == '=') { - setValue = true; - continue; - } - - if (setValue) { - value += c; - } else { - name += c; - } - } - - word = ""; - - std::string trimmedName = ""; - for (auto c : name) { - if (!std::isspace(c)) { - trimmedName += c; - } - } - - if (attrs.size() == 0) { - value = '"'+trimmedName+'"'; - trimmedName = "tagName"; - } - - std::string trimmedValue = ""; - int sliceStart = 0; - int sliceEnd = value.length(); - - if (value.length() >= 2) { - for (unsigned short t = 0; t < value.length(); t++) { - if (!std::isspace(value[t])) { - sliceStart = t; - break; - } - } - - for (int t = value.length()-1; t >= 0; t--) { - // Trim the trailing / on self closing elements if there isn't a space inbetween - if (!std::isspace(value[t]) && value[t] != '/') { - sliceEnd = t; - break; - } - } - - // Add and subtract 1 from each side to remove quotes - for (unsigned short t = sliceStart+1; t < sliceEnd; t++) { - trimmedValue += value[t]; - } - } - - attrs[trimmedName] = trimmedValue; - } - } - - return attrs; -} - -std::unique_ptr
parseStream(std::istream& inputStream) { - std::unique_ptr
root = std::make_unique
(); - root->setTagName("root"); - - Node* currentNode = root.get(); - - bool inTag = false; - bool escaped = false; - bool inQuote = false; - bool inComment = false; - char quoteType = '"'; - - std::string token; - - char current; - while (inputStream.get(current)) { - - // Finds the --> and removes it then resets inComment - if (inComment) { - // added the peek to prevent hitting on every - - if (current == '-' && inputStream.peek() == '-') { - char a,b; - // load the next two - if (inputStream.get(a) && inputStream.get(b)) { - // We know b == - - if (b == '>') { - // Close the comment - inComment = false; - } - } - - if (!inComment) { - // we don't put anything back and that puts us where we need to be - continue; - } else { - // Not the end - inputStream.putback(b); - inputStream.putback(a); - } - } - continue; - } - - if (inTag) { - if ((current == '"' || current == '\'') && !escaped) { - if (inQuote && current == quoteType) { - inQuote = false; - } else if (!inQuote) { - inQuote = true; - quoteType = current; - } - } - - if (current == '>' && !inQuote && !escaped) { - inTag = false; - // Even if the next tag is next still add a text tag as we can just check if its empty and remove it - // this check would still have to be done either way so we aren't wasting anything - if (token.length() > 0) { - bool empty = true; - bool closingTag = false; - for (int i = 0; i < token.length(); i++) { - auto c = token[i]; - if (std::isspace(c)) { - continue; - } else if (c == '/') { - closingTag = true; - } else { - empty = false; - break; - } - } - - bool selfClosing = false; - int selfClosingPosition = token.length()-1; - if (!closingTag) { - for (int i = token.length()-1; i >= 0; i--) { - auto c = token[i]; - if (std::isspace(c)) { - continue; - } else if (c == '/') { - selfClosingPosition = i; - selfClosing = true; - break; - } else { - break; - } - } - } - - if (!empty) { - if (selfClosing) { - //Create element and don't jump inside - // Use node instead of currentNode 
because we do not need to jump into the created element - auto attrs = parseAttributes(token.substr(0, selfClosingPosition)); - auto node = currentNode->createElement(attrs["tagName"]); - for (auto const& pair : attrs) { - // All attributes are stored as strings so we can just throw them in - node->setAttribute(pair.first, pair.second); - } - } else if (closingTag) { - // Checksum and tree move - std::string tagName = ""; - for (auto t : token) { - if (!std::isspace(t) && t != '/') { - tagName += t; - } else if (t == '/') { - // For closing tags we just want the name - continue; - } else if (tagName.length() > 0) { - break; - } - } - if (currentNode->getTagName() == tagName) { - currentNode = currentNode->parent; - } else { - std::cerr << "malformed html: closing tag (" << tagName << ">) found for <" << currentNode->getTagName() << ">" << std::endl; - } - } else { - // Create a element and jump inside - auto attrs = parseAttributes(token); - currentNode = currentNode->createElement(attrs["tagName"]); - for (auto const& pair : attrs) { - // All attributes are stored as strings so we can just throw them in - currentNode->setAttribute(pair.first, pair.second); - } - - } - } - } - - token = ""; - continue; - } - } else if (!escaped && current == '<') { - // if the next charector is a ! - if (inputStream.peek() == '!') { - char a,b,c; - // load the next three charecters (includes !) - if (inputStream.get(a) && inputStream.get(b) && inputStream.get(c)) { - // We know a == ! 
- if (b == '-' && c == '-') { - inComment = true; - } - } - - if (inComment) { - continue; - } else { - // Not a comment add all back - inputStream.putback(c); - inputStream.putback(b); - inputStream.putback(a); - } - } - inTag = true; - - // Heres where you actually make the text node and above the real nodes - // can also prob make the vector of tokens a single variable - // it really just needs to be a data string bc we know the type based on inTag - - - bool hasText = false; - for (auto t : token) { - if (!std::isspace(t)) { - hasText = true; - break; - } - } - if (hasText) { - auto node = currentNode->createElement("text"); - node->setInnerText(token); - } - - - token = ""; - continue; - } - - escaped = false; - - if (current == '\\') { //' - escaped = true; - continue; - } - - token += current; - } - - return root; -}
#include "element.h"

#include <cctype>
#include <iostream>
#include <istream>
#include <memory>
#include <string>
#include <unordered_map>
std::unordered_map
parseAttributes(std::string token) { std::unordered_map
attrs; bool inQuote = false; bool escaped = false; char quoteType = '"'; std::string word; for (unsigned short i = 0; i < token.length(); i++) { char current = token[i]; if ((current == '"' || current == '\'') && !escaped) { if (inQuote && current == quoteType) { inQuote = false; } else if (!inQuote) { inQuote = true; quoteType = current; } } if (current == '\\') { //' escaped = true; continue; } if ((!std::isspace(current) || inQuote) && i != token.length()-1) { word += current; } else { if (i == token.length()-1) { word += current; } std::string name = ""; std::string value = ""; bool setValue = false; for (auto c : word) { if (c == '=') { setValue = true; continue; } if (setValue) { value += c; } else { name += c; } } word = ""; std::string trimmedName = ""; for (auto c : name) { if (!std::isspace(c)) { trimmedName += c; } } if (attrs.size() == 0) { value = '"'+trimmedName+'"'; trimmedName = "tagName"; } std::string trimmedValue = ""; int sliceStart = 0; int sliceEnd = value.length(); if (value.length() >= 2) { for (unsigned short t = 0; t < value.length(); t++) { if (!std::isspace(value[t])) { sliceStart = t; break; } } for (int t = value.length()-1; t >= 0; t--) { // Trim the trailing / on self closing elements if there isn't a space inbetween if (!std::isspace(value[t]) && value[t] != '/') { sliceEnd = t; break; } } // Add and subtract 1 from each side to remove quotes for (unsigned short t = sliceStart+1; t < sliceEnd; t++) { trimmedValue += value[t]; } } attrs[trimmedName] = trimmedValue; } } return attrs; } std::unique_ptr
parseStream(std::istream& inputStream) { std::unique_ptr
root = std::make_unique
(); root->setTagName("root"); Node* currentNode = root.get(); bool inTag = false; bool escaped = false; bool inQuote = false; bool inComment = false; char quoteType = '"'; std::string token; char current; while (inputStream.get(current)) { // Finds the --> and removes it then resets inComment if (inComment) { // added the peek to prevent hitting on every - if (current == '-' && inputStream.peek() == '-') { char a,b; // load the next two if (inputStream.get(a) && inputStream.get(b)) { // We know b == - if (b == '>') { // Close the comment inComment = false; } } if (!inComment) { // we don't put anything back and that puts us where we need to be continue; } else { // Not the end inputStream.putback(b); inputStream.putback(a); } } continue; } if (inTag) { if ((current == '"' || current == '\'') && !escaped) { if (inQuote && current == quoteType) { inQuote = false; } else if (!inQuote) { inQuote = true; quoteType = current; } } if (current == '>' && !inQuote && !escaped) { inTag = false; // Even if the next tag is next still add a text tag as we can just check if its empty and remove it // this check would still have to be done either way so we aren't wasting anything if (token.length() > 0) { bool empty = true; bool closingTag = false; for (int i = 0; i < token.length(); i++) { auto c = token[i]; if (std::isspace(c)) { continue; } else if (c == '/') { closingTag = true; } else { empty = false; break; } } bool selfClosing = false; int selfClosingPosition = token.length()-1; if (!closingTag) { for (int i = token.length()-1; i >= 0; i--) { auto c = token[i]; if (std::isspace(c)) { continue; } else if (c == '/') { selfClosingPosition = i; selfClosing = true; break; } else { break; } } } if (!empty) { if (selfClosing) { //Create element and don't jump inside // Use node instead of currentNode because we do not need to jump into the created element auto attrs = parseAttributes(token.substr(0, selfClosingPosition)); auto node = currentNode->createElement(attrs["tagName"]); 
for (auto const& pair : attrs) { // All attributes are stored as strings so we can just throw them in node->setAttribute(pair.first, pair.second); } } else if (closingTag) { // Checksum and tree move std::string tagName = ""; for (auto t : token) { if (!std::isspace(t) && t != '/') { tagName += t; } else if (t == '/') { // For closing tags we just want the name continue; } else if (tagName.length() > 0) { break; } } if (currentNode->getTagName() == tagName) { currentNode = currentNode->parent; } else { std::cerr << "malformed html: closing tag (" << tagName << ">) found for <" << currentNode->getTagName() << ">" << std::endl; } } else { // Create a element and jump inside auto attrs = parseAttributes(token); currentNode = currentNode->createElement(attrs["tagName"]); for (auto const& pair : attrs) { // All attributes are stored as strings so we can just throw them in currentNode->setAttribute(pair.first, pair.second); } } } } token = ""; continue; } } else if (!escaped && current == '<') { // if the next charector is a ! if (inputStream.peek() == '!') { char a,b,c; // load the next three charecters (includes !) if (inputStream.get(a) && inputStream.get(b) && inputStream.get(c)) { // We know a == ! if (b == '-' && c == '-') { inComment = true; } } if (inComment) { continue; } else { // Not a comment add all back inputStream.putback(c); inputStream.putback(b); inputStream.putback(a); } } inTag = true; // Heres where you actually make the text node and above the real nodes // can also prob make the vector of tokens a single variable // it really just needs to be a data string bc we know the type based on inTag bool hasText = false; for (auto t : token) { if (!std::isspace(t)) { hasText = true; break; } } if (hasText) { auto node = currentNode->createElement("text"); node->setInnerText(token); } token = ""; continue; } escaped = false; if (current == '\\') { //' escaped = true; continue; } token += current; } return root; }