#include "grim.h"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <istream>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

namespace {

// Whitespace test that is safe for any char value: std::isspace has undefined
// behaviour when handed a negative char, so always go through unsigned char.
bool isSpaceChar(char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
}

// Elements whose content is collected verbatim until the matching closing tag.
bool isRawTextElement(const std::string& tagName) {
    return tagName == "style" || tagName == "script" || tagName == "textarea";
}

// Trims surrounding whitespace (and a trailing '/' left over from a
// self-closing tag written without a space) from an attribute value, then
// removes one pair of matching quotes if the value is quoted. Unquoted values
// are returned whole.
std::string unquoteValue(std::string_view value) {
    std::size_t first = 0;
    while (first < value.size() && isSpaceChar(value[first])) {
        ++first;
    }
    std::size_t last = value.size(); // one past the final kept character
    while (last > first && (isSpaceChar(value[last - 1]) || value[last - 1] == '/')) {
        --last;
    }
    if (last - first >= 2) {
        const char open = value[first];
        if ((open == '"' || open == '\'') && value[last - 1] == open) {
            ++first;
            --last;
        }
    }
    return std::string(value.substr(first, last - first));
}

// Splits one `name=value` word and stores it in attrs. The very first named
// word of a tag is the tag name and is stored under the key "tagName".
void storeAttribute(const std::string& word, std::unordered_map<std::string, std::string>& attrs) {
    // Only the first '=' separates name from value; later ones belong to the value.
    const std::size_t eq = word.find('=');

    std::string name;
    for (const char c : std::string_view(word).substr(0, eq)) {
        if (!isSpaceChar(c)) {
            name += c;
        }
    }
    if (name.empty()) {
        // Blank words (repeated or trailing whitespace) carry no attribute.
        return;
    }

    if (attrs.empty()) {
        attrs["tagName"] = name;
        return;
    }

    const std::string_view value =
        (eq == std::string::npos) ? std::string_view() : std::string_view(word).substr(eq + 1);
    attrs[name] = unquoteValue(value);
}

} // namespace

// Parses the inside of a tag (the text between '<' and '>') into a map of
// attribute name -> value. The tag name is stored under the key "tagName".
// Whitespace separates attributes unless it is inside quotes; a backslash is
// dropped and protects the character after it from being treated as a quote.
std::unordered_map<std::string, std::string> parseAttributes(std::string_view token) {
    std::unordered_map<std::string, std::string> attrs;
    bool inQuote = false;
    bool escaped = false;
    char quoteType = '"';
    std::string word;

    for (std::size_t i = 0; i < token.length(); i++) {
        const char current = token[i];
        const bool isLast = (i + 1 == token.length());
        const bool isBackslash = (current == '\\');

        if ((current == '"' || current == '\'') && !escaped) {
            if (inQuote && current == quoteType) {
                inQuote = false;
            } else if (!inQuote) {
                inQuote = true;
                quoteType = current;
            }
        }
        // The escape only covers the character directly after the backslash.
        escaped = isBackslash;

        const bool isSeparator = isSpaceChar(current) && !inQuote;
        if (!isBackslash && !isSeparator) {
            word += current;
        }
        if (!isSeparator && !isLast) {
            continue;
        }

        storeAttribute(word, attrs);
        word.clear();
    }

    return attrs;
}

namespace {

// Applies one complete tag token to the tree and returns the node that should
// be current afterwards:
//   - closing tag      -> parent of currentNode (when the names match)
//   - self-closing tag -> new child is created, currentNode is unchanged
//   - opening tag      -> the newly created child
// NOTE(review): HTML void elements written without '/' (e.g. <br>) are still
// entered like normal opening tags.
Node* applyTagToken(Node* currentNode, const std::string& token) {
    bool empty = true;
    bool closingTag = false;
    for (const char c : token) {
        if (isSpaceChar(c)) {
            continue;
        }
        if (c == '/') {
            closingTag = true;
            continue;
        }
        empty = false;
        break;
    }
    if (empty) {
        return currentNode;
    }

    if (closingTag) {
        // For closing tags we just want the name
        std::string tagName;
        for (const char t : token) {
            if (t == '/') {
                continue;
            }
            if (!isSpaceChar(t)) {
                tagName += t;
            } else if (!tagName.empty()) {
                break;
            }
        }

        if (currentNode->getTagName() == tagName) {
            // Never climb above the root, even if the input closes it explicitly.
            Node* parent = currentNode->parent;
            if (parent != nullptr) {
                return parent;
            }
        } else {
            std::cerr << "malformed html: closing tag (</" << tagName << ">) found for <"
                      << currentNode->getTagName() << ">" << std::endl;
        }
        return currentNode;
    }

    // A '/' as the last non-blank character marks a self-closing element.
    std::size_t end = token.size();
    while (end > 0 && isSpaceChar(token[end - 1])) {
        --end;
    }
    if (end > 0 && token[end - 1] == '/') {
        // Create the element but do not jump inside it.
        auto attrs = parseAttributes(std::string_view(token).substr(0, end - 1));
        auto node = currentNode->createElement(attrs["tagName"]);
        for (const auto& pair : attrs) {
            // All attributes are stored as strings so we can just throw them in
            node->setAttribute(pair.first, pair.second);
        }
        return currentNode;
    }

    // Create an element and jump inside
    auto attrs = parseAttributes(token);
    // Don't add declarations such as <!DOCTYPE ...>
    if (attrs["tagName"][0] != '!') {
        currentNode = currentNode->createElement(attrs["tagName"]);
        for (const auto& pair : attrs) {
            currentNode->setAttribute(pair.first, pair.second);
        }
    }
    return currentNode;
}

} // namespace

// Reads HTML from inputStream and builds a node tree under a synthetic element
// named "root", which is returned.
//   - <!-- ... --> comments are skipped.
//   - Inside style/script/textarea everything up to the matching closing tag
//     is collected verbatim.
//   - Non-blank text between tags becomes a "text" element whose "innerText"
//     attribute holds the text.
//   - Outside raw-text elements a backslash is dropped and protects the next
//     character from being treated as a quote, '<' or '>'.
std::unique_ptr<Node> parseHTML(std::istream& inputStream) {
    std::unique_ptr<Node> root = std::make_unique<Node>();
    root->setTagName("root");
    Node* currentNode = root.get();

    bool inTag = false;
    bool escaped = false;
    bool inQuote = false;
    bool inComment = false;
    bool waitToClose = false;
    char quoteType = '"';
    int commentDashes = 0; // consecutive '-' seen so far inside a comment
    std::string token;
    char current;

    // Emits the pending text (if it is not blank) as a "text" child.
    // TODO: Add a vector that stores inner text and when elements start to close out pop them but add all to their innerText
    // | Will need something similar for innerHTML
    const auto flushText = [&]() {
        const bool hasText = std::any_of(token.begin(), token.end(),
                                         [](char c) { return !isSpaceChar(c); });
        if (hasText) {
            auto node = currentNode->createElement("text");
            node->setAttribute("innerText", token);
        }
        token.clear();
    };

    while (inputStream.get(current)) {
        if (inComment) {
            // A comment ends at the first '>' preceded by at least two dashes.
            // Counting dashes avoids reading ahead and putting characters back.
            if (current == '-') {
                ++commentDashes;
            } else {
                if (current == '>' && commentDashes >= 2) {
                    inComment = false;
                }
                commentDashes = 0;
            }
            continue;
        }

        const std::string tagName = currentNode->getTagName();

        if (!waitToClose && isRawTextElement(tagName)) {
            if (current == '<' && inputStream.peek() == '/') {
                // Possible start of the closing tag: read ahead to see whether
                // it closes this element, then put the characters back so the
                // normal tag parsing below can consume them.
                const std::string expected = "/" + tagName + ">";
                std::string lookahead(expected.size(), '\0');
                inputStream.read(&lookahead[0], static_cast<std::streamsize>(lookahead.size()));
                lookahead.resize(static_cast<std::size_t>(inputStream.gcount()));
                if (!inputStream) {
                    // Short read at end of input: reset state so putback works.
                    inputStream.clear();
                }
                for (auto it = lookahead.rbegin(); it != lookahead.rend(); ++it) {
                    inputStream.putback(*it);
                }
                if (lookahead == expected) {
                    waitToClose = true;
                }
            }
            if (!waitToClose) {
                token += current;
                continue;
            }
        }

        if (inTag) {
            if ((current == '"' || current == '\'') && !escaped) {
                if (inQuote && current == quoteType) {
                    inQuote = false;
                } else if (!inQuote) {
                    inQuote = true;
                    quoteType = current;
                }
            }

            if (current == '>' && !inQuote && !escaped) {
                inTag = false;
                waitToClose = false;
                if (!token.empty()) {
                    currentNode = applyTagToken(currentNode, token);
                }
                token.clear();
                continue;
            }
        } else if (!escaped && current == '<') {
            if (inputStream.peek() == '!') {
                // Load the next three characters (includes the '!') to see
                // whether this opens a comment.
                char ahead[3] = {};
                int got = 0;
                while (got < 3 && inputStream.get(ahead[got])) {
                    ++got;
                }
                if (got == 3 && ahead[1] == '-' && ahead[2] == '-') {
                    // Text collected before the comment stays in token and
                    // keeps accumulating after the comment ends.
                    inComment = true;
                    commentDashes = 0;
                    continue;
                }
                // Not a comment: give back exactly what was read.
                if (!inputStream) {
                    inputStream.clear();
                }
                while (got > 0) {
                    inputStream.putback(ahead[--got]);
                }
            }

            inTag = true;
            flushText();
            continue;
        }

        escaped = false;
        if (current == '\\') {
            escaped = true;
            continue;
        }

        token += current;
    }

    // Text after the last tag would otherwise be lost. An unterminated tag is
    // not text, so it is dropped.
    if (!inTag) {
        flushText();
    }

    return root;
}

// Verifies that every child of property has a type listed in allowedTypes.
// On the first child whose type is not allowed, prints the offending
// key/value to stderr and terminates the process with exit code 10.
// A null property has no children and is accepted.
void typeCheck(std::string key, std::string value, Unit* property, std::vector<UnitType> allowedTypes) {
    if (property == nullptr) {
        return;
    }
    for (const auto& child : property->children) {
        const bool allowed =
            std::find(allowedTypes.begin(), allowedTypes.end(), child.type) != allowedTypes.end();
        if (!allowed) {
            std::cerr << "Invalid value of property: " << key << " Value: " << value << std::endl;
            std::exit(10);
        }
    }
}

struct SuffixParse {
    std::string_view suffix;
    UnitType type;
    int trim; // chars to strip from the right
};

// Unit suffixes, longest first so that e.g. "rem" is tried before "em" and
// "cqmin" before "in".
static constexpr SuffixParse table[] = {
    {"cqmin", UnitType::CQMIN, 5},
    {"cqmax", UnitType::CQMAX, 5},
    {"rcap",  UnitType::RCAP,  4},
    {"vmax",  UnitType::VMAX,  4},
    {"vmin",  UnitType::VMIN,  4},
    {"cap",   UnitType::CAP,   3},
    {"rch",   UnitType::RCH,   3},
    {"rem",   UnitType::REM,   3},
    {"rex",   UnitType::REX,   3},
    {"ric",   UnitType::RIC,   3},
    {"rlh",   UnitType::RLH,   3},
    {"cqw",   UnitType::CQW,   3},
    {"cqh",   UnitType::CQH,   3},
    {"cqi",   UnitType::CQI,   3},
    {"cqb",   UnitType::CQB,   3},
    {"px",    UnitType::PX,    2},
    {"em",    UnitType::EM,    2},
    {"cm",    UnitType::CM,    2},
    {"ch",    UnitType::CH,    2},
    {"ex",    UnitType::EX,    2},
    {"ic",    UnitType::IC,    2},
    {"lh",    UnitType::LH,    2},
    {"vh",    UnitType::VH,    2},
    {"vw",    UnitType::VW,    2},
    {"vb",    UnitType::VB,    2},
    {"vi",    UnitType::VI,    2},
    {"mm",    UnitType::MM,    2},
    {"in",    UnitType::IN,    2},
    {"pc",    UnitType::PC,    2},
    {"pt",    UnitType::PT,    2},
    {"%",     UnitType::PERCENT, 1},
    {"q",     UnitType::Q,     1},
};

namespace {

struct SizeKeyword {
    std::string_view name;
    float em;
};

// Absolute font-size keywords, read as EM multipliers.
constexpr SizeKeyword sizeKeywords[] = {
    {"xx-small", 0.6f}, {"x-small", 0.75f}, {"small", 0.89f},   {"medium", 1.0f},
    {"large", 1.2f},    {"x-large", 1.5f},  {"xx-large", 2.0f}, {"xxx-large", 3.0f},
};

struct BoxKeyword {
    std::string_view prefix;
    UnitType edge;
};

constexpr BoxKeyword boxKeywords[] = {
    {"content", UnitType::CONTENT}, {"padding", UnitType::PADDING}, {"border", UnitType::BORDER},
    {"margin", UnitType::MARGIN},   {"fill", UnitType::FILL},       {"stroke", UnitType::STROKE},
    {"view", UnitType::VIEW},
};

// Parses text as a complete decimal number. Returns false (leaving out
// untouched) unless the whole string is consumed, so keywords that merely end
// in a unit suffix ("flex", "stretch", "thin") are not mistaken for lengths.
bool parseNumber(const std::string& text, float& out) {
    if (text.empty()) {
        return false;
    }
    const char first = text.front();
    const bool numericStart = std::isdigit(static_cast<unsigned char>(first)) != 0 ||
                              first == '+' || first == '-' || first == '.';
    if (!numericStart) {
        return false;
    }
    char* end = nullptr;
    const float parsed = std::strtof(text.c_str(), &end);
    if (end == text.c_str() || *end != '\0') {
        return false;
    }
    out = parsed;
    return true;
}

// Parses the digits after '#' as a hexadecimal number; rejects anything that
// is not purely hex digits instead of throwing.
bool parseHexDigits(const std::string& digits, unsigned long& out) {
    if (digits.empty()) {
        return false;
    }
    for (const char c : digits) {
        if (std::isxdigit(static_cast<unsigned char>(c)) == 0) {
            return false;
        }
    }
    out = std::strtoul(digits.c_str(), nullptr, 16);
    return true;
}

// Fills unit from a single value token. baselineModifier is a preceding
// "first"/"last" word (or empty) and only affects the "baseline" keyword.
// Returns false when the token is not recognised.
bool classifyToken(const std::string& token, const std::string& baselineModifier, Unit& unit) {
    for (const auto& [suffix, type, trim] : table) {
        if (!token.ends_with(suffix)) {
            continue;
        }
        float number = 0.0f;
        if (parseNumber(token.substr(0, token.size() - static_cast<std::size_t>(trim)), number)) {
            unit.value.FLOAT = number;
            unit.type = type;
            return true;
        }
        // The longest matching suffix did not leave a number in front of it,
        // so this token is a keyword rather than a dimension.
        break;
    }

    if (token.starts_with("#")) {
        unsigned long colour = 0;
        if (!parseHexDigits(token.substr(1), colour)) {
            return false;
        }
        unit.type = UnitType::HEX;
        unit.value.UINT = colour;
        return true;
    }

    for (const auto& keyword : sizeKeywords) {
        if (token == keyword.name) {
            unit.type = UnitType::EM;
            unit.value.FLOAT = keyword.em;
            return true;
        }
    }

    if (token.ends_with("baseline")) {
        unit.type = UnitType::BASELINE;
        // "last baseline" arrives as two words; a bare "baseline" means first.
        const std::string combined = baselineModifier + token;
        unit.value.ENUM = combined.starts_with("last") ? UnitType::LAST : UnitType::FIRST;
        return true;
    }

    if (token.ends_with("-box")) {
        for (const auto& keyword : boxKeywords) {
            if (token.starts_with(keyword.prefix)) {
                unit.type = UnitType::BOX_EDGE;
                unit.value.ENUM = keyword.edge;
                return true;
            }
        }
    }

    return false;
}

} // namespace

// Splits a CSS value into units. Tokens are separated by whitespace or commas
// outside quotes and parentheses; quote characters themselves are dropped.
// Recognised tokens are dimensions (number + suffix from `table`), `#hex`
// values, absolute font-size keywords, `[first|last] baseline` and `*-box`
// keywords. Unrecognised tokens are skipped.
//
// The way this is set up to parse func() calls is like so
// | calc(1px, 2px);
// | -> value of 1px, 2px gets put into getUnit()
// | those become the children of the token the parentheses follow
// The children are kept only if that token is itself recognised.
std::vector<Unit> getUnit(std::string value) {
    std::vector<Unit> units;
    std::vector<Unit> children;
    std::string buffer;
    std::string parenthesesBuffer;
    std::string baselineModifier;
    std::size_t parenDepth = 0;
    bool inQuotes = false;
    char quoteType = '\0';

    // Turns the pending token into a unit (when recognised) and resets state.
    const auto flush = [&]() {
        if (buffer.empty()) {
            return;
        }
        if (buffer == "first" || buffer == "last") {
            // Modifier for a following "baseline" keyword.
            baselineModifier = buffer;
            buffer.clear();
            return;
        }

        Unit unit;
        if (classifyToken(buffer, baselineModifier, unit)) {
            if (!children.empty()) {
                unit.children = children;
            }
            units.push_back(unit);
        }
        // Always reset, so an unknown token cannot glue itself onto the next
        // one (e.g. "solid 1px" must not turn into "solid1px").
        baselineModifier.clear();
        children.clear();
        buffer.clear();
    };

    for (const char v : value) {
        if (parenDepth > 0) {
            if (v == '(') {
                ++parenDepth;
            } else if (v == ')' && --parenDepth == 0) {
                // The closing parenthesis itself is not part of the arguments.
                children = getUnit(parenthesesBuffer);
                parenthesesBuffer.clear();
                continue;
            }
            parenthesesBuffer.push_back(v);
            continue;
        }

        if (v == '(' && !inQuotes) {
            parenDepth = 1;
        } else if (v == '"' || v == '\'') {
            if (inQuotes && v == quoteType) {
                inQuotes = false;
            } else if (!inQuotes) {
                quoteType = v;
                inQuotes = true;
            }
        } else if (!inQuotes && (v == ',' || isSpaceChar(v))) {
            flush();
        } else {
            buffer.push_back(v);
        }
    }
    // The value may end with a token, a closing quote or a ')'.
    flush();

    return units;
}

namespace {

struct AtRuleInfo {
    std::string_view keyword;
    SheetType type;
    bool named; // the token after the keyword is stored as the sheet name
};

constexpr AtRuleInfo atRules[] = {
    {"@counter-style",       SheetType::COUNTER,           true},
    {"@font-face",           SheetType::FONTFACE,          false},
    {"@font-feature",        SheetType::FONTFEATURE,       false},
    {"@swash",               SheetType::SWASH,             false},
    {"@annotation",          SheetType::ANNOTATION,        false},
    {"@ornaments",           SheetType::ORNAMENTS,         false},
    {"@stylistic",           SheetType::STYLISTIC,         false},
    {"@styleset",            SheetType::STYLESET,          false},
    {"@character-variant",   SheetType::CHARACTERVARIANT,  false},
    // Misspelling kept so input that relied on it still matches.
    {"@character-varient",   SheetType::CHARACTERVARIANT,  false},
    {"@font-palette-values", SheetType::FONTPALETTEVALUES, true},
    {"@keyframes",           SheetType::KEYFRAMES,         true},
    {"@layer",               SheetType::LAYER,             false},
    {"@media",               SheetType::MEDIA,             false},
    {"@property",            SheetType::PROPERTY,          true},
    {"@starting-style",      SheetType::STARTINGSTYLE,     false},
};

// Sets the type (and name, where the rule has one) of a block at-rule from the
// tokens of its first selector. tokens must be non-empty and must not alias
// sheet.selector. Unknown at-rules leave the sheet type untouched.
template <typename Tokens>
void configureAtRule(Sheet& sheet, const Tokens& tokens) {
    const auto& keyword = tokens[0];

    if (keyword == "@container") {
        sheet.type = SheetType::CONTAINER;

        // @container not (width < 400px)                       -> no name
        // @container (width > 400px) or|and (height > 400px)   -> no name
        // @container sidebar (width > 400px)                   -> name "sidebar"
        // NOTE(review): assumes separators are tokens of their own, so the
        // name sits at index 2 and a leading and/or at index 4 - confirm
        // against parseSelectorParts.
        bool hasName = false;
        if (tokens.size() >= 5 && tokens[2] != "not" && tokens[4] != "and" && tokens[4] != "or") {
            sheet.name = tokens[2];
            hasName = true;
        }

        // If a name is present remove the name and tag from the selector,
        // otherwise just remove the @container.
        if (!sheet.selector.empty()) {
            auto& first = sheet.selector[0];
            const std::size_t drop = std::min<std::size_t>(hasName ? 3 : 1, first.size());
            first.erase(first.begin(), first.begin() + static_cast<std::ptrdiff_t>(drop));
        }
        return;
    }

    for (const auto& rule : atRules) {
        if (keyword == rule.keyword) {
            sheet.type = rule.type;
            if (rule.named && tokens.size() > 2) {
                sheet.name = tokens[2];
            }
            return;
        }
    }
}

// Splits `key: value` text (without the terminating ';'). Whitespace is
// removed from the key; the value loses leading and trailing whitespace and
// has runs of whitespace collapsed to their last character. Returns false when
// there is no key.
bool splitDeclaration(std::string_view text, std::string& key, std::string& value) {
    key.clear();
    value.clear();
    bool keyFound = false;

    for (std::size_t i = 0; i < text.size(); ++i) {
        const char c = text[i];
        if (!keyFound) {
            if (c == ':') {
                keyFound = true;
            } else if (!isSpaceChar(c)) {
                key += c;
            }
            continue;
        }

        const bool space = isSpaceChar(c);
        if (space && value.empty()) {
            continue; // leading whitespace
        }
        if (space && i + 1 < text.size() && isSpaceChar(text[i + 1])) {
            continue; // collapse duplicate whitespace
        }
        value += c;
    }

    while (!value.empty() && isSpaceChar(value.back())) {
        value.pop_back();
    }
    return !key.empty();
}

} // namespace

// Parses a stylesheet from inputStream into the sheet tree rooted at
// window->CSS.root.
//   nowhitespace: value(can have whitespace);  = property
//   a selector name is anything before a { up to a ; or a }
// Nested rules have every "&" replaced by each of the parent's selectors.
// Block at-rules set the sheet type (and name); `@rule ...;` statements are
// stored as child sheets. Properties whose name starts with "--" are stored as
// variables. /* ... */ comments are skipped.
// NOTE(review): sheets keep raw `parent` pointers to elements of the parent's
// `children` container; if that container reallocates on push_back, pointers
// held by already-closed siblings' descendants may dangle - confirm the
// container type in grim.h.
void StyleHandler::parseStream(std::istream& inputStream) {
    Sheet* root = &window->CSS.root;
    Sheet* current = root;
    // how to handle @import
    std::string buffer;
    bool incomment = false;
    char ch;

    // Stores the statement held in buffer (a declaration or a single line
    // at-rule) on the current sheet and clears the buffer.
    const auto commitStatement = [&]() {
        const auto firstChar = std::find_if(buffer.begin(), buffer.end(),
                                            [](char c) { return !isSpaceChar(c); });
        if (firstChar == buffer.end()) {
            buffer.clear();
            return;
        }

        if (*firstChar == '@') {
            // Single line at-rule
            Sheet inlineRule;
            auto parts = parseSelectorParts(buffer);
            inlineRule.selector = parts;

            if (!parts.empty() && !parts[0].empty()) {
                if (parts[0][0] == "@import") {
                    if (parts[0].size() > 2) {
                        inlineRule.path = parts[0][2];
                    }
                    if (parts[0].size() > 3) {
                        // See if the last word ends in )
                        // if not then it is the name of the layer
                        const auto& lastWord = parts[0].back();
                        if (!lastWord.empty() && lastWord.back() != ')') {
                            inlineRule.name = lastWord;
                        }
                    }
                } else if (parts[0][0] == "@layer") {
                    // ISSUE: need to add specificity
                }
            }

            current->children.push_back(inlineRule);
        } else {
            // end of a style property
            std::string key;
            std::string value;
            if (splitDeclaration(buffer, key, value)) {
                if (key.length() > 2 && key[0] == '-' && key[1] == '-') {
                    current->variables[key] = value;
                } else {
                    current->properties[key] = value;
                }
            }
        }
        buffer.clear();
    };

    while (inputStream.get(ch)) {
        if (incomment) {
            if (ch == '*' && inputStream.peek() == '/') {
                // Consume the '/' as well so it is not captured
                inputStream.get(ch);
                incomment = false;
            }
            continue;
        }
        if (ch == '/' && inputStream.peek() == '*') {
            // Consume the '*' so that "/*/" is not read as a whole comment
            inputStream.get(ch);
            incomment = true;
            continue;
        }

        if (ch == '{') {
            while (!buffer.empty() && isSpaceChar(buffer.back())) {
                buffer.pop_back();
            }

            // Parse the selector into its parts
            auto parts = parseSelectorParts(buffer);

            if (!current->selector.empty()) {
                // To be able to handle a substitution with a parent that has h1, h2
                // we need to rebuild the selector so we can add new copies
                decltype(parts) substituted;

                for (const auto& part : parts) {
                    const bool hasAmp = std::any_of(part.begin(), part.end(),
                                                    [](const auto& piece) { return piece == "&"; });
                    if (!hasAmp) {
                        // If there is nothing to replace just add to back
                        substituted.push_back(part);
                        continue;
                    }

                    // Replace the amp with each parent selector
                    // Parent [[h1],[h2]]
                    // Child:  [[& .class, &, &]]
                    // Result: [[h1 .class, h1, h1]]
                    //         [[h2 .class, h2, h2]]
                    for (const auto& parentSelector : current->selector) {
                        typename decltype(parts)::value_type expanded;
                        for (const auto& piece : part) {
                            if (piece == "&") {
                                expanded.insert(expanded.end(), parentSelector.begin(), parentSelector.end());
                            } else {
                                expanded.push_back(piece);
                            }
                        }
                        substituted.push_back(expanded);
                    }
                }

                parts = substituted;
            }

            Sheet newSheet;
            newSheet.parent = current;
            newSheet.selector = parts;
            current->children.push_back(newSheet);
            current = &current->children.back();

            // Work out what kind of block this is from its selector
            const bool isAtRule = !parts.empty() && !parts[0].empty() &&
                                  !parts[0][0].empty() && parts[0][0][0] == '@';
            if (isAtRule) {
                configureAtRule(*current, parts[0]);
            } else {
                current->type = SheetType::STYLE;
            }

            buffer.clear();
        } else if (ch == '}') {
            // The last declaration of a block may omit its ';'
            commitStatement();

            if (current->parent == nullptr) {
                // Stray '}' at the top level: nothing to close.
                continue;
            }

            // Calculate the basemap (more info in grim.h)
            if (!current->selector.empty()) {
                for (const auto& part : current->selector.back()) {
                    current->parent->basemap[part].push_back(window->CSS.index);
                }
            }
            window->CSS.index++;

            current = current->parent;
        } else if (ch == ';') {
            commitStatement();
        } else {
            buffer.push_back(ch);
        }
    }
}

// Parses the contents of an inline `style="..."` attribute into a
// property -> value map. Declarations are separated by ';'; the last one does
// not need a terminator. Keys and values are normalised the same way as
// stylesheet declarations; entries without a key are skipped, and a repeated
// key keeps its last value.
std::unordered_map<std::string, std::string> parseCSSInline(std::string inlineStyles) {
    std::unordered_map<std::string, std::string> props;
    const std::string_view all = inlineStyles;

    std::size_t start = 0;
    while (start < all.size()) {
        std::size_t end = all.find(';', start);
        if (end == std::string_view::npos) {
            end = all.size();
        }

        std::string key;
        std::string value;
        if (splitDeclaration(all.substr(start, end - start), key, value)) {
            props[key] = value;
        }
        start = end + 1;
    }

    return props;
}