aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLoic Guegan <manzerbredes@mailbox.org>2022-01-26 12:03:24 +0100
committerLoic Guegan <manzerbredes@mailbox.org>2022-01-26 12:03:24 +0100
commitbb914f047b0d96464f3e55234907df4e7c416e97 (patch)
tree82963c3a4c9ea272163365ddf3144bd46fe78e82
parent8a770f9133957ad4356810efa4ede6e20dd01d26 (diff)
- Enable NAG parsing
- Add the LargeFileStream interface to handle large file - Debug parsing
-rw-r--r--CMakeLists.txt3
-rw-r--r--README.md4
-rw-r--r--src/HalfMove.cpp14
-rw-r--r--src/HalfMove.hpp1
-rw-r--r--src/LargeFileStream.cpp63
-rw-r--r--src/LargeFileStream.hpp52
-rw-r--r--src/PGN.cpp116
-rw-r--r--src/PGN.hpp23
8 files changed, 205 insertions, 71 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2381751..f493c30 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
project(pgnp)
# Shared library
-add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp)
+add_library(pgnp SHARED src/PGN.cpp src/HalfMove.cpp src/LargeFileStream.cpp)
# Includes
set(PGNP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/includes) # For conveniance
@@ -10,6 +10,7 @@ set(PGNP_INCLUDE_DIR ${PGNP_INCLUDE_DIR} PARENT_SCOPE) # To be used by other pro
file(MAKE_DIRECTORY ${PGNP_INCLUDE_DIR})
configure_file(src/PGN.hpp ${PGNP_INCLUDE_DIR}/pgnp.hpp COPYONLY)
configure_file(src/HalfMove.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
+configure_file(src/LargeFileStream.hpp ${PGNP_INCLUDE_DIR} COPYONLY)
include_directories(${PGNP_INCLUDE_DIR})
diff --git a/README.md b/README.md
index 99fbdb0..af417e3 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,10 @@ PGNP is a Portable Game Notation (PGN) parser. More details about the
PGN specification can be found [here](https://www.chessclub.com/help/PGN-spec).
# Features
-- Basic PGN parsing (tags, move, comments, variations etc.)
+- Basic PGN parsing (tags, move, comments, variations, NAG, etc.)
- Merged PGN files parsing (several games in one file)
+- Handles very large files (several GB)
+- Very efficient
# How to use it ?
PGNP can be used as a shared library in your project.
diff --git a/src/HalfMove.cpp b/src/HalfMove.cpp
index 290587a..7e94ef9 100644
--- a/src/HalfMove.cpp
+++ b/src/HalfMove.cpp
@@ -19,8 +19,8 @@ std::string HalfMove::NestedDump(HalfMove *m, int indent) {
ss << " "
<< " Move=" << m->move << " Count=" << m->count << " Comment=\""
<< m->comment << "\""
- << " IsBlack=" << m->isBlack << " Variations=" << m->variations.size()
- << std::endl;
+ << " NAG=" << m->NAG << " IsBlack=" << m->isBlack
+ << " Variations=" << m->variations.size() << std::endl;
for (auto *var : m->variations) {
ss << NestedDump(var, indent + 1);
@@ -65,15 +65,15 @@ void HalfMove::Copy(HalfMove *copy) {
}
HalfMove *HalfMove::GetHalfMoveAt(int distance) {
- HalfMove *tmp=this;
- while(distance>0){
- if(tmp==NULL){
+ HalfMove *tmp = this;
+ while (distance > 0) {
+ if (tmp == NULL) {
throw HalfMoveOutOfRange();
}
distance--;
- tmp=tmp->MainLine;
+ tmp = tmp->MainLine;
}
- return(tmp);
+ return (tmp);
}
} // namespace pgnp \ No newline at end of file
diff --git a/src/HalfMove.hpp b/src/HalfMove.hpp
index e049571..8edea93 100644
--- a/src/HalfMove.hpp
+++ b/src/HalfMove.hpp
@@ -21,6 +21,7 @@ public:
std::string move;
/// @brief Comment associated to the move
std::string comment;
+ std::string NAG;
/// @brief Next HalfMove link to this line
HalfMove *MainLine;
/// @brief Next HalfMove links to variation of this line
diff --git a/src/LargeFileStream.cpp b/src/LargeFileStream.cpp
new file mode 100644
index 0000000..547415b
--- /dev/null
+++ b/src/LargeFileStream.cpp
@@ -0,0 +1,63 @@
+#include "LargeFileStream.hpp"
+
+namespace pgnp {
+using namespace std;
+
+/// @brief Build an empty stream: no chunk loaded yet (chuck_count starts at -1
+/// so that the first ReadNextChunk() brings it to 0), file mode selected and
+/// EOF flag cleared.
+LargeFileStream::LargeFileStream()
+ : chuck_count(-1), last_read_size(0), last_loc(0), use_string(false),
+ eof(false) {}
+
+/// @brief Open the file at @a filepath and load its first chunk into the
+/// buffer. The outcome of the open is not checked here.
+/// @param filepath Path of the file to read
+void LargeFileStream::FromFile(std::string filepath) {
+ file.open(filepath);
+ ReadNextChunk();
+}
+
+/// @brief Serve characters from a string instead of a file: keeps a copy of
+/// @a content and makes operator[] take its string branch.
+/// @param content Data to expose through operator[]
+void LargeFileStream::FromString(std::string content) {
+ use_string = true;
+ this->content = content;
+}
+
+/// @brief Load the next chunk of the file into the buffer (overwriting the
+/// previous one) and record how many bytes were actually read.
+void LargeFileStream::ReadNextChunk() {
+ chuck_count++;
+ file.read(buffer, BUFFER_SIZE);
+ // May be lower than BUFFER_SIZE when the end of the file is reached
+ last_read_size = file.gcount();
+}
+
+/// @brief Array-like, forward-only access to the character at offset @a loc.
+///
+/// Offsets must be requested in non-decreasing order. In file mode, the chunks
+/// needed to reach @a loc are loaded on demand. When @a loc is past the end of
+/// the data, the EOF flag is raised and the placeholder '?' is returned; any
+/// later access throws.
+///
+/// @param loc Offset of the requested character from the start of the data
+/// @return The character at @a loc, or '?' if @a loc is past the end
+/// @throws ReadToFar if EOF was already reached by a previous access
+/// @throws BackwardRead if @a loc is lower than the last requested offset
+char LargeFileStream::operator[](long loc) {
+  // Perform various checks
+  if (eof) {
+    throw ReadToFar();
+  }
+  if (loc < last_loc) {
+    throw BackwardRead();
+  }
+  last_loc = loc; // Keep track
+
+  // Shortcut the operator for string content
+  if (use_string) {
+    // loc cannot be negative here: last_loc starts at 0 and the backward-read
+    // check above rejects anything lower, so the unsigned cast is safe.
+    if (static_cast<std::string::size_type>(loc) >= content.size()) {
+      eof = true;
+      return ('?');
+    }
+    // Return the real character (it used to be '?' unconditionally)
+    return (content[loc]);
+  }
+
+  // Goto the right memory chunk
+  long loc_chunk_count = loc / BUFFER_SIZE;
+  while (chuck_count < loc_chunk_count) {
+    ReadNextChunk();
+  }
+  long offset = loc - (loc_chunk_count * BUFFER_SIZE);
+
+  // Ensure for EOF: the stream is in a failed state and the requested offset
+  // lies beyond what the last read brought into the buffer
+  if (!file && offset >= last_read_size) {
+    eof = true;
+    return ('?');
+  }
+
+  // Return character
+  return buffer[offset];
+}
+
+/// @brief Tell whether a previous operator[] access went past the end of the
+/// data. Note that @a loc is currently ignored: only the internal EOF flag is
+/// reported, so an offset that has not been read yet is never seen as EOF.
+bool LargeFileStream::IsEOF(long loc) { return (eof); }
+
+} // namespace pgnp \ No newline at end of file
diff --git a/src/LargeFileStream.hpp b/src/LargeFileStream.hpp
new file mode 100644
index 0000000..6ac87e8
--- /dev/null
+++ b/src/LargeFileStream.hpp
@@ -0,0 +1,52 @@
+#define BUFFER_SIZE (1024 * 1024 / 2)
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+namespace pgnp {
+using namespace std;
+
+/// @brief Forward-only, array-like reader over a file loaded chunk by chunk
+/// (BUFFER_SIZE bytes at a time) or over an in-memory string.
+class LargeFileStream {
+  /// @brief File to load
+  std::ifstream file;
+  /// @brief In memory buffer (stored inside the object, so every instance is
+  /// at least BUFFER_SIZE bytes large)
+  char buffer[BUFFER_SIZE];
+  /// @brief Number of chunks read minus 1
+  long chuck_count;
+  /// @brief Number of bytes read during the last file access
+  long last_read_size;
+  /// @brief Keep track of the file offset (to prevent backward read)
+  long last_loc;
+  /// @brief Use a string as file content
+  std::string content;
+  /// @brief Used to shortcut some methods when reading from @a content
+  bool use_string;
+  /// @brief End Of File ?
+  bool eof;
+
+  /// @brief Load the next chunk of data from disk to memory
+  void ReadNextChunk();
+
+public:
+  LargeFileStream();
+  /// @brief Open a file and load its first chunk
+  void FromFile(std::string filepath);
+  /// @brief Emulate file access with a string
+  void FromString(std::string content);
+  /// @brief Allow array like access to the file
+  /// @throws BackwardRead if @a loc is lower than the last requested offset
+  /// @throws ReadToFar if the end of the data was already reached
+  char operator[](long loc);
+  /// @brief Check if we reached the EOF
+  bool IsEOF(long loc);
+
+  // Various Exceptions
+  /// @brief Thrown when an offset lower than the previous one is requested
+  struct BackwardRead : public std::exception {
+    // noexcept replaces the throw() specification (removed in C++20)
+    const char *what() const noexcept override {
+      return "LargeFileStream cannot read backward";
+    }
+  };
+  /// @brief Thrown when reading again after the end of the data was reached
+  struct ReadToFar : public std::exception {
+    const char *what() const noexcept override {
+      return "You reach the end of the file";
+    }
+  };
+};
+
+} // namespace pgnp \ No newline at end of file
diff --git a/src/PGN.cpp b/src/PGN.cpp
index cd4bd96..953f225 100644
--- a/src/PGN.cpp
+++ b/src/PGN.cpp
@@ -7,7 +7,7 @@
#define IS_DIGIT(c) \
(c == '0' || c == '1' || c == '2' || c == '3' || c == '4' || c == '5' || \
c == '6' || c == '7' || c == '8' || c == '9')
-#define IS_EOF(loc) ((loc) >= pgn_content.size())
+#define IS_EOF(loc) (pgn_content.IsEOF(loc))
#define EOF_CHECK(loc) \
{ \
if (IS_EOF(loc)) \
@@ -25,17 +25,10 @@ PGN::~PGN() {
std::string PGN::GetResult() { return (result); }
-void PGN::FromFile(std::string filepath) {
- std::ifstream inFile;
- inFile.open(filepath);
- std::stringstream strStream;
- strStream << inFile.rdbuf();
-
- this->pgn_content = strStream.str();
-}
+void PGN::FromFile(std::string filepath) { pgn_content.FromFile(filepath); }
void PGN::FromString(std::string pgn_content) {
- this->pgn_content = pgn_content;
+ this->pgn_content.FromString(pgn_content);
}
void PGN::ParseNextGame() {
@@ -46,20 +39,26 @@ void PGN::ParseNextGame() {
result = "";
tagkeys.clear();
tags.clear();
-
moves = new HalfMove();
+
+ // Search for new game
+ if (IS_EOF(LastGameEndLoc)) {
+ throw NoGameFound();
+ }
int loc = NextNonBlank(LastGameEndLoc);
+
if (IS_EOF(loc)) {
throw NoGameFound();
}
+
+ // Parse game
while (!IS_EOF(loc)) {
char c = pgn_content[loc];
if (!IS_BLANK(c)) {
if (c == '[') {
loc = ParseNextTag(loc);
} else if (IS_DIGIT(c)) {
- loc = ParseHalfMove(loc, moves);
- LastGameEndLoc = loc + 1; // Next game start 1 char after the last one
+ LastGameEndLoc = ParseHalfMove(loc, moves);
break;
} else if (c == '{') {
loc = ParseComment(loc, moves);
@@ -68,6 +67,7 @@ void PGN::ParseNextGame() {
}
loc++;
}
+
if (result.size() <= 0) {
throw InvalidGameResult();
}
@@ -102,7 +102,7 @@ bool PGN::HasTag(std::string key) {
return (std::find(tags.begin(), tags.end(), key) != tags.end());
}
-int PGN::ParseComment(int loc, HalfMove *hm) {
+long PGN::ParseComment(long loc, HalfMove *hm) {
// Goto next char
loc = NextNonBlank(loc);
EOF_CHECK(loc);
@@ -123,42 +123,39 @@ int PGN::ParseComment(int loc, HalfMove *hm) {
return (loc);
}
-int PGN::ParseHalfMove(int loc, HalfMove *hm) {
+long PGN::ParseHalfMove(long loc, HalfMove *hm) {
// Goto next char
loc = NextNonBlank(loc);
EOF_CHECK(loc);
char c = pgn_content[loc];
// Check if we reach score entry (* or 1-0 or 0-1 or 1/2-1/2)
- if (!IS_EOF(loc + 1)) {
- char nc = pgn_content[loc + 1]; // Next c
- if ((IS_DIGIT(c) && nc == '-') or (IS_DIGIT(c) && nc == '/') or c == '*') {
- if (c == '*') {
- result = "*";
- } else if (nc == '-') {
- if (c == '1') {
- result = "1-0";
- loc += 2;
- } else {
- result = "0-1";
- loc += 2;
- }
- } else {
- result = "1/2-1/2";
- loc += 6;
- }
- return (loc);
- }
+ if (c == '*') {
+ result = "*";
+ return (loc + 1);
}
- // Parse (move number
+ // Parse move number and check if end of game
if (IS_DIGIT(c)) {
std::string move_nb;
+ char first_digit = c;
while (IS_DIGIT(c)) {
move_nb += c;
loc++;
c = pgn_content[loc];
EOF_CHECK(loc);
+ if (c == '/' || c == '-') {
+ if (c == '/') {
+ result = "1/2-1/2";
+ return (loc + 6);
+ } else if (first_digit == '1') {
+ result = "1-0";
+ return (loc + 2);
+ } else {
+ result = "0-1";
+ return (loc + 2);
+ }
+ }
}
hm->count = std::stoi(move_nb);
loc++;
@@ -172,10 +169,6 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
hm->isBlack = true;
}
- // Parse comment entries (various comment could appear during HalfMove
- // parsing)
- loc = ParseComment(loc, hm);
-
// Parse the HalfMove
loc = NextNonBlank(loc);
EOF_CHECK(loc);
@@ -189,13 +182,21 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
}
hm->move = move;
- // Parse comment
- loc = ParseComment(loc, hm);
-
- // Skip end of variation
- if (c == ')') {
+ // Check for NAG
+ loc = NextNonBlank(loc);
+ EOF_CHECK(loc);
+ c = pgn_content[loc];
+ if (c == '$') {
+ hm->NAG += c;
loc++;
- return (loc);
+ EOF_CHECK(loc);
+ c = pgn_content[loc];
+ while (IS_DIGIT(c)) {
+ hm->NAG += c;
+ loc++;
+ EOF_CHECK(loc);
+ c = pgn_content[loc];
+ }
}
// Parse comment
@@ -209,10 +210,19 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
loc = ParseHalfMove(loc, var);
hm->variations.push_back(var);
loc++; // Skip ')'
+ // Goto next var
+ loc = NextNonBlank(loc);
+ EOF_CHECK(loc);
+ c = pgn_content[loc];
}
- // Parse comment
- loc = ParseComment(loc, hm);
+ // Skip end of variation
+ loc = NextNonBlank(loc);
+ EOF_CHECK(loc);
+ c = pgn_content[loc];
+ if (c == ')') {
+ return (loc);
+ }
// Parse next HalfMove
loc = NextNonBlank(loc);
@@ -231,10 +241,10 @@ int PGN::ParseHalfMove(int loc, HalfMove *hm) {
return (loc);
}
-int PGN::ParseNextTag(int start_loc) {
+long PGN::ParseNextTag(long start_loc) {
// Parse key
std::string key;
- int keyloc = start_loc + 1;
+ long keyloc = start_loc + 1;
EOF_CHECK(keyloc);
char c = pgn_content[keyloc];
while (!IS_BLANK(c)) {
@@ -246,7 +256,7 @@ int PGN::ParseNextTag(int start_loc) {
// Parse value
std::string value;
- int valueloc = NextNonBlank(keyloc) + 1;
+ long valueloc = NextNonBlank(keyloc) + 1;
EOF_CHECK(keyloc);
c = pgn_content[valueloc];
while (c != '"' or IS_EOF(valueloc)) {
@@ -294,14 +304,16 @@ std::string PGN::Dump() {
return (ss.str());
}
-int PGN::NextNonBlank(int loc) {
+long PGN::NextNonBlank(long loc) {
char c = pgn_content[loc];
while (IS_BLANK(c)) {
loc++;
- if (IS_EOF(loc))
+ if (IS_EOF(loc)) {
return (loc);
+ }
c = pgn_content[loc];
}
+
return (loc);
}
diff --git a/src/PGN.hpp b/src/PGN.hpp
index c4e27d0..7e4d0e2 100644
--- a/src/PGN.hpp
+++ b/src/PGN.hpp
@@ -1,4 +1,5 @@
#include "HalfMove.hpp"
+#include "LargeFileStream.hpp"
#include <algorithm>
#include <exception>
#include <fstream>
@@ -17,9 +18,10 @@ private:
/// @brief Contains the parsed PGN moves
HalfMove *moves;
/// @brief Contains the PGN data
- std::string pgn_content;
- /// @brief Contains the location of the end of the last parsed game (1 PGN file may have multiple games)
- int LastGameEndLoc;
+ LargeFileStream pgn_content;
+ /// @brief Contains the location of the end of the last parsed game (1 PGN
+ /// file may have multiple games)
+ long LastGameEndLoc;
public:
PGN();
@@ -27,8 +29,9 @@ public:
void FromFile(std::string);
void FromString(std::string);
/**
- * Parse the next available game. Note that it raises a @a NoGameFound exception if no more game is available.
- * A call to this method flush all the last parsed game data. Be careful.
+ * Parse the next available game. Note that it raises a @a NoGameFound
+ * exception if no more game is available. A call to this method flush all the
+ * last parsed game data. Be careful.
*/
void ParseNextGame();
/// @brief Check if PGN contains a specific tag
@@ -49,13 +52,13 @@ public:
private:
/// @brief Populate @a tags with by parsing the one starting at location in
/// argument
- int ParseNextTag(int);
+ long ParseNextTag(long);
/// @brief Get the next non-blank char location starting from location in
/// argument
- int NextNonBlank(int);
+ long NextNonBlank(long);
/// @brief Parse a HalfMove at a specific location into @a pgn_content
- int ParseHalfMove(int, HalfMove *);
- int ParseComment(int,HalfMove *);
+ long ParseHalfMove(long, HalfMove *);
+ long ParseComment(long, HalfMove *);
};
struct UnexpectedEOF : public std::exception {
@@ -76,7 +79,7 @@ struct NoGameFound : public std::exception {
struct UnexpectedCharacter : public std::exception {
std::string msg;
- UnexpectedCharacter(char actual, char required, int loc) {
+ UnexpectedCharacter(char actual, char required, long loc) {
std::stringstream ss;
ss << "Expected \'" << required << "\' at location " << loc
<< " but read \'" << actual << "\'";