I was a bit bored and compared various ways to read a file in C++ under Windows. The test was to read kissassist.mac (552k) 100 times, count the number of bytes, spaces and lines, and measure the elapsed time.
Source
Results
I'm pretty pleased that the 2 idiomatic C++ ways of reading give good performance, at least with VS2017. iostreams used to have a bad reputation performance-wise. There is almost no error handling, so don't copy-paste this into your next mission-critical project.
Edit: Refactored and added a 7th way: win-api. This is really unbuffered, byte for byte, and ridiculously slow, ~50s.
Source
Rich (BB code):
#include <cstdio>
#include <algorithm>
#include <iostream>
#include <fstream>
#include <chrono>
#include <filesystem>
#include <windows.h>
using namespace std;
using namespace std::experimental::filesystem::v1;
// Small stopwatch on top of std::chrono::high_resolution_clock.
// The start point is captured on construction and again on reset().
class Timer
{
    using clock_type = std::chrono::high_resolution_clock;

    clock_type::time_point started_at;

public:
    // Starts timing immediately.
    Timer() noexcept : started_at{clock_type::now()} {}

    // Restarts timing from the current instant.
    void reset() noexcept
    {
        started_at = clock_type::now();
    }

    // Time elapsed since construction / last reset(), in the clock's native duration type.
    auto duration_ns() const noexcept
    {
        const auto now = clock_type::now();
        return now - started_at;
    }

    // Time elapsed since construction / last reset(), as fractional seconds.
    auto duration_s() const noexcept
    {
        const std::chrono::duration<double> seconds{duration_ns()};
        return seconds.count();
    }
};
// Running totals collected while scanning a file character by character.
struct Counters
{
    long bytes{ 0 };   // every character passed to count()
    long lines{ 0 };   // number of '\n' characters seen
    long spaces{ 0 };  // number of ' ' and '\t' characters seen

    // Adds the totals of `other` to this object and returns *this.
    Counters& operator+=(const Counters &other) noexcept
    {
        bytes  += other.bytes;
        lines  += other.lines;
        spaces += other.spaces;
        return *this;
    }

    // Registers a single character: always one more byte, plus one more
    // line for a newline or one more space for a blank or a tab.
    void count(const int c) noexcept
    {
        ++bytes;
        if (c == '\n')
            ++lines;
        else if (c == ' ' || c == '\t')
            ++spaces;
    }
};
// Writes the three totals as "Bytes:<n> Spaces:<n> Lines:<n>" and returns the stream.
ostream& operator<< (ostream& os, const Counters& counter)
{
    return os << "Bytes:" << counter.bytes
              << " Spaces:" << counter.spaces
              << " Lines:" << counter.lines;
}
// Reads `filename` one character at a time with the C stdio API (fgetc)
// and returns the byte/line/space totals. If the file cannot be opened,
// the returned totals are all zero.
static auto read_file1(const char *filename) // c-style
{
    Counters counters;
    FILE *fp = nullptr;
    // Open in binary mode like every other reader in this file, so that no
    // CR/LF translation or Ctrl-Z handling changes the byte count.
    if (fopen_s(&fp, filename, "rb") != 0 || fp == nullptr)
        return counters; // never hand a null FILE* to fgetc/fclose
    int c;
    while ((c = fgetc(fp)) != EOF)
        counters.count(c);
    fclose(fp);
    return counters;
}
// Reads `filename` one character at a time with the formatted extraction
// operator (operator>>) and returns the byte/line/space totals.
static auto read_file2(const char *filename) // extractor-style
{
    Counters counters;
    auto is = ifstream(filename, ifstream::binary);
    is >> noskipws; // whitespace has to be extracted, not skipped, to be counted
    char c;
    // Check the stream state AFTER each extraction. Testing good() before the
    // read lets the final, failed extraction at end-of-file through, which
    // counted one phantom byte per pass from an unassigned char.
    while (is >> c)
        counters.count(c);
    return counters;
}
static auto read_file3(const char *filename) // get()-style
{
Counters counters;
auto is = ifstream(filename, ifstream::binary);
int c;
while ((c = is.get()) != EOF)
counters.count(c);
return counters;
}
static auto read_file4(const char *filename) // iterator-style
{
Counters counters;
auto is = ifstream(filename, ifstream::binary);
constexpr istreambuf_iterator<char> eof;
istreambuf_iterator<char> it(is);
while (it != eof)
{
const auto c = *it++;
counters.count(c);
}
return counters;
}
static auto read_file5(const char *filename) // lambda-style
{
Counters counters;
auto is = ifstream(filename, ifstream::binary);
constexpr istreambuf_iterator<char> eof;
istreambuf_iterator<char> it(is);
for_each(it,eof,[&counters](const auto c)
{counters.count(c);});
return counters;
}
static auto read_file6(const char *filename) // memory mapped-style
{
Counters counters;
auto fh = CreateFile(filename, GENERIC_READ, 0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_SEQUENTIAL_SCAN, NULL);
auto fm = CreateFileMapping(fh, nullptr, PAGE_READONLY, 0, 0, nullptr);
const char* const base = (const char* const)MapViewOfFile(fm, FILE_MAP_READ, 0,0,0);
LARGE_INTEGER li{ 0 };
GetFileSizeEx(fh, &li);
for (decltype(li.QuadPart) i = 0; i < li.QuadPart; i++)
{
const auto c = base;
counters.count(c);
}
UnmapViewOfFile(base);
CloseHandle(fm);
CloseHandle(fh);
return counters;
}
static auto read_file7(const char *filename) // win api-style
{
Counters counters;
auto fh = CreateFile(filename, GENERIC_READ, 0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL);
SetFilePointer(fh, 0, nullptr, FILE_BEGIN);
char c = '\0';
DWORD bytes_read = 0;
while (ReadFile(fh, &c, 1, &bytes_read, nullptr) && bytes_read)
counters.count(c);
CloseHandle(fh);
return counters;
}
static void time_it(const char *drive_type, const char* filename, decltype(read_file1)*read_function)
{
constexpr auto loop_count = 100;
Counters counters;
Timer t;
for (auto i = 0; i < loop_count; i++)
counters += read_function(filename);
auto duration = t.duration_s();
cout << drive_type << counters << " s:" << duration << '\n';
}
static void time_it(const char* title, decltype(read_file1)*read_function)
{
const path ssd_file{ "C:\\Users\\User\\Desktop\\MQ2\\Release\\Macros\\kissassist.mac" };
if (!is_regular_file(ssd_file))
{
cerr << ssd_file << ": isn't a regular file\n";
return;
}
const path hdd_file{ "K:\\kissassist.mac" };
if (!is_regular_file(hdd_file))
{
cerr << hdd_file << ": isn't a regular file\n";
return;
}
cout << title << '\n';
time_it("SSD: ", ssd_file.string().c_str(), read_function);
time_it("HDD: ", hdd_file.string().c_str(), read_function);
}
// Runs every reader through the benchmark in a fixed order, then waits for a
// key press so the console window stays open.
int main()
{
    struct Benchmark
    {
        const char* title;
        decltype(read_file1)* reader;
    };

    const Benchmark benchmarks[] = {
        { "C-style",             read_file1 },
        { "C++ extractor-style", read_file2 },
        { "C++ get()-style",     read_file3 },
        { "C++ iterator-style",  read_file4 },
        { "C++ lambda-style",    read_file5 },
        { "win api-style",       read_file7 },
        { "memory mapped-style", read_file6 },
    };

    for (const auto& benchmark : benchmarks)
        time_it(benchmark.title, benchmark.reader);

    getchar();
    return 0;
}
Results
Rich (BB code):
C-style
SSD: Bytes:55200000 Spaces:15815200 Lines:918800 s:1.52117
HDD: Bytes:55200000 Spaces:15815200 Lines:918800 s:1.51888
C++ extractor-style
SSD: Bytes:55200100 Spaces:15815200 Lines:918800 s:1.76182
HDD: Bytes:55200100 Spaces:15815200 Lines:918800 s:1.75876
C++ get()-style
SSD: Bytes:55200000 Spaces:15815200 Lines:918800 s:2.47827
HDD: Bytes:55200000 Spaces:15815200 Lines:918800 s:2.47817
C++ iterator-style
SSD: Bytes:55200000 Spaces:15815200 Lines:918800 s:0.50451
HDD: Bytes:55200000 Spaces:15815200 Lines:918800 s:0.503831
C++ lambda-style
SSD: Bytes:55200000 Spaces:15815200 Lines:918800 s:0.347344
HDD: Bytes:55200000 Spaces:15815200 Lines:918800 s:0.345406
memory mapped-style
SSD: Bytes:55200000 Spaces:15815200 Lines:918800 s:0.0970198
HDD: Bytes:55200000 Spaces:15815200 Lines:918800 s:0.0966426
Edit: Refactored and added a 7th way: win-api. This is really unbuffered, byte for byte, and ridiculously slow, ~50s.
Last edited: