SwatDB
Loading...
Searching...
No Matches
hashjoin.h
Go to the documentation of this file.
1/*
2 * SwatDB
3 *
4 * @authors: See Contributors.doc for code contributors
5 *
6 * Copyright (c) 2020, 2025 Swarthmore College Computer Science Department
7 * Swarthmore PA, Professors Tia Newhall, Ameet Soni
8 */
9
10// @@@@ SWATDB_ASSIGNMENT: ADDTODODATE
11
12#ifndef _SWATDB_HASHJOIN_H_
13#define _SWATDB_HASHJOIN_H_
14
20#include <string>
21#include <vector>
22#include "swatdb_types.h"
23#include "join.h"
24
25class FileManager;
26class Catalog;
27class Schema;
28class File;
29class RelationFile;
30class HeapFileScanner;
32class HeapFile;
33class Record;
34class Data;
35class Key;
36
37// @@@@ SWATDB_ASSIGNMENT: NOTEMSG Do not modify this definition, you may add private helper methods only
38
// HashJoin: join operator implemented with a hash join algorithm; derives
// from Join.
// NOTE(review): this text is a rendered source listing (each line carries the
// page's own line number) and several declarations are absent from it; the
// gaps are flagged individually below and should be checked against the real
// hashjoin.h.
42class HashJoin : public Join {
43
44 public:
45
// Constructor for the join operator using a hash join algorithm; sets up the
// state for a single join operation.
// Parameters visible here: file ids for the outer, inner and result
// relations, the field ids to join on for each side, the number of buckets,
// the path for temp files, and catalog / buffer manager pointers.
68 HashJoin(FileId outer_id, FileId inner_id, FileId result_id,
69 std::vector<FieldId> outer_fields,
70 std::vector<FieldId> inner_fields,
71 std::uint32_t num_buckets,
72 std::string temp_path, Catalog *catalog, BufferManager *buf_mgr,
// NOTE(review): the parameter list is cut off in this listing. The member
// index on this page shows the signature ending with
// `FileManager *file_mgr)` -- confirm against the real header.
74
// NOTE(review): the member index also lists `~HashJoin()` ("Destructor for
// hashjoin class.") and `void runOperation()` ("Performs the join operation
// using the hash join algorithm."); neither declaration appears in this
// listing. Presumably they belong in this public section -- confirm.
79
88
89 protected:
90
// NOTE(review): the member index places `BufferManager * buf_mgr` at
// hashjoin.h:94 and `FileManager * file_mgr` at hashjoin.h:99; both
// declarations are absent from this listing.
95
100
// HeapFile pointers for the outer relation's partitions.
// Presumably one entry per bucket -- TODO confirm.
105 std::vector<HeapFile *> outer_partitions;
106
// HeapFile pointers for the inner relation's partitions.
// Presumably one entry per bucket -- TODO confirm.
111 std::vector<HeapFile *> inner_partitions;
112
// Bucket count; presumably the value passed to the constructor -- confirm.
116 std::uint32_t num_buckets;
117
// Table of (HeapPage pointer, PageId) pairs. How it is populated and used is
// not visible in this header.
121 std::vector<std::pair<HeapPage *, PageId>> hash_table;
122
// Holds the path to which the temp files should be saved. "/local/" is
// recommended for performance.
127 std::string temp_path;
128
129
// Marked by the original author for replacement; its purpose is not
// documented in this listing.
130 //TODO replace
131 std::uint32_t result_num;
132
// Performs the first hash function on the given record. `is_outer`
// presumably selects whether `rec` comes from the outer or the inner
// relation -- confirm against the implementation.
138 std::uint32_t hash1(Record *rec, bool is_outer);
139
// NOTE(review): the member index lists `void _createTempFiles()` (set-up
// phase: creates the temporary hashed files used by the algorithm) and
// `void _secondHash()` (second hashing of each relation for the second step
// of hash join). Neither declaration appears in this listing; their exact
// position is unknown.
146
147
// Performs the initial hashing of each relation for the first step of hash
// join.
156 void _firstHash(bool is_outer);
157
164
// Performs the main looping functionality for the first hash.
// The meaning of the returned RecordId is not visible in this header.
173 RecordId _part1(Record* record, bool is_outer,
174 BlockHeapFileScanner *scanner);
175
// Cleans up state and deletes all allocated memory.
183 void cleanup();
184};
185
186#endif
Definition blockheapfilescanner.h:35
Definition bufmgr.h:121
Definition catalog.h:161
Definition data.h:26
Definition filemgr.h:32
Definition file.h:45
Definition hashjoin.h:42
~HashJoin()
Destructor for hashjoin class.
HashJoin(FileId outer_id, FileId inner_id, FileId result_id, std::vector< FieldId > outer_fields, std::vector< FieldId > inner_fields, std::uint32_t num_buckets, std::string temp_path, Catalog *catalog, BufferManager *buf_mgr, FileManager *file_mgr)
Constructor for the join operator using a hash join algorithm. Sets up the state for a single join op...
std::vector< std::pair< HeapPage *, PageId > > hash_table
Definition hashjoin.h:121
std::vector< HeapFile * > outer_partitions
Definition hashjoin.h:105
std::uint32_t hash1(Record *rec, bool is_outer)
Performs the first hash function on the inputted record.
RecordId _part1(Record *record, bool is_outer, BlockHeapFileScanner *scanner)
Performs the main looping functionality for first hash.
void _secondHash()
Performs the second hashing of each relation for the second step of hash join.
std::vector< HeapFile * > inner_partitions
Definition hashjoin.h:111
BufferManager * buf_mgr
Definition hashjoin.h:94
void cleanup()
Function that cleans up state and deletes all allocated memory. It will be called once by one thread ...
void _createTempFiles()
Performs the set up phase of hash join, creating temporary hashed files to be used by hash join algor...
void runOperation()
Performs the join operation using the hash join algorithm.
FileManager * file_mgr
Definition hashjoin.h:99
void _firstHash(bool is_outer)
Performs the initial hashing of each relation for the first step of hash join.
std::string temp_path
Holds the path to which the temp files should be saved. "/local/" is recommended for performance.
Definition hashjoin.h:127
std::uint32_t num_buckets
Definition hashjoin.h:116
Definition heapfilescanner.h:39
Definition heapfile.h:76
Definition join.h:37
std::vector< FieldId > inner_fields
Definition join.h:83
std::vector< FieldId > outer_fields
Definition join.h:78
Definition key.h:38
Definition record.h:34
Definition relationfile.h:32
Definition schema.h:37
Definition swatdb_types.h:70
std::uint32_t FileId
Definition swatdb_types.h:33