Files
gitlabhq/spec/scripts/sql_fingerprint_extractor_spec.rb
2025-04-28 15:09:57 +00:00

195 lines
7.2 KiB
Ruby

# frozen_string_literal: true
require 'fast_spec_helper'
require_relative '../../scripts/sql_fingerprint_extractor'
# Unit specs for SQLFingerprintExtractor.
#
# The examples stub File, Zlib::GzipReader, StringIO and Gem::Package::TarReader
# with doubles, and inject a Logger double so that the exact log messages
# emitted by the extractor can be asserted on.
RSpec.describe SQLFingerprintExtractor, feature_category: :tooling do
  let(:logger) { instance_double(Logger, info: nil, warn: nil, error: nil) }
  let(:extractor) { described_class.new(logger) }

  describe '#initialize' do
    it 'uses the provided logger' do
      expect(extractor.logger).to eq(logger)
    end

    it 'creates a default logger if none provided' do
      # `expect` (not `allow`) so the example fails when no logger is built on
      # $stdout, which is what the example title promises.
      expect(Logger).to receive(:new).with($stdout).and_call_original

      expect(described_class.new.logger).to be_a(Logger)
    end
  end

  describe '#extract_queries_from_file' do
    context 'with a regular text file' do
      let(:file_path) { 'test_queries.ndjson' }
      let(:valid_line) { '{"fingerprint":"def123","normalized":"SELECT * FROM accounts"}' }
      let(:second_valid_line) { '{"fingerprint":"abc123","normalized":"SELECT * FROM users"}' }
      let(:invalid_line) { 'invalid json' }
      let(:no_fingerprint) { '{"normalized":"SELECT * FROM users"}' }

      before do
        # Feed one line of each kind: valid, unparseable, valid JSON without a
        # fingerprint, and a second valid line.
        allow(File).to receive(:foreach)
          .with(file_path)
          .and_yield(valid_line)
          .and_yield(invalid_line)
          .and_yield(no_fingerprint)
          .and_yield(second_valid_line)
      end

      it 'extracts valid queries with fingerprints' do
        queries = extractor.extract_queries_from_file(file_path)

        expect(queries.size).to eq(2)

        # Plain destructuring instead of ActiveSupport's Array#second, so the
        # example does not depend on core extensions being loaded.
        first_query, second_query = queries

        expect(first_query['fingerprint']).to eq('def123')
        expect(first_query['normalized']).to eq('SELECT * FROM accounts')
        expect(second_query['fingerprint']).to eq('abc123')
        expect(second_query['normalized']).to eq('SELECT * FROM users')
      end

      it 'logs the extraction process' do
        expect(logger).to receive(:info).with("Extracting queries from file: #{file_path}")
        expect(logger).to receive(:info).with(/Extracted \d+ queries from file:/)

        extractor.extract_queries_from_file(file_path)
      end
    end

    context 'with a gzipped file' do
      let(:gz_file_path) { 'test_queries.ndjson.gz' }
      let(:gz_reader) { instance_double(Zlib::GzipReader) }
      let(:valid_line) { '{"fingerprint":"def456","normalized":"SELECT * FROM posts"}' }

      before do
        # The gzip reader double yields a single valid NDJSON line.
        allow(Zlib::GzipReader).to receive(:open).with(gz_file_path).and_yield(gz_reader)
        allow(gz_reader).to receive(:each_line).and_yield(valid_line)
      end

      it 'extracts queries from a gzipped file' do
        queries = extractor.extract_queries_from_file(gz_file_path)

        expect(queries.size).to eq(1)
        expect(queries.first['fingerprint']).to eq('def456')
      end
    end

    context 'when an error occurs' do
      let(:file_path) { 'nonexistent_file.ndjson' }

      before do
        allow(File).to receive(:foreach).with(file_path).and_raise(StandardError.new('File read error'))
      end

      it 'logs the error and returns an empty array' do
        expect(logger).to receive(:warn).with("Warning: Error reading file: File read error")

        queries = extractor.extract_queries_from_file(file_path)

        expect(queries).to be_empty
      end
    end
  end

  describe '#extract_fingerprints_from_file' do
    let(:file_path) { 'test_queries.ndjson' }
    # The trailing empty hash stands in for a query that has no fingerprint.
    let(:queries) { [{ 'fingerprint' => 'abc123' }, { 'fingerprint' => 'def456' }, {}] }

    before do
      allow(extractor).to receive(:extract_queries_from_file).with(file_path).and_return(queries)
    end

    it 'extracts only the fingerprints as a Set' do
      fingerprints = extractor.extract_fingerprints_from_file(file_path)

      expect(fingerprints).to be_a(Set)
      expect(fingerprints.size).to eq(2)
      expect(fingerprints).to include('abc123', 'def456')
    end
  end

  describe '#extract_from_tar_gz' do
    let(:tar_gz_content) { 'mock_tar_gz_content' }
    let(:string_io) { instance_double(StringIO) }
    let(:gzip_reader) { instance_double(Zlib::GzipReader) }
    let(:tar_reader) { instance_double(Gem::Package::TarReader) }
    let(:entry) { instance_double(Gem::Package::TarReader::Entry, file?: true, directory?: false) }
    let(:entry_header) { instance_double(Gem::Package::TarHeader, size: 1000) }
    let(:entry_content) { "fingerprint1\nfingerprint2\n" }

    before do
      # Replace the StringIO -> GzipReader -> TarReader chain with doubles; the
      # tar reader yields one file entry holding two newline-separated fingerprints.
      allow(StringIO).to receive(:new).with(tar_gz_content).and_return(string_io)
      allow(Zlib::GzipReader).to receive(:new).with(string_io).and_return(gzip_reader)
      allow(Gem::Package::TarReader).to receive(:new).with(gzip_reader).and_return(tar_reader)
      allow(tar_reader).to receive(:each).and_yield(entry)
      allow(entry).to receive_messages(header: entry_header, read: entry_content)
    end

    it 'extracts fingerprints from tar.gz content' do
      fingerprints = extractor.extract_from_tar_gz(tar_gz_content)

      expect(fingerprints).to be_a(Set)
      expect(fingerprints.size).to eq(2)
      expect(fingerprints).to include('fingerprint1', 'fingerprint2')
    end

    context 'when file is too large' do
      let(:max_size_mb) { 0.001 } # 1KB max
      let(:entry_header) { instance_double(Gem::Package::TarHeader, size: 2000) } # 2KB file size

      it 'logs the error and returns empty set' do
        expect(logger).to receive(:error).with(/File too large:/)

        fingerprints = extractor.extract_from_tar_gz(tar_gz_content, max_size_mb)

        expect(fingerprints).to be_a(Set)
        expect(fingerprints).to be_empty
      end
    end

    context 'when an error occurs' do
      before do
        # Overrides the StringIO stub from the outer `before` so that the very
        # first step of the chain raises.
        allow(StringIO).to receive(:new).with(tar_gz_content).and_raise(StandardError.new('Tar.gz processing error'))
      end

      it 'logs the error and returns empty set' do
        expect(logger).to receive(:error).with("Error processing tar.gz: Tar.gz processing error")

        fingerprints = extractor.extract_from_tar_gz(tar_gz_content)

        expect(fingerprints).to be_a(Set)
        expect(fingerprints).to be_empty
      end
    end
  end

  describe '#write_fingerprints_to_file' do
    let(:fingerprints) { Set.new(%w[abc123 def456]) }
    let(:output_file) { 'output_fingerprints.txt' }
    let(:file) { instance_double(File) }

    before do
      # File.open yields a double, so nothing is written to disk.
      allow(File).to receive(:open).with(output_file, 'w').and_yield(file)
      allow(file).to receive(:puts)
    end

    it 'writes each fingerprint to the file' do
      expect(file).to receive(:puts).with('abc123')
      expect(file).to receive(:puts).with('def456')

      extractor.write_fingerprints_to_file(fingerprints, output_file)
    end

    it 'logs the number of fingerprints written' do
      expect(logger).to receive(:info).with("Wrote 2 fingerprints to output_fingerprints.txt")

      extractor.write_fingerprints_to_file(fingerprints, output_file)
    end
  end

  describe '#process_json_line' do
    # Accumulator handed to process_json_line; the examples inspect it afterwards.
    let(:queries) { [] }

    # `send` is used so the examples still work if the method is not public.
    it 'adds valid queries with fingerprints' do
      extractor.send(:process_json_line, '{"fingerprint":"abc123"}', queries)

      expect(queries.size).to eq(1)
      expect(queries.first['fingerprint']).to eq('abc123')
    end

    it 'skips lines without fingerprints' do
      extractor.send(:process_json_line, '{"other":"value"}', queries)

      expect(queries).to be_empty
    end

    it 'handles JSON parse errors' do
      extractor.send(:process_json_line, 'invalid json', queries)

      expect(queries).to be_empty
    end
  end
end