Skip to content

Commit

Permalink
Count blank lines and comments for calculating line numbers
Browse files Browse the repository at this point in the history
  • Loading branch information
kaaveland committed Jun 27, 2024
1 parent f48fa9d commit 6ff699e
Show file tree
Hide file tree
Showing 6 changed files with 242 additions and 66 deletions.
52 changes: 42 additions & 10 deletions eugene/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,14 @@
//!
//! THe library also provides syntax tree analysis for SQL scripts, so it can be used to
//! analyze migration scripts for potential issues before running them.
use std::collections::HashMap;

use postgres::{Client, NoTls, Transaction};
use std::collections::HashMap;

use crate::error::{ContextualError, InnerError};
use crate::script_discovery::ReadFrom;
use tracing::trace_transaction;

use crate::sqltext::sql_statements;
use crate::sqltext::sql_statements_with_line_no;
use crate::tracing::TxLockTracer;
use tracing::trace_transaction;

/// Static data for hints and lints, used to identify them in output or input.
pub mod hint_data;
Expand Down Expand Up @@ -189,27 +187,30 @@ pub fn perform_trace<'a, T: WithClient>(
ignored_hints: &'a [&'a str],
commit: bool,
) -> Result<TxLockTracer<'a>> {
let sql_statements = sql_statements(script.sql.as_str())?;
let all_concurrently = sql_statements.iter().all(sqltext::is_concurrently);
let sql_statements = sql_statements_with_line_no(script.sql.as_str())?;
let all_concurrently = sql_statements
.iter()
.map(|(_, s)| s)
.all(sqltext::is_concurrently);
if all_concurrently && commit {
connection_settings.with_client(|client| {
for s in sql_statements.iter() {
for (_, s) in sql_statements.iter() {
client.execute(*s, &[])?;
}
Ok(())
})?;

Ok(TxLockTracer::tracer_for_concurrently(
Some(script.name.clone()),
sql_statements.iter(),
sql_statements.iter().copied(),
ignored_hints,
))
} else {
connection_settings.in_transaction(commit, |conn| {
trace_transaction(
Some(script.name.clone()),
conn,
sql_statements.iter(),
sql_statements.iter().copied(),
ignored_hints,
)
})
Expand Down Expand Up @@ -272,3 +273,34 @@ pub fn generate_new_test_db() -> String {
.unwrap();
db_name
}

#[cfg(test)]
mod tests {

#[test]
fn lint_line_numbers_should_make_sense_ex2() {
let script = "ALTER TABLE foo ADD a text;
-- A comment
CREATE UNIQUE INDEX my_index ON foo (a);";

let report = super::lints::anon_lint(script).unwrap();
assert_eq!(report.statements[0].line_number, 1);
assert_eq!(report.statements[1].line_number, 5);
}

#[test]
fn lint_line_numbers_should_make_sense_ex1() {
let script = "ALTER TABLE
foo
ADD
a text;
CREATE UNIQUE INDEX
my_index ON foo (a);";
let report = super::lints::anon_lint(script).unwrap();
assert_eq!(report.statements[0].line_number, 1);
assert_eq!(report.statements[1].line_number, 6);
}
}
9 changes: 4 additions & 5 deletions eugene/src/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use itertools::Itertools;
use crate::comments::filter_rules;
pub use crate::lints::ast::StatementSummary;
use crate::output::output_format::{LintReport, LintedStatement};
use crate::sqltext;

/// The `ast` module provides a way to describe a parsed SQL statement in a structured way,
/// using simpler trees than the ones provided by `pg_query`.
Expand Down Expand Up @@ -114,13 +115,12 @@ pub fn lint<S: AsRef<str>>(
ignored_lints: &[&str],
skip_summary: bool,
) -> crate::Result<LintReport> {
let statements = pg_query::split_with_parser(sql.as_ref())?;
let statements = sqltext::sql_statements_with_line_no(sql.as_ref())?;
let mut ctx = TransactionState::default();
let mut lints = Vec::new();
let mut no: usize = 1;
let mut line_number: usize = 1;
let mut passed_all = true;
for stmt in statements {
for (line, stmt) in statements {
let action = crate::comments::find_comment_action(sql.as_ref())?;
let tree = pg_query::parse(stmt)?;
for raw in tree.protobuf.stmts.iter() {
Expand All @@ -136,13 +136,12 @@ pub fn lint<S: AsRef<str>>(

lints.push(LintedStatement {
statement_number: no,
line_number,
line_number: line,
sql: stmt.trim().to_string(),
triggered_rules: matched_lints,
});
ctx.update_from(&summary);
no += 1;
line_number += stmt.chars().filter(|&c| c == '\n').count();
}
}
}
Expand Down
5 changes: 1 addition & 4 deletions eugene/src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ impl Settings {
struct OutputContext {
output_settings: Settings,
statement_number: usize,
line_number: usize,
held_locks_context: Vec<TracedLock>,
duration_millis_so_far: u64,
duration_millis_total: u64,
Expand Down Expand Up @@ -95,7 +94,7 @@ impl OutputContext {

let result = FullSqlStatementLockTrace {
statement_number_in_transaction: self.statement_number,
line_number: self.line_number,
line_number: statement.line_no,
sql: statement.sql.clone(),
duration_millis: statement.duration.as_millis() as u64,
start_time_millis: self.duration_millis_so_far,
Expand Down Expand Up @@ -130,7 +129,6 @@ impl OutputContext {
triggered_rules: hints.to_vec(),
};
self.statement_number += 1;
self.line_number += result.sql.lines().count();
self.held_locks_context
.extend(result.new_locks_taken.clone());
self.duration_millis_so_far += result.duration_millis;
Expand All @@ -144,7 +142,6 @@ impl OutputContext {
OutputContext {
output_settings,
statement_number: 1,
line_number: 1,
held_locks_context: vec![],
duration_millis_so_far: 0,
duration_millis_total,
Expand Down
139 changes: 123 additions & 16 deletions eugene/src/sqltext.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
use crate::error::InnerError::UnresolvedPlaceHolder;
use crate::error::{ContextualError, ContextualResult};
use std::collections::HashMap;
use std::io::Read;

use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::{anychar, multispace0, multispace1};
use nom::combinator::recognize;
use nom::multi::{many0, many_till};
use nom::sequence::pair;
use nom::IResult;

use crate::error::InnerError::UnresolvedPlaceHolder;
use crate::error::{ContextualError, ContextualResult};

/// Naively resolve placeholders in SQL script in ${} format using provided mapping
pub fn resolve_placeholders(sql: &str, mapping: &HashMap<&str, &str>) -> crate::Result<String> {
let placeholder_re = regex::Regex::new(r"\$\{[a-zA-Z0-9]+}").unwrap();
Expand All @@ -17,13 +26,50 @@ pub fn resolve_placeholders(sql: &str, mapping: &HashMap<&str, &str>) -> crate::
}
}

/// Separate SQL script into statements
pub fn sql_statements(sql: &str) -> crate::Result<Vec<&str>> {
Ok(pg_query::split_with_parser(sql)?
fn parse_line_comment(s: &str) -> IResult<&str, &str> {
let (s, _) = pair(multispace0, tag("--"))(s)?;
let (s, _) = many_till(anychar, tag("\n"))(s)?;
Ok((s, ""))
}

fn parse_comment_block(s: &str) -> IResult<&str, &str> {
let (s, _) = pair(multispace0, tag("/*"))(s)?;
let (s, _) = many_till(anychar, tag("*/"))(s)?;
Ok((s, ""))
}

fn parse_blanks_and_comments(s: &str) -> IResult<&str, &str> {
let (s, pre) = recognize(many0(alt((
multispace1,
parse_line_comment,
parse_comment_block,
))))(s)?;
Ok((s, pre))
}

/// Discover which line within possibly multiline statement that the actual statement starts on
fn line_no_of_start(statement: &str) -> crate::Result<usize> {
if let Ok((_, pre)) = parse_blanks_and_comments(statement) {
Ok(pre.chars().filter(|&c| c == '\n').count())
} else {
Ok(0usize)
}
}

/// Split into statements along with the line number where each statement starts, skipping leading blanks and comments
pub fn sql_statements_with_line_no(sql: &str) -> crate::Result<Vec<(usize, &str)>> {
let numbered_statements: crate::Result<Vec<_>> = pg_query::split_with_parser(sql)?
.into_iter()
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.collect())
.map(|s| Ok((line_no_of_start(s)?, s)))
.collect();
let mut numbered_statements = numbered_statements?;
let mut line = 1;
for st in numbered_statements.iter_mut() {
st.0 += line;
line += st.1.chars().filter(|&c| c == '\n').count();
st.1 = st.1.trim();
}
Ok(numbered_statements)
}

/// This function reads SQL script files, discards comments and returns a vector of
Expand All @@ -49,15 +95,62 @@ pub fn is_concurrently<S: AsRef<str>>(sql: S) -> bool {
mod tests {
use pretty_assertions::assert_eq;

#[test]
fn number_those_lines_ex1() {
let ex = "ALTER TABLE foo ADD a text;
-- A comment
CREATE UNIQUE INDEX my_index ON foo (a);";
let result = super::sql_statements_with_line_no(ex).unwrap();
assert_eq!(result[0].0, 1);
assert_eq!(result[1].0, 5);
}

#[test]
fn number_those_lines_ex2() {
let ex = "ALTER TABLE
foo
ADD
a text;
CREATE UNIQUE INDEX
my_index ON foo (a);";
let result = super::sql_statements_with_line_no(ex).unwrap();
assert_eq!(result[0].0, 1);
assert_eq!(result[1].0, 6);
}

#[test]
fn number_those_lines_ex3() {
let ex = "CREATE TABLE AUTHORS (
ID INT GENERATED ALWAYS AS IDENTITY
PRIMARY KEY,
NAME TEXT
);
ALTER TABLE BOOKS
ADD COLUMN AUTHOR_ID INT;
ALTER TABLE BOOKS
ADD CONSTRAINT AUTHOR_ID_FK
FOREIGN KEY (AUTHOR_ID)
REFERENCES AUTHORS (ID);";
let result = super::sql_statements_with_line_no(ex).unwrap();
assert_eq!(result[0].0, 1);
assert_eq!(result[1].0, 7);
assert_eq!(result[2].0, 10);
}

#[test]
fn test_split_statements_with_comments() -> crate::Result<()> {
let sql = "SELECT * FROM tab; -- This is a comment\nSELECT * FROM tab; /* This is a block comment */";
let result = super::sql_statements(sql)?;
let result = super::sql_statements_with_line_no(sql)?;
assert_eq!(
result,
vec![
"SELECT * FROM tab",
"-- This is a comment\nSELECT * FROM tab"
(1, "SELECT * FROM tab"),
(2, "-- This is a comment\nSELECT * FROM tab")
]
);
Ok(())
Expand All @@ -72,12 +165,12 @@ BEGIN
END;
$$
LANGUAGE plpgsql; select * from tab";
let result = super::sql_statements(s)?;
let result = super::sql_statements_with_line_no(s)?;
assert_eq!(
result,
vec![
&s[..s.len() - 1 - " select * from tab".len()],
"select * from tab"
(1, &s[..s.len() - 1 - " select * from tab".len()]),
(7, "select * from tab")
]
);
Ok(())
Expand Down Expand Up @@ -107,7 +200,21 @@ BEGIN
e.id = emp_id;
END;
$body$ LANGUAGE plpgsql;"; // generated example by ai
let result = super::sql_statements(s).unwrap();
assert_eq!(result, vec![&s[..s.len() - 1]]);
let result = super::sql_statements_with_line_no(s).unwrap();
assert_eq!(result, vec![(1, &s[..s.len() - 1])]);
}

#[test]
fn parses_blanks_line_comments() {
let s = " \n--comment\nsqltext";
let result = super::parse_blanks_and_comments(s);
assert_eq!(result.unwrap(), ("sqltext", " \n--comment\n"));
}

#[test]
fn parses_comment_blocks() {
let s = " /*comment\n\n*/sqltext";
let result = super::parse_blanks_and_comments(s);
assert_eq!(result.unwrap(), ("sqltext", " /*comment\n\n*/"));
}
}
Loading

0 comments on commit 6ff699e

Please sign in to comment.