0
0
Fork 0

Refactor Snowflake to avoid brakeman sql injection warnings (#25879)

This commit is contained in:
Matt Jankowski 2023-07-12 04:44:58 -04:00 committed by GitHub
parent 6c5a2233a8
commit f831452037
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 63 deletions

View file

@ -64,46 +64,7 @@ module Mastodon::Snowflake
def define_timestamp_id
return if already_defined?
connection.execute(<<~SQL)
CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
RETURNS bigint AS
$$
DECLARE
time_part bigint;
sequence_base bigint;
tail bigint;
BEGIN
time_part := (
-- Get the time in milliseconds
((date_part('epoch', now()) * 1000))::bigint
-- And shift it over two bytes
<< 16);
sequence_base := (
'x' ||
-- Take the first two bytes (four hex characters)
substr(
-- Of the MD5 hash of the data we documented
md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text),
1, 4
)
-- And turn it into a bigint
)::bit(16)::bigint;
-- Finally, add our sequence number to our base, and chop
-- it to the last two bytes
tail := (
(sequence_base + nextval(table_name || '_id_seq'))
& 65535);
-- Return the time part and the sequence part. OR appears
-- faster here than addition, but they're equivalent:
-- time_part has no trailing two bytes, and tail is only
-- the last two bytes.
RETURN time_part | tail;
END
$$ LANGUAGE plpgsql VOLATILE;
SQL
connection.execute(sanitized_timestamp_id_sql)
end
def ensure_id_sequences_exist
@ -153,6 +114,57 @@ module Mastodon::Snowflake
SQL
end
def sanitized_timestamp_id_sql
ActiveRecord::Base.sanitize_sql_array(timestamp_id_sql_array)
end
def timestamp_id_sql_array
[timestamp_id_sql_string, { random_string: SecureRandom.hex(16) }]
end
def timestamp_id_sql_string
<<~SQL
CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
RETURNS bigint AS
$$
DECLARE
time_part bigint;
sequence_base bigint;
tail bigint;
BEGIN
time_part := (
-- Get the time in milliseconds
((date_part('epoch', now()) * 1000))::bigint
-- And shift it over two bytes
<< 16);
sequence_base := (
'x' ||
-- Take the first two bytes (four hex characters)
substr(
-- Of the MD5 hash of the data we documented
md5(table_name || :random_string || time_part::text),
1, 4
)
-- And turn it into a bigint
)::bit(16)::bigint;
-- Finally, add our sequence number to our base, and chop
-- it to the last two bytes
tail := (
(sequence_base + nextval(table_name || '_id_seq'))
& 65535);
-- Return the time part and the sequence part. OR appears
-- faster here than addition, but they're equivalent:
-- time_part has no trailing two bytes, and tail is only
-- the last two bytes.
RETURN time_part | tail;
END
$$ LANGUAGE plpgsql VOLATILE;
SQL
end
def connection
ActiveRecord::Base.connection
end