Refactor Snowflake to avoid Brakeman SQL injection warnings (#25879)

2023-07-12 04:44:58 -04:00 · 2023-07-12 04:44:58 -04:00 · f831452037
commit f831452037
parent 6c5a2233a8
2 changed files with 52 additions and 63 deletions
--- a/lib/mastodon/snowflake.rb
+++ b/lib/mastodon/snowflake.rb
@ -64,46 +64,7 @@ module Mastodon::Snowflake
    def define_timestamp_id
      return if already_defined?

-      connection.execute(<<~SQL)
-        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
-        RETURNS bigint AS
-        $$
-          DECLARE
-            time_part bigint;
-            sequence_base bigint;
-            tail bigint;
-          BEGIN
-            time_part := (
-              -- Get the time in milliseconds
-              ((date_part('epoch', now()) * 1000))::bigint
-              -- And shift it over two bytes
-              << 16);
-
-            sequence_base := (
-              'x' ||
-              -- Take the first two bytes (four hex characters)
-              substr(
-                -- Of the MD5 hash of the data we documented
-                md5(table_name || '#{SecureRandom.hex(16)}' || time_part::text),
-                1, 4
-              )
-            -- And turn it into a bigint
-            )::bit(16)::bigint;
-
-            -- Finally, add our sequence number to our base, and chop
-            -- it to the last two bytes
-            tail := (
-              (sequence_base + nextval(table_name || '_id_seq'))
-              & 65535);
-
-            -- Return the time part and the sequence part. OR appears
-            -- faster here than addition, but they're equivalent:
-            -- time_part has no trailing two bytes, and tail is only
-            -- the last two bytes.
-            RETURN time_part | tail;
-          END
-        $$ LANGUAGE plpgsql VOLATILE;
-      SQL
+      connection.execute(sanitized_timestamp_id_sql)
    end

    def ensure_id_sequences_exist
@ -153,6 +114,57 @@ module Mastodon::Snowflake
      SQL
    end

+    def sanitized_timestamp_id_sql
+      ActiveRecord::Base.sanitize_sql_array(timestamp_id_sql_array)
+    end
+
+    def timestamp_id_sql_array
+      [timestamp_id_sql_string, { random_string: SecureRandom.hex(16) }]
+    end
+
+    def timestamp_id_sql_string
+      <<~SQL
+        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
+        RETURNS bigint AS
+        $$
+          DECLARE
+            time_part bigint;
+            sequence_base bigint;
+            tail bigint;
+          BEGIN
+            time_part := (
+              -- Get the time in milliseconds
+              ((date_part('epoch', now()) * 1000))::bigint
+              -- And shift it over two bytes
+              << 16);
+
+            sequence_base := (
+              'x' ||
+              -- Take the first two bytes (four hex characters)
+              substr(
+                -- Of the MD5 hash of the data we documented
+                md5(table_name || :random_string || time_part::text),
+                1, 4
+              )
+            -- And turn it into a bigint
+            )::bit(16)::bigint;
+
+            -- Finally, add our sequence number to our base, and chop
+            -- it to the last two bytes
+            tail := (
+              (sequence_base + nextval(table_name || '_id_seq'))
+              & 65535);
+
+            -- Return the time part and the sequence part. OR appears
+            -- faster here than addition, but they're equivalent:
+            -- time_part has no trailing two bytes, and tail is only
+            -- the last two bytes.
+            RETURN time_part | tail;
+          END
+        $$ LANGUAGE plpgsql VOLATILE;
+      SQL
+    end
+
    def connection
      ActiveRecord::Base.connection
    end