From 53286c22ba64247e0d37a137f9b2f5fc3808fdea Mon Sep 17 00:00:00 2001 From: Maikel Linke Date: Fri, 10 May 2024 13:39:52 +1000 Subject: [PATCH 1/2] Sanitise existing long descriptions of enterprises --- ...06_sanitize_enterprise_long_description.rb | 38 ++++++++++++++ ...nitize_enterprise_long_description_spec.rb | 51 +++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 db/migrate/20240510033206_sanitize_enterprise_long_description.rb create mode 100644 spec/migrations/20240510033206_sanitize_enterprise_long_description_spec.rb diff --git a/db/migrate/20240510033206_sanitize_enterprise_long_description.rb b/db/migrate/20240510033206_sanitize_enterprise_long_description.rb new file mode 100644 index 00000000000..5f69464ed33 --- /dev/null +++ b/db/migrate/20240510033206_sanitize_enterprise_long_description.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +class SanitizeEnterpriseLongDescription < ActiveRecord::Migration[7.0] + class Enterprise < ApplicationRecord + end + + # This is a copy from our application code at the time of writing. + # We prefer to keep migrations isolated and not affected by changing + # application code in the future. + # If we need to change the sanitizer in the future we may need a new + # migration (not change the old one) to sanitise the data properly. + class HtmlSanitizer + ALLOWED_TAGS = %w[h1 h2 h3 h4 div p br b i u a strong em del pre blockquote ul ol li hr + figure].freeze + ALLOWED_ATTRIBUTES = %w[href target].freeze + ALLOWED_TRIX_DATA_ATTRIBUTES = %w[data-trix-attachment].freeze + + def self.sanitize(html) + @sanitizer ||= Rails::HTML5::SafeListSanitizer.new + @sanitizer.sanitize( + html, tags: ALLOWED_TAGS, attributes: (ALLOWED_ATTRIBUTES + ALLOWED_TRIX_DATA_ATTRIBUTES) + ) + end + end + + def up + Enterprise.where.not(long_description: [nil, ""]).find_each do |enterprise| + long_description = HtmlSanitizer.sanitize(enterprise.long_description) + enterprise.update!(long_description:) + end + end + + private + + def sanitize(html) + HtmlSanitizer.sanitize(html) + end +end diff --git a/spec/migrations/20240510033206_sanitize_enterprise_long_description_spec.rb b/spec/migrations/20240510033206_sanitize_enterprise_long_description_spec.rb new file mode 100644 index 00000000000..ee3d7b57ae5 --- /dev/null +++ b/spec/migrations/20240510033206_sanitize_enterprise_long_description_spec.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +require 'spec_helper' +require_relative '../../db/migrate/20240510033206_sanitize_enterprise_long_description' + +RSpec.describe SanitizeEnterpriseLongDescription do + describe "#up" do + let!(:enterprise_nil_desc) { create(:enterprise, long_description: nil) } + let!(:enterprise_empty_desc) { create(:enterprise, long_description: "") } + let!(:enterprise_normal) { create(:enterprise, long_description: normal_desc) } + let!(:enterprise_bad) { + # The attribute is sanitised at assignment. So we need to inject into the + # database differently: + create(:enterprise).tap do |enterprise| + enterprise.update_columns(long_description: bad_desc) + end + } + let(:normal_desc) { + <<~HTML.squish +

𝐂𝐎̛𝐌 𝐓𝐀̂́𝐌 𝐂𝐇𝐔́ 𝐁𝐄́ is now available in Melbourne, everyone. 😂
+ <>>> The story is this is a person who loves to eat...

+ HTML + } + let(:normal_desc_sanitised) { + <<~HTML.squish +

𝐂𝐎̛𝐌 𝐓𝐀̂́𝐌 𝐂𝐇𝐔́ 𝐁𝐄́ is now available in Melbourne, everyone. 😂
+ <>>> The story is this is a person who loves to eat...

+ HTML + } + let(:bad_desc) { + <<~HTML.squish +

Fred Farmer is a certified organic + ...

+ HTML + } + let(:bad_desc_sanitised) { + "

Fred Farmer is a certified organic alert(\"Gotcha!\")...

" + } + + it "sanitises the long description" do + expect { subject.up }.to change { + enterprise_bad.reload.attributes["long_description"] + }.from(bad_desc).to(bad_desc_sanitised) + + expect(enterprise_nil_desc.reload.long_description).to eq nil + expect(enterprise_empty_desc.reload.long_description).to eq "" + expect(enterprise_normal.reload.attributes["long_description"]) + .to eq normal_desc_sanitised + end + end +end From d061fe8ad926e60294b68b9c5dfdfc4d37a20c5b Mon Sep 17 00:00:00 2001 From: Maikel Linke Date: Thu, 5 Sep 2024 12:38:33 +1000 Subject: [PATCH 2/2] Remove unnecessary sanitising Existing descriptions have been sanitised in a migration. New descriptions are sanitised when assigned. That should cover everything. --- app/models/enterprise.rb | 5 ----- spec/models/enterprise_spec.rb | 5 ----- 2 files changed, 10 deletions(-) diff --git a/app/models/enterprise.rb b/app/models/enterprise.rb index 7c88411c056..5a0c84d7efd 100644 --- a/app/models/enterprise.rb +++ b/app/models/enterprise.rb @@ -247,11 +247,6 @@ def self.distinct_count count(distinct: true) end - # Remove any unsupported HTML. - def long_description - HtmlSanitizer.sanitize(super) - end - # Remove any unsupported HTML. def long_description=(html) super(HtmlSanitizer.sanitize(html)) diff --git a/spec/models/enterprise_spec.rb b/spec/models/enterprise_spec.rb index 6e26d0415fb..62cf6ced6e2 100644 --- a/spec/models/enterprise_spec.rb +++ b/spec/models/enterprise_spec.rb @@ -404,11 +404,6 @@ subject.long_description = "Hello dearest monster." expect(subject.long_description).to eq "Hello alert dearest monster." end - - it "sanitises existing HTML in long_description" do - subject[:long_description] = "Hello dearest monster." - expect(subject.long_description).to eq "Hello alert dearest monster." - end end describe "callbacks" do