diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..9cdfb9c
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,441 @@
+# This Pylint rcfile contains a best-effort configuration to uphold the
+# best-practices and style described in the Google Python style guide:
+# https://google.github.io/styleguide/pyguide.html
+#
+# Its canonical open-source location is:
+# https://google.github.io/styleguide/pylintrc
+
+[MASTER]
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=third_party
+
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=
+
+# Pickle collected data for later comparisons.
+persistent=no
+
+# List of plugins (as comma-separated values of python module names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Use multiple processes to speed up Pylint.
+jobs=4
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+confidence=
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifiers separated by comma (,) or put this option
+# multiple times (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then re-enable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W".
+disable=abstract-method,
+        apply-builtin,
+        arguments-differ,
+        attribute-defined-outside-init,
+        backtick,
+        bad-option-value,
+        basestring-builtin,
+        buffer-builtin,
+        c-extension-no-member,
+        consider-using-enumerate,
+        cmp-builtin,
+        cmp-method,
+        coerce-builtin,
+        coerce-method,
+        delslice-method,
+        div-method,
+        duplicate-code,
+        eq-without-hash,
+        execfile-builtin,
+        file-builtin,
+        filter-builtin-not-iterating,
+        fixme,
+        getslice-method,
+        global-statement,
+        hex-method,
+        idiv-method,
+        implicit-str-concat-in-sequence,
+        import-error,
+        import-self,
+        import-star-module-level,
+        inconsistent-return-statements,
+        input-builtin,
+        intern-builtin,
+        invalid-str-codec,
+        locally-disabled,
+        long-builtin,
+        long-suffix,
+        map-builtin-not-iterating,
+        misplaced-comparison-constant,
+        missing-function-docstring,
+        metaclass-assignment,
+        next-method-called,
+        next-method-defined,
+        no-absolute-import,
+        no-else-break,
+        no-else-continue,
+        no-else-raise,
+        no-else-return,
+        no-init,  # added
+        no-member,
+        no-name-in-module,
+        no-self-use,
+        nonzero-method,
+        oct-method,
+        old-division,
+        old-ne-operator,
+        old-octal-literal,
+        old-raise-syntax,
+        parameter-unpacking,
+        print-statement,
+        raising-string,
+        range-builtin-not-iterating,
+        raw_input-builtin,
+        rdiv-method,
+        reduce-builtin,
+        relative-import,
+        reload-builtin,
+        round-builtin,
+        setslice-method,
+        signature-differs,
+        standarderror-builtin,
+        suppressed-message,
+        sys-max-int,
+        too-few-public-methods,
+        too-many-ancestors,
+        too-many-arguments,
+        too-many-boolean-expressions,
+        too-many-branches,
+        too-many-instance-attributes,
+        too-many-locals,
+        too-many-nested-blocks,
+        too-many-public-methods,
+        too-many-return-statements,
+        too-many-statements,
+        trailing-newlines,
+        unichr-builtin,
+        unicode-builtin,
+        unnecessary-pass,
+        unpacking-in-except,
+        useless-else-on-loop,
+        useless-object-inheritance,
+        useless-suppression,
+        using-cmp-argument,
+        wrong-import-order,
+        xrange-builtin,
+        zip-builtin-not-iterating,
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, e.g.
+# mypackage.mymodule.MyReporterClass.
+output-format=text
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file named "pylint_global.[txt|html]". This option is deprecated
+# and it will be removed in Pylint 2.0.
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=no
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables error, warning, refactor, and
+# convention, which contain the number of messages in each category, as well
+# as statement, which is the total number of statements analyzed. This is used
+# by the global evaluation report (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See the documentation for details.
+#msg-template=
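+# For example, a template such as the following (an illustrative value, not an
+# active setting here) prints the file path, line number, message id, symbol,
+# and message text:
+#   msg-template={path}:{line}: [{msg_id}({symbol})] {msg}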
+
+
+[BASIC]
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=main,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Include a hint for the correct naming format with invalid-name
+include-naming-hint=no
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl
+
+# Regular expression matching correct function names
+function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$
+
+# Regular expression matching correct variable names
+variable-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct constant names
+const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+# Regular expression matching correct attribute names
+attr-rgx=^_{0,2}[a-z][a-z0-9_]*$
+
+# Regular expression matching correct argument names
+argument-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct class attribute names
+class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+# Regular expression matching correct inline iteration names
+inlinevar-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct class names
+class-rgx=^_?[A-Z][a-zA-Z0-9]*$
+
+# Regular expression matching correct module names
+module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$
+
+# Regular expression matching correct method names
+method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=10
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis).
+# It supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt
+# lines made too long by directives to pytype.
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=(?x)(
+  ^\s*(\#\ )?<?https?://\S+>?$|
+  ^\s*(from\s+\S+\s+)?import\s+.+$)
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=yes
+
+# List of optional constructs for which whitespace checking is disabled. `dict-
+# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
+# `trailing-comma` allows a space between comma and closing bracket: (a, ).
+# `empty-line` allows space-only lines.
+no-space-check=
+
+# Maximum number of lines in a module
+max-module-lines=99999
+
+# String used as indentation unit. The internal Google style guide mandates 2
+# spaces. Google's externally-published style guide says 4, consistent with
+# PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google
+# projects (like TensorFlow).
+indent-string='  '
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take into consideration, separated by a comma.
+notes=TODO
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=yes
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused imports in __init__ files.
+init-import=no
+
+# A regular expression matching the name of dummy variables (i.e. expectedly
+# not used).
+dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_)
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools
+
+
+[LOGGING]
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format
+logging-modules=logging,absl.logging,tensorflow.io.logging
+
+
+[SIMILARITIES]
+
+# Minimum number of lines for a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+
+[SPELLING]
+
+# Spelling dictionary name. Available dictionaries: none. To make this work,
+# install the python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to the indicated private dictionary in
+# the --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
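+# For example (an illustrative value, not an active setting here), spell
+# checking of comments and docstrings can be enabled against the standard
+# enchant en_US dictionary with:
+#   spelling-dict=en_US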
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,
+                   TERMIOS,
+                   Bastion,
+                   rexec,
+                   sets
+
+# Create a graph of all (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant, absl
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+
+[CLASSES]
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+                  _fields,
+                  _replace,
+                  _source,
+                  _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls,
+                            class_
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception".
+overgeneral-exceptions=StandardError,
+                       Exception,
+                       BaseException
diff --git a/.style.yapf b/.style.yapf
new file mode 100644
index 0000000..7c93ab7
--- /dev/null
+++ b/.style.yapf
@@ -0,0 +1,3 @@
+[style]
+based_on_style = google
+indent_width = 2
\ No newline at end of file
diff --git a/scripts/roll_preload_list.py b/scripts/roll_preload_list.py
index 9b7ccd5..0e1cbdf 100644
--- a/scripts/roll_preload_list.py
+++ b/scripts/roll_preload_list.py
@@ -1,114 +1,131 @@
+"""Updates the HSTS preload list JSON file."""
+
+from __future__ import print_function
 import argparse
 import json
 import re
 import requests
 import sys
 
+
 def log(s):
   sys.stderr.write(s)
 
-class Chunk:
-  BlankLine, CommentLine, OneLineEntry, Unknown = range(4)
-def getPendingRemovals():
+class Chunk(object):
+  BLANK_LINE, COMMENT_LINE, ONE_LINE_ENTRY, UNKNOWN = list(range(4))
+
+
+def get_pending_removals():
   log("Fetching pending removal...\n")
   return requests.get("https://hstspreload.org/api/v2/pending-removal").json()
 
-def getRawText(preloadListPath):
+
+def get_raw_text(preload_list_path):
   log("Fetching preload list from Chromium source...\n")
-  with open(preloadListPath, "r") as f:
-    s = f.read()
+  with open(preload_list_path, "r", encoding="utf-8") as f:
+    s = f.read()
   return s
 
-def getPendingScan(pendingDataFilePath):
+
+def get_pending_scan(pending_data_file_path):
   log("Fetching pending list from provided path...\n")
-  log("  %s\n" % pendingDataFilePath)
-  with open(pendingDataFilePath, "r") as f:
-    return json.load(f)
-
-def domainsToPreload(pendingData, domainsToReject):
-  numSkipping = 0
-  numPreloading = 0
-  for result in pendingData:
+  log(f"  {pending_data_file_path}\n")
+  with open(pending_data_file_path, "r", encoding="utf-8") as f:
+    return json.load(f)
+
+
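+# Each record in the pending-scan JSON is expected to look roughly like the
+# following (shape inferred from the fields read in domains_to_preload below;
+# example.com is a placeholder):
+#   {"domain": "example.com", "issues": {"errors": [{"code": "..."}]}}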
+def domains_to_preload(pending_data, domains_to_reject):
+  num_skipping = 0
+  num_preloading = 0
+  for result in pending_data:
     if len(result["issues"]["errors"]) == 0:
-      numPreloading += 1
+      num_preloading += 1
       yield result["domain"]
     else:
       errors = list(error["code"] for error in result["issues"]["errors"])
-      domainsToReject += [
-          {"domain": result["domain"], "errors": errors}
-      ]
-      numSkipping += 1
-  log("Pending entries preloaded: %d\n" % numPreloading)
-  log("Pending entries rejected: %d\n" % numSkipping)
-
-def chunks(rawText):
+      domains_to_reject += [{"domain": result["domain"], "errors": errors}]
+      num_skipping += 1
+  log(f"Pending entries preloaded: {num_preloading}\n")
+  log(f"Pending entries rejected: {num_skipping}\n")
+
+
+def chunks(raw_text):
   log("Chunking...\n")
-  lines = iter(rawText.splitlines())
+  lines = iter(raw_text.splitlines())
   while True:
     try:
       chunk = next(lines)
       if chunk == "":
-        yield chunk, Chunk.BlankLine
+        yield chunk, Chunk.BLANK_LINE
         continue
-      elif re.match(r'^ *//.*', chunk):
-        yield chunk, Chunk.CommentLine
+      elif re.match(r"^ *//.*", chunk):
+        yield chunk, Chunk.COMMENT_LINE
         continue
-      elif re.match(r'^ \{.*\},', chunk):
-        yield chunk, Chunk.OneLineEntry
+      elif re.match(r"^ \{.*\},", chunk):
+        yield chunk, Chunk.ONE_LINE_ENTRY
       else:
-        yield chunk, Chunk.Unknown
+        yield chunk, Chunk.UNKNOWN
     except StopIteration:
       break
 
-def update(pendingRemovals, pendingAdditions, entryStrings):
+
+def update(pending_removals, pending_additions, entry_strings):
   log("Removing and adding entries...\n")
-  removedCount = 0
-  for l, c in entryStrings:
-    if c == Chunk.OneLineEntry:
-      parsed = json.loads("[%s{}]" % l)[0]
+  removed_count = 0
+  for l, c in entry_strings:
+    if c == Chunk.ONE_LINE_ENTRY:
+      # `l` will have a trailing comma -- remove it first, and then the line
+      # can be directly parsed as JSON.
+      parsed = json.loads(re.sub(r"},\s*$", r"}", l))
       domain = parsed["name"]
-      if domain in pendingRemovals:
-        removedCount += 1
-        pendingRemovals.remove(domain)
+      if domain in pending_removals:
+        removed_count += 1
+        pending_removals.remove(domain)
       else:
         yield l
     elif l == " // END OF 1-YEAR BULK HSTS ENTRIES":
-      for domain in sorted(pendingAdditions):
-        yield ' { "name": "%s", "policy": "bulk-1-year", "mode": "force-https", "include_subdomains": true },' % domain
+      for domain in sorted(pending_additions):
+        yield (f' {{ "name": "{domain}", "policy": "bulk-1-year", '
               f'"mode": "force-https", "include_subdomains": true }},')
       yield l
     else:
       yield l
-  log("Removed: %s\n" % removedCount)
+  log(f"Removed: {removed_count}\n")
+
-def write(file, output):
-  log("Writing to %s...\n" % file)
-  with open(file, 'w') as file:
+def write(filename, output):
+  log(f"Writing to {filename}...\n")
+  with open(filename, "w", encoding="utf-8") as file:
     file.write(output)
     file.close()
 
-def getArgs():
-  parser = argparse.ArgumentParser(description='Roll the HSTS preload list (experimental).')
-  parser.add_argument('preload_list_path', type=str)
-  parser.add_argument('pending_scan_path', type=str)
-  parser.add_argument('rejected_domains_path', type=str)
-  parser.add_argument('--skip_removals', action='store_true')
+
+def get_args():
+  parser = argparse.ArgumentParser(
+      description="Roll the HSTS preload list (experimental).")
+  parser.add_argument("preload_list_path", type=str)
+  parser.add_argument("pending_scan_path", type=str)
+  parser.add_argument("rejected_domains_path", type=str)
+  parser.add_argument("--skip_removals", action="store_true")
   return parser.parse_args()
 
-def parseJsonWithComments(rawText):
+
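+# The preload list is JSON extended with //-style comment lines, so it can be
+# parsed by dropping the comment chunks first. A one-line entry has the form
+# (illustrative; example.com is a placeholder):
+#   { "name": "example.com", "policy": "bulk-1-year", "mode": "force-https", "include_subdomains": true },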
+def parse_json_with_comments(raw_text):
   s = ""
-  for l, c in chunks(rawText):
-    if c == Chunk.CommentLine:
+  for l, c in chunks(raw_text):
+    if c == Chunk.COMMENT_LINE:
       continue
     else:
       s += l + "\n"
   return json.loads(s)
 
-def checkForDupes(parsedList):
+
+def check_for_dupes(parsed_list):
   log("Checking for duplicates...\n")
   seen = set()
   dupes = set()
-  for entry in parsedList["entries"]:
+  for entry in parsed_list["entries"]:
     name = entry["name"]
     if name in seen:
       dupes.add(name)
@@ -116,32 +133,43 @@ def checkForDupes(parsedList):
       seen.add(name)
   return dupes
 
+
 def main():
-  args = getArgs()
+  args = get_args()
 
-  rawText = getRawText(args.preload_list_path)
-  pendingRemovals = []
+  raw_text = get_raw_text(args.preload_list_path)
+  pending_removals = []
   if not args.skip_removals:
-    pendingRemovals = getPendingRemovals()
-  domainsToReject = []
-  pendingAdditions = domainsToPreload(getPendingScan(args.pending_scan_path), domainsToReject)
-  updated = update(pendingRemovals, pendingAdditions, chunks(rawText))
-  updatedText = "\n".join(updated) + "\n"
+    pending_removals = get_pending_removals()
+  domains_to_reject = []
+  pending_additions = domains_to_preload(
+      get_pending_scan(args.pending_scan_path), domains_to_reject)
+  updated = update(pending_removals, pending_additions, chunks(raw_text))
+  updated_text = "\n".join(updated) + "\n"
 
-  dupes = checkForDupes(parseJsonWithComments(updatedText))
+  dupes = check_for_dupes(parse_json_with_comments(updated_text))
 
-  write(args.preload_list_path, updatedText)
-  write(args.rejected_domains_path, json.dumps(domainsToReject, indent=2) + "\n")
+  write(args.preload_list_path, updated_text)
+  write(args.rejected_domains_path,
+        json.dumps(domains_to_reject, indent=2) + "\n")
 
   if dupes:
-    print "\nWARNING\nDuplicate entries:"
+    print("\nWARNING\nDuplicate entries:")
     for dupe in dupes:
-      print "- %s" % dupe
-    print "\nYou'll need to manually deduplicate entries before commiting them to Chromium."
-    print "\nNote: if there are a lot of duplicate entries, you may have accidentally run this script twice. Reset your checkout and try again."
+      print(f"- {dupe}")
+    print(
+        "\nYou'll need to manually deduplicate entries before committing them "
+        "to Chromium."
+    )
+    print(
+        "\nNote: if there are a lot of duplicate entries, you may have "
+        "accidentally run this script twice. Reset your checkout and try "
+        "again."
+    )
   else:
-    print "\nSUCCESS\n"
+    print("\nSUCCESS\n")
 
 
 if __name__ == "__main__":
-    main()
+  main()
diff --git a/scripts/update_bulk_preloaded.py b/scripts/update_bulk_preloaded.py
index f12e3f3..17971b9 100644
--- a/scripts/update_bulk_preloaded.py
+++ b/scripts/update_bulk_preloaded.py
@@ -1,108 +1,124 @@
+"""Update the bulk HSTS preload list."""
+
+from __future__ import print_function
 import base64
 import json
 import re
 import requests
 import sys
 
+
 def log(s):
   sys.stderr.write(s)
 
-class State:
-  BeforeLegacy18WeekBulkEntries, \
-  DuringLegacy18WeekBulkEntries, \
-  AfterLegacy18WeekBulkEntries, \
-  During18WeekBulkEntries, \
-  After18WeekBulkEntries, \
-  During1YearBulkEntries, \
-  After1YearBulkEntries, \
-  During1YearBulkSubdomainEntries, \
-  After1YearBulkSubdomainEntries = range(9)
-
-def getRawText():
+
+class State(object):
+  BEFORE_LEGACY_18WEEK_BULK_ENTRIES, \
+  DURING_LEGACY_18WEEK_BULK_ENTRIES, \
+  AFTER_LEGACY_18WEEK_BULK_ENTRIES, \
+  DURING_18WEEK_BULK_ENTRIES, \
+  AFTER_18WEEK_BULK_ENTRIES, \
+  DURING_1YEAR_BULK_ENTRIES, \
+  AFTER_1YEAR_BULK_ENTRIES, \
+  DURING_1YEAR_BULK_SUBDOMAIN_ENTRIES, \
+  AFTER_1YEAR_BULK_SUBDOMAIN_ENTRIES = list(range(9))
+
+
+def get_raw_text():
   log("Fetching preload list from Chromium source...\n")
-  return base64.b64decode(requests.get("https://chromium.googlesource.com/chromium/src/+/main/net/http/transport_security_state_static.json?format=TEXT").text)
+  return base64.b64decode(
+      requests.get(
+          "https://chromium.googlesource.com/chromium/src/+/main/net/http/"
+          "transport_security_state_static.json?format=TEXT"
+      ).text).decode("UTF-8")
+
-def extractBulkEntries(rawText):
+def extract_bulk_entries(raw_text):
   log("Extracting bulk entries...\n")
-  state = State.BeforeLegacy18WeekBulkEntries
-  bulkEntryString = "[\n"
-  for line in rawText.splitlines():
-    if state == State.BeforeLegacy18WeekBulkEntries:
+  state = State.BEFORE_LEGACY_18WEEK_BULK_ENTRIES
+  bulk_entry_string = "[\n"
+  for line in raw_text.splitlines():
+    if state == State.BEFORE_LEGACY_18WEEK_BULK_ENTRIES:
       if "START OF LEGACY 18-WEEK BULK HSTS ENTRIES" in line:
-        state = State.DuringLegacy18WeekBulkEntries
-    elif state == State.DuringLegacy18WeekBulkEntries:
+        state = State.DURING_LEGACY_18WEEK_BULK_ENTRIES
+    elif state == State.DURING_LEGACY_18WEEK_BULK_ENTRIES:
       if "END OF LEGACY 18-WEEK BULK HSTS ENTRIES" in line:
-        state = State.AfterLegacy18WeekBulkEntries
+        state = State.AFTER_LEGACY_18WEEK_BULK_ENTRIES
       else:
-        bulkEntryString += line + "\n"
-    if state == State.AfterLegacy18WeekBulkEntries:
+        bulk_entry_string += line + "\n"
+    if state == State.AFTER_LEGACY_18WEEK_BULK_ENTRIES:
       if "START OF 18-WEEK BULK HSTS ENTRIES" in line:
-        state = State.During18WeekBulkEntries
-    elif state == State.During18WeekBulkEntries:
+        state = State.DURING_18WEEK_BULK_ENTRIES
+    elif state == State.DURING_18WEEK_BULK_ENTRIES:
       if "END OF 18-WEEK BULK HSTS ENTRIES" in line:
-        state = State.After18WeekBulkEntries
+        state = State.AFTER_18WEEK_BULK_ENTRIES
      else:
-        bulkEntryString += line + "\n"
-    if state == State.After18WeekBulkEntries:
+        bulk_entry_string += line + "\n"
+    if state == State.AFTER_18WEEK_BULK_ENTRIES:
      if "START OF 1-YEAR BULK HSTS ENTRIES" in line:
-        state = State.During1YearBulkEntries
-    elif state == State.During1YearBulkEntries:
+        state = State.DURING_1YEAR_BULK_ENTRIES
+    elif state == State.DURING_1YEAR_BULK_ENTRIES:
       if "END OF 1-YEAR BULK HSTS ENTRIES" in line:
-        state = State.After1YearBulkEntries
+        state = State.AFTER_1YEAR_BULK_ENTRIES
       else:
-        bulkEntryString += line + "\n"
-    elif state == State.After1YearBulkEntries:
+        bulk_entry_string += line + "\n"
+    elif state == State.AFTER_1YEAR_BULK_ENTRIES:
       if "START OF 1-YEAR BULK SUBDOMAIN HSTS ENTRIES" in line:
-        state = State.During1YearBulkSubdomainEntries
-    elif state == State.During1YearBulkSubdomainEntries:
+        state = State.DURING_1YEAR_BULK_SUBDOMAIN_ENTRIES
+    elif state == State.DURING_1YEAR_BULK_SUBDOMAIN_ENTRIES:
       if "END OF 1-YEAR BULK SUBDOMAIN HSTS ENTRIES" in line:
-        state = State.After1YearBulkSubdomainEntries
+        state = State.AFTER_1YEAR_BULK_SUBDOMAIN_ENTRIES
       else:
-        bulkEntryString += line + "\n"
-    elif state == State.After1YearBulkSubdomainEntries:
+        bulk_entry_string += line + "\n"
+    elif state == State.AFTER_1YEAR_BULK_SUBDOMAIN_ENTRIES:
       if "BULK" in line:
         print(line)
         raise Exception("Preload list contains unexpected bulk entry markers.")
-  if state != State.After1YearBulkSubdomainEntries:
-    raise Exception("Unexpected end state: %d" % state)
+  if state != State.AFTER_1YEAR_BULK_SUBDOMAIN_ENTRIES:
+    raise Exception(f"Unexpected end state: {state}")
   # Add an empty object for the last entry to go after the trailing comma.
-  bulkEntryString += "{}]"
+  bulk_entry_string += "{}]"
 
-  entries = json.loads(bulkEntryString)
+  entries = json.loads(bulk_entry_string)
   # Remove empty object at the end.
   del entries[-1]
-  log("Found %d bulk entries.\n" % len(entries))
+  log(f"Found {len(entries)} bulk entries.\n")
   return entries
 
-def sanityCheck(domainList):
+
+def sanity_check(domain_list):
   log("Sanity checking domains...\n")
-  for domain in domainList:
-    log("\033[K\rChecking: %s" % domain)
-    if not re.match(r'^[a-z0-9-\.]+$', domain):
-      raise Exception("Incorrectly formatted domain: %s" % domain)
+  for domain in domain_list:
+    log(f"\033[K\rChecking: {domain}")
+    if not re.match(r"^[a-z0-9-\.]+$", domain):
+      raise Exception(f"Incorrectly formatted domain: {domain}")
     if domain in ["google.com", "gmail.com", "hstspreload.org"]:
       raise Exception("Unexpected domain in list")
   log("\n")
 
-def formatForGo(domainList):
+
+def format_for_go(domain_list):
   obj = {}
-  for domain in domainList:
+  for domain in domain_list:
     obj[domain] = True
   return obj
 
-def write(bulkDomains):
+
+def write(bulk_domains):
   log("Writing...\n")
-  with open(sys.argv[1], 'w') as file:
-    json.dump(formatForGo(bulkDomains), file)
+  with open(sys.argv[1], "w", encoding="utf-8") as file:
+    json.dump(format_for_go(bulk_domains), file)
+
 
 def main():
-  rawText = getRawText()
-  bulkEntries = extractBulkEntries(rawText)
-  bulkDomains = [entry["name"] for entry in bulkEntries]
-  sanityCheck(bulkDomains)
-  write(bulkDomains)
+  raw_text = get_raw_text()
+  bulk_entries = extract_bulk_entries(raw_text)
+  bulk_domains = [entry["name"] for entry in bulk_entries]
+  sanity_check(bulk_domains)
+  write(bulk_domains)
   log("\033[92mStatic bulk domain data update done!\x1b[0m\n")
 
+
 if __name__ == "__main__":
-    main()
+  main()