Module: Emendate

Extended by:
Dry::Configurable, Emendate
Includes:
ExampleHelpers
Included in:
Emendate
Defined in:
lib/emendate.rb,
lib/emendate/errors.rb,
lib/emendate/result.rb,
lib/emendate/options.rb,
lib/emendate/version.rb,
lib/emendate/examples.rb,
lib/emendate/date_utils.rb,
lib/emendate/error_util.rb,
lib/emendate/translator.rb,
lib/emendate/parsed_date.rb,
lib/emendate/translators.rb,
lib/emendate/examples/csv.rb,
lib/emendate/examples/row.rb,
lib/emendate/subsourceable.rb,
lib/emendate/date_segmenter.rb,
lib/emendate/range_indicator.rb,
lib/emendate/token_collapser.rb,
lib/emendate/date_part_tagger.rb,
lib/emendate/edtf_set_handler.rb,
lib/emendate/examples/row_set.rb,
lib/emendate/options_contract.rb,
lib/emendate/translators/edtf.rb,
lib/emendate/examples/taggable.rb,
lib/emendate/month_day_analyzer.rb,
lib/emendate/ordinal_translator.rb,
lib/emendate/short_year_handler.rb,
lib/emendate/format_standardizer.rb,
lib/emendate/segment/month_alpha.rb,
lib/emendate/examples/example_set.rb,
lib/emendate/segment/season_alpha.rb,
lib/emendate/segment_set_editable.rb,
lib/emendate/translators/abstract.rb,
lib/emendate/unprocessable_tagger.rb,
lib/emendate/untokenizable_tagger.rb,
lib/emendate/translators/edtf/year.rb,
lib/emendate/examples/date_testable.rb,
lib/emendate/examples/result_testable.rb,
lib/emendate/translators/edtf/century.rb,
lib/emendate/examples/testable_example.rb,
lib/emendate/segment/uncertainty_digits.rb,
lib/emendate/translators/edtf/year_month.rb,
lib/emendate/examples/translation_testable.rb,
lib/emendate/translators/edtf/known_unknown.rb,
lib/emendate/translators/edtf/year_month_day.rb,
lib/emendate/translators/lyrasis_pseudo_edtf.rb,
lib/emendate/translators/collectionspace/year.rb,
lib/emendate/translators/collectionspace/error.rb,
lib/emendate/translators/collectionspace/range.rb,
lib/emendate/translators/collectionspace/decade.rb,
lib/emendate/translators/lyrasis_pseudo_edtf/year.rb,
lib/emendate/translators/lyrasis_pseudo_edtf/range.rb,
lib/emendate/translators/collectionspace/year_month.rb,
lib/emendate/translators/collectionspace/year_season.rb,
lib/emendate/translators/lyrasis_pseudo_edtf/century.rb,
lib/emendate/translators/collectionspace/known_unknown.rb,
lib/emendate/translators/collectionspace/untokenizable.rb,
lib/emendate/translators/collectionspace/year_month_day.rb,
lib/emendate/translators/lyrasis_pseudo_edtf/year_month.rb,
lib/emendate/translators/lyrasis_pseudo_edtf/known_unknown.rb,
lib/emendate/translators/lyrasis_pseudo_edtf/year_month_day.rb

Defined Under Namespace

Modules: DateUtils, ErrorUtil, Examples, SegmentSetEditable, Subsourceable, Translators

Classes: DatePartTagger, DateSegmenter, DateTypeCreationError, DecadeTypeError, DerivedSegmentError, EdtfSetHandler, EmptyTestSetError, ForbiddenSegmentAdditionError, FormatStandardizer, ImpossibleCollapseError, InvalidDateError, MillenniumTypeError, MonthAlpha, MonthDayAnalyzer, MonthDayError, MonthDayYearError, MonthLiteralError, NonDateTypeError, Options, OptionsContract, OrdinalTranslator, ParsedDate, PreferredMdyOrderInvalidError, QualifierTypeError, RangeIndicator, RangeStartOpenError, Result, SeasonAlpha, SeasonLiteralError, ShortYearHandler, TokenCollapser, TokenLexemeError, TokenTypeError, Translator, UncertaintyDigits, UnexpectedPluralizedYearPatternError, UnprocessableTagger, UnsegmentableDatePatternError, UntaggableDatePatternError, UntokenizableTagger

Constant Summary collapse

PROCESSING_STEPS =

Steps called by ProcessingManager. Each key is a step's processing class. Each value is the state recorded if that step completes successfully; it is also used to indicate the step in which processing failed.

Also used when running tests/examples to convert test strings into segment sets appropriate for input for a given test.

# Key: the processing step class. Value: the state symbol recorded when that
# step completes successfully (also used to report the step in which
# processing failed).
# Frozen so that this shared constant cannot be mutated at runtime.
{
  Emendate::Lexer => :lexed,
  Emendate::UntokenizableTagger => :untokenizable_tagged,
  Emendate::UnprocessableTagger => :unprocessable_tagged,
  Emendate::KnownUnknownTagger => :known_unknown_tagged,
  Emendate::EdtfQualifier => :edtf_qualified,
  Emendate::TokenCollapser => :tokens_collapsed,
  Emendate::OrdinalTranslator => :ordinals_translated,
  Emendate::EdtfSetHandler => :edtf_sets_handled,
  Emendate::BracketPairHandler => :bracket_pairs_handled,
  Emendate::UnstructuredCertaintyHandler => :unstructured_certainty_handled,
  Emendate::FormatStandardizer => :format_standardized,
  Emendate::LetterCTagger => :letter_c_tagged,
  Emendate::DatePartTagger => :date_parts_tagged,
  Emendate::DateSegmenter => :dates_segmented,
  Emendate::RangeIndicator => :ranges_indicated,
  Emendate::TokenCleaner => :cleaned
}.freeze
# Anonymous module assigned to the Error constant.
# NOTE(review): presumably a shared marker for Emendate's error classes —
# confirm in lib/emendate/errors.rb.
Error =
Module.new
# Version string of the Emendate library.
VERSION =
"0.1.1"

Common use commands for individual strings collapse

Dev/debugging commands for individual strings collapse

Batch processing commands, for use in scripts collapse

Instance Method Summary collapse

Instance Method Details

#batch_process(strings, options = {}) {|Emendate::ProcessingManager| ... } ⇒ Array<String>

Returns original strings.

Parameters:

  • strings (Array<String>)
  • options (Hash) (defaults to: {})

Yields:

  • (Emendate::ProcessingManager)

Returns:

  • (Array<String>)

    original strings



224
225
226
227
228
229
230
231
232
233
# File 'lib/emendate.rb', line 224

# Runs every string through Emendate::ProcessingManager and hands each
# outcome to the caller's block.
#
# @param strings [Array<String>] date strings to process
# @param options [Hash] passed to Emendate::Options.new when non-empty
# @yield the value carried by the call result, whether it came from the
#   success branch or the failure branch (documented as an
#   Emendate::ProcessingManager)
# @return [Array<String>] the original strings
def batch_process(strings, options = {})
  Emendate::Options.new(options) unless options.empty?

  # Success and failure are treated the same way: pass the payload along.
  forward = ->(payload) { yield payload }

  strings.each do |str|
    Emendate::ProcessingManager.call(str).either(forward, forward)
  end
end

#batch_translate(strings, verbose = false, options = {}) {|Emendate::Translation| ... } ⇒ Array<String>

Returns original strings.

Parameters:

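  • strings (Array<String>)
  • verbose (Boolean) (defaults to: false)

    when true, prints each string and progress messages ("Processing...", "Translating...") to standard output

  • options (Hash) (defaults to: {})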

Yields:

  • (Emendate::Translation)

Returns:

  • (Array<String>)

    original strings



239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/emendate.rb', line 239

# Processes and translates each string, yielding every translation to the
# caller's block.
#
# @param strings [Array<String>] date strings to translate
# @param verbose [Boolean] when truthy, prints the string and progress
#   messages to standard output
# @param options [Hash] passed to Emendate::Options.new when non-empty
# @yield the value returned by calling the Emendate::Translator built for
#   each string (documented as an Emendate::Translation)
# @return [Array<String>] the original strings
def batch_translate(strings, verbose = false, options = {})
  Emendate::Options.new(options) unless options.empty?

  strings.each do |str|
    puts str, "  Processing..." if verbose

    outcome = Emendate::ProcessingManager.call(str)
    # Translate whatever the processing call carries, success or failure.
    payload =
      if outcome.success?
        outcome.value!
      else
        outcome.failure
      end

    puts "  Translating..." if verbose
    yield Emendate::Translator.new(payload).call
  end
end

#lex(str) ⇒ Emendate::SegmentSet

Use this command to quickly determine whether the date string can be lexed (broken into its meaningful segments) for processing. This is generally the first thing to try when adding handling for a new date pattern.

Parameters:

  • str (String)

Returns:

  • (Emendate::SegmentSet)

    the initial Segments derived from date string



165
166
167
# File 'lib/emendate.rb', line 165

# Returns the input that would be handed to Emendate::UntokenizableTagger
# for the given string, by delegating to #prepped_for with that class as
# the target.
#
# @param str [String] date string to lex
# @return whatever #prepped_for returns for that target (documented as an
#   Emendate::SegmentSet)
def lex(str)
  stop_before = Emendate::UntokenizableTagger
  prepped_for(
    string: str,
    target: stop_before
  )
end

#lex_inspect(str, opts = nil) ⇒ String

A quick representation of the segment types produced by the lex command

Parameters:

  • str (String)
  • opts (Object) (defaults to: nil)

    accepted but not used by the method body

Returns:

  • (String)

    the original string, two tab characters, then the inspected array of segment types returned by calling #lex on str



211
212
213
214
# File 'lib/emendate.rb', line 211

# Builds a one-line summary of the segment types that #lex produces.
#
# @param str [String] date string to lex
# @param opts [Object] accepted for interface compatibility; not used by
#   this method
# @return [String] the original string, two tab characters, then the
#   inspected array of segment types
def lex_inspect(str, opts = nil)
  segment_types = lex(str).map { |segment| segment.type }
  types_repr = segment_types.inspect
  "#{str}\t\t#{types_repr}"
end

#parse(str, options = {}) ⇒ Emendate::Result

Use this command to get a Result: parsed date data in a structured format you can do useful stuff with. The intent of this command is to mirror the behavior of the Timetwister parse command. It's not fully there yet, but returns something similar.

Parameters:

  • str (String)
  • options (Hash) (defaults to: {})

Returns:

  • (Emendate::Result)



127
128
129
130
# File 'lib/emendate.rb', line 127

# Processes a date string and returns the `result` of the processed value.
#
# @param str [String] date string to parse
# @param options [Hash] passed to Emendate::Options.new when non-empty
# @return the value of calling `result` on what #process returns
#   (documented as an Emendate::Result)
def parse(str, options = {})
  Emendate::Options.new(options) unless options.empty?

  processed = process(str)
  processed.result
end

#prepped_for(string:, target:, options: nil) ⇒ Emendate::SegmentSet, String

Get the input segments for the given target. Runs all steps prior to the target.

Parameters:

  • string (String)

    original date string

  • target (Class)

    the processing step to get input segments for. All steps prior to the target will be carried out, and the result that would normally be passed to the target will be returned

  • options (Hash) (defaults to: nil)

Returns:

  • (Emendate::SegmentSet)

    for all targets other than Lexer, will return the result of the processing step prior to the target

  • (String)

    if target is Lexer



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/emendate.rb', line 185

# Runs every step that #prep_steps returns for the target and returns what
# the last of them produced.
#
# @param string [String] original date string
# @param target [Class] the processing step to get input segments for
# @param options [Hash, nil] passed to Emendate::Options.new when truthy
# @return the original string when #prep_steps returns nothing for the
#   target (documented as the Lexer case); otherwise the value produced by
#   the last step run (documented as an Emendate::SegmentSet), or the
#   `failure` of the first step whose result yields no value
def prepped_for(string:, target:, options: nil)
  Emendate::Options.new(options) if options

  steps = prep_steps(target)
  return string unless steps

  # nil marks a step whose result reports failure.
  unwrap = ->(outcome) { outcome.failure? ? nil : outcome.value! }

  # The first step receives the raw string; later steps receive segments.
  first_outcome = steps.first.call(string)
  segments = unwrap.call(first_outcome)
  return first_outcome.failure unless segments
  return segments if steps.length == 1

  steps.shift
  steps.each do |step|
    outcome = step.call(segments)
    segments = unwrap.call(outcome)
    return outcome.failure unless segments
  end

  segments
end

#process(str, options = {}) ⇒ Emendate::ProcessingManager

Use this command to explore how a given date string is processed, in detail. Primarily used for development and debugging.

Parameters:

  • str (String)
  • options (Hash) (defaults to: {})

Returns:

  • (Emendate::ProcessingManager)


152
153
154
155
156
# File 'lib/emendate.rb', line 152

# Runs a date string through Emendate::ProcessingManager and returns the
# value carried by the call result, whichever branch it came from.
#
# @param str [String] date string to process
# @param options [Hash] passed to Emendate::Options.new when non-empty
# @return the success or failure payload of the call (documented as an
#   Emendate::ProcessingManager)
def process(str, options = {})
  Emendate::Options.new(options) unless options.empty?

  # Both branches hand back the payload unchanged.
  passthrough = ->(payload) { payload }
  Emendate::ProcessingManager.call(str).either(passthrough, passthrough)
end

#set_unhandled_mode ⇒ Object



256
257
258
259
260
261
# File 'lib/emendate.rb', line 256

# Reads `final_check_failure_handling` from Emendate.options and maps
# :collapse_unhandled_first_date to :collapse_unhandled; any other value is
# returned unchanged.
#
# @return [Object] :collapse_unhandled or the configured value
def set_unhandled_mode
  configured = Emendate.options.final_check_failure_handling
  configured == :collapse_unhandled_first_date ? :collapse_unhandled : configured
end

#translate(str, options = {}) ⇒ Emendate::Translation

Use this command to parse a date string and convert the result into an expression of the date in a given dialect.

Parameters:

  • str (String)

    to translate

  • options (Hash) (defaults to: {})

    of Options; Indication of the dialect is required

Returns:

  • (Emendate::Translation)


138
139
140
141
# File 'lib/emendate.rb', line 138

# Processes a date string and passes the outcome to
# Emendate::Translator.call.
#
# @param str [String] date string to translate
# @param options [Hash] passed to Emendate::Options.new when non-empty
#   (documented as requiring an indication of the dialect)
# @return the value of Emendate::Translator.call (documented as an
#   Emendate::Translation)
def translate(str, options = {})
  Emendate::Options.new(options) unless options.empty?

  processed = process(str)
  Emendate::Translator.call(processed)
end