From 4ab1f2f4bb1aa51d8a5443b50c14ccd82b896947 Mon Sep 17 00:00:00 2001 From: philwalk Date: Wed, 10 Jan 2024 11:51:13 -0700 Subject: [PATCH] v0.10.6 --- README.md | 10 +- build.sbt | 5 +- jsrc/bashPath.sc | 2 +- jsrc/bashPathCli.sc | 2 +- jsrc/chronoParse.sc | 473 +++++++++ jsrc/csvParser.sc | 47 + jsrc/fastCsv.sc | 10 + jsrc/fstabCli.sc | 2 +- jsrc/palletRef.sc | 2 +- jsrc/palletRefCli.sc | 2 +- jsrc/parseDates.sc | 351 +++++++ jsrc/unameGreeting.sc | 2 +- src/main/scala-2.13/vastblue/Info.scala | 7 - .../scala-2.13/vastblue/file/EzPath.scala | 132 --- .../vastblue/time/TimeExtensions.scala | 116 --- .../{scala-3/vastblue => scala}/Info.scala | 0 .../vastblue/{demo => examples}/Demo.scala | 2 +- .../vastblue/{demo => examples}/Find.scala | 2 +- .../vastblue/{demo => examples}/GlobArg.scala | 2 +- .../{demo => examples}/MainName.scala | 2 +- .../vastblue/file/EzPath.scala | 0 src/main/scala/vastblue/file/FastCsv.scala | 14 - .../scala/vastblue/time/ChronoParse.scala | 938 ++++++++++++++++++ src/main/scala/vastblue/time/TimeDate.scala | 235 +++-- .../vastblue/time/TimeExtensions.scala | 0 .../time/{ParsDate.scala => TimeParser.scala} | 185 ++-- src/test/scala/vastblue/file/CsvTests.scala | 2 +- .../{PathSpec.scala => FilePathSpec.scala} | 42 +- .../scala/vastblue/file/PathnameTest.scala | 2 +- ...DateTests.scala => ChronoParseTests.scala} | 24 +- 30 files changed, 2145 insertions(+), 468 deletions(-) create mode 100644 jsrc/chronoParse.sc create mode 100644 jsrc/csvParser.sc create mode 100644 jsrc/fastCsv.sc create mode 100644 jsrc/parseDates.sc delete mode 100644 src/main/scala-2.13/vastblue/Info.scala delete mode 100644 src/main/scala-2.13/vastblue/file/EzPath.scala delete mode 100644 src/main/scala-2.13/vastblue/time/TimeExtensions.scala rename src/main/{scala-3/vastblue => scala}/Info.scala (100%) rename src/main/scala/vastblue/{demo => examples}/Demo.scala (96%) rename src/main/scala/vastblue/{demo => examples}/Find.scala (99%) rename 
src/main/scala/vastblue/{demo => examples}/GlobArg.scala (94%) rename src/main/scala/vastblue/{demo => examples}/MainName.scala (93%) rename src/main/{scala-3 => scala}/vastblue/file/EzPath.scala (100%) create mode 100644 src/main/scala/vastblue/time/ChronoParse.scala rename src/main/{scala-3 => scala}/vastblue/time/TimeExtensions.scala (100%) rename src/main/scala/vastblue/time/{ParsDate.scala => TimeParser.scala} (87%) rename src/test/scala/vastblue/file/{PathSpec.scala => FilePathSpec.scala} (93%) rename src/test/scala/vastblue/time/{ParsDateTests.scala => ChronoParseTests.scala} (95%) diff --git a/README.md b/README.md index 9815c6e..5f078ab 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Provides support for expressive idioms typical of scripting languages, for writi To use `pallet` in an `SBT` project, add this dependency to `build.sbt` ```sbt - "org.vastblue" % "pallet_3" % "0.10.4" + "org.vastblue" % "pallet_3" % "0.10.6" ``` For `scala` or `scala-cli` scripts, see examples below. @@ -35,7 +35,7 @@ Simplicity and Universal Portability: ```scala #!/usr/bin/env -S scala-cli shebang -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* printf("uname / osType / osName:\n%s\n", s"platform info: ${unameLong} / ${osType} / ${osName}") @@ -226,7 +226,7 @@ Some differences to be aware of between `scala-cli` scripts and conventional `sc For a per-user classpath `atFile`, define your classpath in a file named, e.g., `/Users/username/.scala3cp`. 
To include the `scala3` version of this library, for example, the `@file` might contain: ``` --classpath /Users/username/.ivy2/local/org.vastblue/pallet_3/0.10.4/jars/pallet_3.jar +-classpath /Users/username/.ivy2/local/org.vastblue/pallet_3/0.10.6/jars/pallet_3.jar ``` With this configuration, your scala 3 `shebang` line will look like this: ```scala @@ -265,7 +265,7 @@ object Fstab { #!/ usr / bin / env -S scala -cli shebang //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* import vastblue.Platform.* @@ -297,7 +297,7 @@ Note that on Darwin, there is no `/etc/fstab` file, so the `Path#lines` extensio #!/usr/bin/env -S scala-cli shebang //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* diff --git a/build.sbt b/build.sbt index 990d8d1..c11ee68 100644 --- a/build.sbt +++ b/build.sbt @@ -9,7 +9,7 @@ javacOptions ++= Seq("-source", "11", "-target", "11") //ThisBuild / envFileName := "dev.env" // sbt-dotenv plugin gets build environment here ThisBuild / scalaVersion := scalaVer -ThisBuild / version := "0.10.4" +ThisBuild / version := "0.10.6" ThisBuild / versionScheme := Some("semver-spec") ThisBuild / organization := "org.vastblue" @@ -73,7 +73,8 @@ libraryDependencies ++= Seq( "org.scalatest" %% "scalatest" % "3.2.17" % Test, "org.scalacheck" %% "scalacheck" % "1.17.0" % Test, "io.github.chronoscala" %% "chronoscala" % "2.0.10", - "org.vastblue" % "unifile_3" % "0.2.4", + "org.vastblue" % "unifile_3" % "0.3.0", + "com.github.sisyphsu" % "dateparser" % "1.0.11", ) /* diff --git a/jsrc/bashPath.sc b/jsrc/bashPath.sc index efee1a8..8867754 100755 --- a/jsrc/bashPath.sc +++ b/jsrc/bashPath.sc @@ -1,7 +1,7 @@ #!/usr/bin/env -S scala //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* diff --git 
a/jsrc/bashPathCli.sc b/jsrc/bashPathCli.sc index 77a723a..7dc38ae 100755 --- a/jsrc/bashPathCli.sc +++ b/jsrc/bashPathCli.sc @@ -1,7 +1,7 @@ #!/usr/bin/env -S scala-cli shebang //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* diff --git a/jsrc/chronoParse.sc b/jsrc/chronoParse.sc new file mode 100644 index 0000000..cb8e74b --- /dev/null +++ b/jsrc/chronoParse.sc @@ -0,0 +1,473 @@ +#!/usr/bin/env -S scala -deprecation +package vastblue.time + +import vastblue.pallet.* +import vastblue.time.TimeDate.* +import java.time.LocalDateTime + +object ChronoParse { + // by default, prefer US format, but swap month and day if unavoidable + // (e.g., 24/12/2022 incompatible with US format, not with Int'l format + var monthFirst = true + + def usage(m: String=""): Nothing = { + _usage(m, Seq( + "[] ; one datetime string per line", + "[-test | -flds] ; verify testdate.csv conversions", + "[-df] ; prefer day-first format (non-US)", + "by default, op == \"-flds\"", + )) + } + var (op, verbose, infiles) = ("", false, Vector.empty[Path]) + + def main(args: Array[String]): Unit = { + parseArgs(args.toSeq) + try { + op match { + case "" | "-test" | "-flds" => + verifyFields(testDataFile) + case "-file" => + convertTimestamps(infiles) + case _ => + usage(s"op == $op") + } + } catch { + case t: Throwable => + showLimitedStack(t) + sys.exit(3) + } + } + + def verifyFields(p: Path): Unit = { + if (p.isFile) { + val rows = p.csvRows + printf("%d rows\n", rows.size) + hook += 1 + for ((row, i) <- rows.zipWithIndex) { + if (i > 0) { + // skip headings row, preserve file row numbers + val rawline = row.toList match { + case _ :: targetstr :: Nil => + targetstr + case targetstr :: Nil => + targetstr + case _ => + hook += 1 + "" + } + if (verbose) printf("%04d : %s\n", i, row.mkString("|")) + + val format = DateFlds(rawline) + if (verbose) { + printf("rawlin: [%s]\n", rawline) + printf(" format: [%s]\n", 
format.toString) + } else { + printf("%-22s, \"%s\"\n", format.toString, rawline) + } + } + } + } + } + + def convertTimestamps(files: Seq[Path]): Seq[DateFlds] = { + for { + p <- files + if p.isFile + (row, i) <- p.csvRows.zipWithIndex + if i > 0 // skip headings row + rawline = row.toList match { + case targetstr :: Nil => + targetstr // if 1 column, convert it + case _ :: targetstr :: Nil => + targetstr // if 2 columns, convert 2nd + case _ => + "" + } + if rawline.nonEmpty + dateflds = DateFlds(rawline) + if dateflds.valid + } yield dateflds + } + + def parseArgs(args: Seq[String]): Unit = { + eachArg(args.toSeq, usage) { + case f if f.path.isFile => + assert(op.isEmpty, s"op[$op] but also specified file [$f]") + op = "-file" + infiles :+= f.path + case "-v" => + verbose = true + case "-df" => + monthFirst = false + case "-test" | "-flds" => + if (!testDataFile.isFile) { + usage(s"not found: ${testDataFile.posx}") + } else { + op = thisArg + } + case arg => + usage(s"unrecognized arg [$arg]") + } + } + + lazy val testDataFile = "testdates.csv".path + lazy val nowdate = now.toString("yyyy-MM-dd") +} + +// derive parse format for dates with numeric fields +// preparation must include converting names of day and month to numeric +object DateFlds { + lazy val TimeZoneSplitter = "(.*:.*) ?([-+][0-9]{1,2}:00)$".r + lazy val BadDate: DateTime = dateParser("1900-01-01") + + def apply(rawdatetime: String): DateFlds = { + var valid = true + val numerified = numerifyNames(rawdatetime) // toss weekday name, convert month name to number + + // TODO: split into time and date near the outset, handle each separately + var (datetime, timezone) = numerified match { + case TimeZoneSplitter(time, zone) => + (time, zone) + case str => + (str, "") + } + // TODO: use timezone info, including CST, etc + + /* + val timezone = datetime.replaceAll(".*:.* ?([-+][0-9]{1,2}:00)$", "$1").trim + if (timezone.length < datetime.length) { + datetime = datetime.stripSuffix(timezone).trim + } + */ 
+ var numstrings = datetime.replaceAll("\\D+", " ").trim.split(" ").toIndexedSeq + val widenums = numstrings.filter { _.length >= 4 } + widenums.toList match { + case Nil => // no wide num fields + case year :: _ if year.length == 4 => + val i = numstrings.indexOf(year) + if (i > 3) { + val (left, rite) = numstrings.splitAt(i) + val newnumstrings = Seq(year) ++ left ++ rite.drop(1) + numstrings = newnumstrings.toIndexedSeq + } + hook += 1 + case ymd :: _ => + hook += 1 // maybe 20240213 or similar + var (y, m, d) = ("", "", "") + if (ymd.startsWith("2") && ymd.length == 8) { + // assume yyyy/mm/dd + y = ymd.take(4) + m = ymd.drop(4).take(2) + d = ymd.drop(6) + } else if (ymd.drop(4).matches("2[0-9]{3}") ){ + if (monthFirst) { + // assume mm/dd/yyyy + m = ymd.take(2) + d = ymd.drop(2).take(2) + y = ymd.drop(4) + } else { + // assume dd/mm/yyyy + d = ymd.take(2) + m = ymd.drop(2).take(2) + y = ymd.drop(4) + } + } + val newymd = Seq(y, m, d) + val newnumstrings: Seq[String] = { + val head: String = numstrings.head + if (head == ymd) { + val rite: Seq[String] = numstrings.tail + val (mid: Seq[String], tail: Seq[String]) = rite.splitAt(1) + val hrmin: String = mid.mkString + if (hrmin.matches("[0-9]{3,4}")) { + val (hr: String, min: String) = hrmin.splitAt(hrmin.length-2) + val hour = if (hr.length == 1) { + s"0$hr" + } else { + hr + } + val lef: Seq[String] = newymd + val mid: Seq[String] = Seq(hour, min) + newymd ++ mid ++ tail + } else { + newymd ++ rite + } + } else { + val i = numstrings.indexOf(ymd) + val (left, rite) = numstrings.splitAt(i) + val newhrmin = rite.drop(1) + left ++ newymd ++ newhrmin + } + } + numstrings = newnumstrings.toIndexedSeq + } + + var nums = numstrings.map { ti(_) } + val timeOnly: Boolean = numstrings.size <= 4 && rawdatetime.matches("[0-9]{2}:[0-9]{2}.*") + if ( !timeOnly ) { + def adjustYear(year: Int): Unit = { + nums = nums.take(2) ++ Seq(year) ++ nums.drop(3) + numstrings = nums.map { + _.toString + } + } + val dateFields = 
nums.take(3) + dateFields match { + case Seq(a, b, c) if a > 31 || b > 31 || c > 31 => + hook += 1 // the typical case where 4-digit year is provided + case Seq(a, b) => + // the problem case; assume no year provided + adjustYear(now.getYear) // no year provided, use current year + case Seq(mOrD, dOrM, relyear) => + // the problem case; assume M/d/y or d/M/y format + val y = now.getYear + val century = y - y % 100 + adjustYear(century + relyear) + case _ => + hook += 1 // huh? + } + } + + val fields: Seq[(String, Int)] = numstrings.zipWithIndex + var (yval, mval, dval) = (0, 0, 0) + val farr = fields.toArray + var formats: Array[String] = farr.map { (s: String, i: Int) => + if (i < 3 && !timeOnly) { + ti(s) match { + case y if y > 31 || s.length == 4 => + yval = y + s.replaceAll(".", "y") + case d if d > 12 && s.length <= 2 => + dval = d + s.replaceAll(".", "d") + case _ => // can't resolve month without more context + s + } + } else { + i match { + case 3 => s.replaceAll(".", "H") + case 4 => s.replaceAll(".", "m") + case 5 => s.replaceAll(".", "s") + case 6 => s.replaceAll(".", "Z") + case _ => + s // not expecting any more numeric fields + } + } + } + def indexOf(s: String): Int = { + formats.indexWhere((fld: String) => + fld.startsWith(s) + ) + } + def numIndex: Int = { + formats.indexWhere((s: String) => s.matches("[0-9]+")) + } + def setFirstNum(s: String): Int = { + val i = numIndex + val numval = formats(i) + val numfmt = numval.replaceAll("[0-9]", s) + formats(i) = numfmt + ti(numval) + } + // if two yyyy-MM-dd fields already fixed, the third is implied + formats.take(3).map { _.distinct }.sorted match { + case Array(_, "M", "y") => dval = setFirstNum("d") + case Array(_, "d", "y") => mval = setFirstNum("M") + case Array(_, "M", "d") => yval = setFirstNum("y") + case _arr => + hook += 1 // more than one numeric fields, so not ready to resolve + } + hook += 1 + def is(s: String, v: String): Boolean = s.startsWith(v) + + val yidx = indexOf("y") + val didx = 
indexOf("d") + val midx = indexOf("M") + + def hasY = yidx >= 0 + def hasM = midx >= 0 + def hasD = didx >= 0 + def needsY = yidx < 0 + def needsM = midx < 0 + def needsD = didx < 0 + + def replaceFirstNumericField(s: String): Unit = { + val i = numIndex + if (i < 0) { + hook += 1 // no numerics found + } else { + assert(i >= 0 && i < 3, s"internal error: $datetime [i: $i, s: $s]") + s match { + case "y" => + assert(yval == 0, s"yval: $yval") + yval = ti(formats(i)) + case "M" => + assert(mval == 0, s"mval: $mval") + mval = ti(formats(i)) + case "d" => + if (dval > 0) { + hook += 1 + } + assert(dval == 0, s"dval: $dval") + dval = ti(formats(i)) + case _ => + sys.error(s"internal error: bad format indicator [$s]") + } + setFirstNum(s) + } + } + + val needs = Seq(needsY, needsM, needsD) + (needsY, needsM, needsD) match { + case (false, false, true) => + replaceFirstNumericField("d") + case (false, true, false) => + replaceFirstNumericField("M") + case (true, false, false) => + replaceFirstNumericField("y") + + case (false, true, true) => + // has year, needs month and day + yidx match { + case 1 => + // might as well support bizarre formats (M-y-d or d-M-y) + if (monthFirst) { + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } else { + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } + case 0 | 2 => + // y-M-d + if (monthFirst) { + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } else { + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } + + } + case (true, true, false) => + // has day, needs month and year + didx match { + case 0 => + // d-M-y + replaceFirstNumericField("M") + replaceFirstNumericField("y") + case 2 => + // y-M-d + replaceFirstNumericField("y") + replaceFirstNumericField("M") + case 1 => + // AMBIGUOUS ... 
+ if (monthFirst) { + // M-d-y + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } else { + // d-M-y + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } + } + case (false, false, false) => + hook += 1 // done + case (true, true, true) if timeOnly => + hook += 1 // done + case (yy, mm, dd) => + formats.toList match { + case a :: b :: Nil => + val (ta, tb) = (ti(a), ti(b)) + // interpret as missing day or missing year + // missing day if either field is > 31 + if (monthFirst && ta <= 12) { + mval = ta + dval = tb + } else { + mval = tb + dval = tb + } + if (mval > 31) { + // assume day is missing + yval = mval + mval = dval + dval = 1 // convention + } else if (dval > 31) { + // assume day is missing + yval = dval + dval = 1 // convention + } else { + if (mval > 12) { + // the above swap might make this superfluous + // swap month and day + val temp = mval + mval = dval + dval = temp + } + yval = now.getYear // supply missing year + } + // TODO: reorder based on legal field values, if appropriate + formats = Array("yyyy", "MM", "dd") + numstrings = IndexedSeq(yval, mval, dval).map { _.toString } + case _ => + sys.error(s"yy[$yy], mm[$mm], dd[$dd] datetime[$datetime], formats[${formats.mkString("|")}]") + } + } + if (numstrings.endsWith("2019") ){ + hook += 1 + } + + def fromStandardOrder(so: List[Int]): LocalDateTime = { + so match { + case yr :: mo :: dy :: hr :: mn :: sc :: nano :: Nil => + LocalDateTime.of(yr, mo, dy, hr, mn, sc, nano) + case yr :: mo :: dy :: hr :: mn :: sc :: Nil => + if (sc > 59 || mn > 59 || hr > 59) { + hook += 1 + } + LocalDateTime.of(yr, mo, dy, hr, mn, sc) + case yr :: mo :: dy :: hr :: mn :: Nil => + LocalDateTime.of(yr, mo, dy, hr, mn, 0) + case yr :: mo :: dy :: hr :: Nil => + LocalDateTime.of(yr, mo, dy, hr, 0, 0) + case yr :: mo :: dy :: Nil => + if (mo > 12) { + hook += 1 + } + LocalDateTime.of(yr, mo, dy, 0, 0, 0) + case other => + sys.error(s"not enough date-time fields: [${so.mkString("|")}]") + } + 
} + + val bareformats = formats.map { _.distinct }.toList + nums = numstrings.map { ti(_) }.toIndexedSeq + def ymd(iy: Int, im: Int, id: Int, tail: List[String]): LocalDateTime = { + if (iy <0 || im <0 || id <0) { + hook += 1 + } else if (nums.size < 3) { + hook += 1 + } + val standardOrder = List(nums(iy), nums(im), nums(id)) ++ nums.drop(3) + fromStandardOrder(standardOrder) + } + val dateTime: LocalDateTime = bareformats match { + case "d" :: "M" :: "y" :: tail => ymd(2,1,0, tail) + case "M" :: "d" :: "y" :: tail => ymd(2,0,1, tail) + case "d" :: "y" :: "M" :: tail => ymd(1,2,0, tail) + case "M" :: "y" :: "d" :: tail => ymd(1,0,2, tail) + case "y" :: "d" :: "M" :: tail => ymd(0,2,1, tail) + case "y" :: "M" :: "d" :: tail => ymd(0,1,2, tail) + case other => + valid = false + BadDate + } + new DateFlds(dateTime, rawdatetime, numerified, timezone, formats, valid) + } +} + +case class DateFlds(dateTime: LocalDateTime, rawdatetime: String, numerified: String, timezone: String, formats: Seq[String], valid: Boolean) { + override def toString: String = dateTime.toString("yyyy-MM-dd HH:mm:ss") +} diff --git a/jsrc/csvParser.sc b/jsrc/csvParser.sc new file mode 100644 index 0000000..59324f6 --- /dev/null +++ b/jsrc/csvParser.sc @@ -0,0 +1,47 @@ +#!/usr/bin/env -S scala + +import org.simpleflatmapper.csv.* +import java.io.{FileReader, StringReader} +import scala.jdk.CollectionConverters.* + +def main(args: Array[String]): Unit = { + import vastblue.unifile.* + + var p = java.nio.file.Paths.get("testdates.csv") + var content = p.contentAsString + val reader = StringReader(content) + def iterator: Iterator[Seq[String]] = CsvParser.separator(',').iterator(reader).asScala.map { _.toSeq } + def rawrows: Seq[Seq[String]] = iterator.toSeq.filter { (cols: Seq[String]) => cols != Seq("") } // discard gratuitous empty rows + def rows = rawrows.map { row => row.map(_.trim) } + def rowstrimmed = rows + for (row <- rows){ + if (row.size > 2) { + printf("%s: %s\n", row.size, row) + } + 
} + + /* + case class FastCsv(val reader: Reader, identifier: String, delimiter: String) { + if (delimiter.length != 1) { + System.err.printf("warning: will use only the first character delimiter [%s]\n", delimiter) + } + + def delim: Char = delimiter match { + case "" => ' ' // treat rows with no delimiter as a single column + case "," => ',' + case "\t" => '\t' + case "|" => '|' + case ";" => ';' + case _ => delimiter.charAt(0) + } + def iterator: Iterator[Seq[String]] = CsvParser.separator(delim).iterator(reader).asScala.map { _.toSeq } + + def rawrows: Seq[Seq[String]] = iterator.toSeq.filter { (cols: Seq[String]) => cols != Seq("") } // discard gratuitous empty rows + def rows = rawrows.map { row => row.map(_.trim) } + def rowstrimmed = rows + + // def stream = CsvParser.separator(delim).iterator(reader).asScala.iterator + override def toString = identifier + } + */ +} diff --git a/jsrc/fastCsv.sc b/jsrc/fastCsv.sc new file mode 100644 index 0000000..80d27d6 --- /dev/null +++ b/jsrc/fastCsv.sc @@ -0,0 +1,10 @@ +#!/usr/bin/env -S scala + +import vastblue.pallet.* +import vastblue.time.TimeDate.* +import vastblue.time.ChronoParse.* + +object FastCsvTool { + def main(args: Array[String]): Unit = { + } +} diff --git a/jsrc/fstabCli.sc b/jsrc/fstabCli.sc index 040145f..c1bf2d1 100755 --- a/jsrc/fstabCli.sc +++ b/jsrc/fstabCli.sc @@ -1,7 +1,7 @@ #!/usr/bin/env -S scala-cli shebang //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet._ diff --git a/jsrc/palletRef.sc b/jsrc/palletRef.sc index 9d574dc..9b6bdf1 100755 --- a/jsrc/palletRef.sc +++ b/jsrc/palletRef.sc @@ -1,7 +1,7 @@ #!/usr/bin/env -S scala-cli shebang //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet._ diff --git a/jsrc/palletRefCli.sc b/jsrc/palletRefCli.sc index 88dbe2f..bd0ec4d 100755 --- a/jsrc/palletRefCli.sc +++ 
b/jsrc/palletRefCli.sc @@ -1,7 +1,7 @@ #!/usr/bin/env -S scala-cli shebang //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* diff --git a/jsrc/parseDates.sc b/jsrc/parseDates.sc new file mode 100644 index 0000000..29ce2e1 --- /dev/null +++ b/jsrc/parseDates.sc @@ -0,0 +1,351 @@ +#!/usr/bin/env -S scala-cli shebang -deprecation + +//> using dep "org.vastblue:pallet_3:0.10.6" +//> using dep "org.vastblue:unifile_3:0.3.0" +//> using dep "org.simpleflatmapper:sfm-csv-jre6:8.2.3" +//> using dep "io.github.chronoscala::chronoscala::2.0.10" +//> using dep "com.github.sisyphsu:dateparser:1.0.11" + +import vastblue.pallet.* +import vastblue.time.TimeDate.* +import vastblue.time.TimeParser +import com.github.sisyphsu.dateparser.* +import java.time.LocalDateTime +import java.time.format.DateTimeParseException + +ParseDates.main(args) +object ParseDates { + var monthFirst = true // by default, prefer US format + + def usage(m: String=""): Nothing = { + _usage(m, Seq( + "[] ; one datetime string per line", + "[-test | -flds] ; verify testdate.csv conversions", + "[-df] ; prefer day-first format (non-US)", + "by default, op == \"-flds\"", + )) + } + var (op, verbose, infiles) = ("", false, Vector.empty[Path]) + + def main(args: Array[String]): Unit = { + parseArgs(args.toSeq) + DateParserUtils.preferMonthFirst(monthFirst) + try { + op match { + case "" => + verifyFields(testDataFile) + case "-test" => + verifyConversions(testDataFile) + case "-file" => + for (p <- infiles) { + convertEntries(p) + } + } + } catch { + case t: Throwable => + showLimitedStack(t) + sys.exit(3) + } + } + + def parseArgs(args: Seq[String]): Unit = { + eachArg(args.toSeq, usage) { + case f if f.path.isFile => + assert(op.isEmpty, s"op[$op] but also specified file [$f]") + op = "-file" + infiles :+= f.path + case "-v" => + verbose = true + case "-df" => + monthFirst = false + case "-test" | "-flds" => + if 
(!testDataFile.isFile) { + usage(s"not found: ${testDataFile.posx}") + } else { + op = thisArg + } + case arg => + usage(s"unrecognized arg [$arg]") + } + } + def verifyFields(file: Path): Unit = { + if (file.isFile) { + val rows = file.csvRows.drop(1) // discard column names + eprintf("%d rows\n", rows.size) + for ((row, i) <- rows.zipWithIndex){ + printf("%04d : %s\n", i, row.mkString("|")) + val Seq(expected, rawline) = row + val format = DateFlds(rawline) + printf("rawlin: [%s]\n", rawline) + printf(" format: [%s]\n", format) + printf(" expect: [%s]\n", expected) + } + } + } + def verifyConversions(file: Path): Unit = { + if (file.isFile) { + val rows = file.csvRows.drop(1) // discard column names + for (row <- rows){ + val Seq(expected, rawline) = row + val dateTime = newDateParser(rawline) + val dtstr = dateTime.toString("yyyy-MM-dd HH:mm:ss") + if (dtstr != expected) { + printf("rawlin: [%s]\n", rawline) + printf(" result: [%s]\n", dtstr) + printf(" expect: [%s]\n", expected) + } + //printf("%s # [%s]\n", dtstr, rawline) + } + } + } + def convertEntries(file: Path): Unit = { + if (file.isFile) { + for (rawline <- file.lines) { + val dateTime = newDateParser(rawline) + val dtstr = dateTime.toString("yyyy-MM-dd HH:mm:ss") + printf("%s # [%s]\n", dtstr, rawline) + } + } + } + lazy val testDataFile = "testdates.csv".path + + lazy val nowdate = now.toString("yyyy-MM-dd") + + def newDateParser(rawline: String): LocalDateTime = { + try { + sysiphus(rawline) + } catch { + case e: java.time.format.DateTimeParseException => + var line = rawline.replaceAll("/{2,}", "/").replaceAll("[\"\\\\]", "") + parseDate(line) + } + } + /* + def fieldTypes(numstrings: List[String]): String = { + val nums: Seq[Int] = numstrings.map { (s: String) => Fld(s) } + val fields: Seq[(String, Int)] = numstrings.zipWithIndex.sortBy { case (str, i) => str } + nums match { + case a :: b :: c :: Nil if a > 1000 => + "yyyyMMdd" + case a :: b :: c :: Nil if a > 12 && c > 1000 => + "ddMMyyyy" + 
case a :: b :: c :: Nil if c > 1000 => + "MMddyyyy" + case a :: b :: c :: Nil if a > 12 && c > 1000 => + "ddMMyyyy" + } else if (num > 12) { + "d" // day + } else { + "m" // resolve ambiguity in favor of month + } + } + */ + def sysiphus(rawline: String): LocalDateTime = { + var line = rawline + val numstrings = line.split("\\D+").map { _.trim }.filter { _.nonEmpty } + val nums = numstrings.map { _.toInt } + if (verbose) { + printf("%d number fields [%s]\n", numstrings.size, numstrings.mkString("|")) + } + if (numstrings.length == 3) { + if (line.contains(":")) { + // time fields only + DateParserUtils.parseDateTime(s"$nowdate $line") + } else { + // date fields only + val Seq(a, b, c) = nums.toSeq + if (a < 1000) { + // year-first + } else if (a > 12) { + // day-first, swap first two fields + line = "%02d-%02d-%04d".format(b, a, c) + } else { + // assume month-first, but is ambiguous + } + DateParserUtils.parseDateTime(s"$line 00:00:00") + } + } else { + if (verbose) { + eprintf("== [%s]\n", rawline) + } + DateParserUtils.parseDateTime(line) + } + } + + // derive parse format for dates with numeric fields + // preparation must include converting names of day and month to numeric + case class DateFlds(rawdatetime: String) { + val datetime = numerifyNames(rawdatetime) + val numstrings = datetime.replaceAll("\\D+", " ").trim.split(" ").toIndexedSeq + val fields: Seq[(String, Int)] = numstrings.zipWithIndex + def hasTime = datetime.contains(":") + def hasDate = datetime.contains("-") || datetime.contains("/") + + var formats: Array[String] = fields.toArray.map { (s: String, i: Int) => + if (i < 3) { + s.toInt match { + case y if y > 31 || s.length == 4 => + s.replaceAll(".", "y") + case d if d > 12 && s.length <= 2 => + s.replaceAll(".", "d") + case _ => // can't resolve month without more context + s + } + } else { + i match { + case 3 => s.replaceAll(".", "H") + case 4 => s.replaceAll(".", "m") + case 5 => s.replaceAll(".", "s") + case 6 => s.replaceAll(".", "Z") + 
case _ => + s // not expecting any more numeric fields + } + } + } + + def is(s: String, v: String): Boolean = s.startsWith(v) + + val yidx = formats.indexWhere((s: String) => s.startsWith("y")) + val didx = formats.indexWhere((s: String) => s.startsWith("d")) + val midx = formats.indexWhere((s: String) => s.startsWith("M")) + + def hasY = yidx >= 0 + def hasM = midx >= 0 + def hasD = didx >= 0 + def needsY = yidx < 0 + def needsM = midx < 0 + def needsD = didx < 0 + + var (yval, mval, dval) = (0, 0, 0) + def replaceFirstNumericField(s: String): Unit = { + val i = formats.indexWhere((s: String) => s.matches("[0-9]+")) + assert(i < 3, s"internal error: $datetime [i: $i, s: $s]") + s match { + case "y" => + assert(yval < 1, s"yval: $yval") + yval = formats(i).toInt + case "M" => + assert(mval < 1, s"mval: $mval") + mval = formats(i).toInt + case "d" => + assert(dval < 1, s"dval: $dval") + dval = formats(i).toInt + case _ => + sys.error(s"internal error: bad format indicator [$s]") + } + formats(i) = formats(i).replaceAll("[0-9]", s) + } + + (needsY, needsM, needsD) match { + case (false, false, true) => + replaceFirstNumericField("d") + case (false, true, false) => + replaceFirstNumericField("M") + case (true, false, false) => + replaceFirstNumericField("y") + + case (false, true, true) => + // has year, needs month and day + yidx match { + case 0 => + // y-M-d + replaceFirstNumericField("M") + replaceFirstNumericField("d") + case 2 => + // d-M-y + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } + case (true, true, false) => + // has day, needs month and year + didx match { + case 0 => + // d-M-y + replaceFirstNumericField("M") + replaceFirstNumericField("d") + case 2 => + // y-M-d + replaceFirstNumericField("d") + replaceFirstNumericField("M") + case 1 => + // AMBIGUOUS ... 
+ if (monthFirst) { + // M-d-y + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } else { + // d-M-y + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } + } + case (true, true, true) => + // done with date fields + case (yy, mm, dd) => + sys.error(s"yy[$yy], mm[$mm], dd[$dd] datetime[$datetime], formats[${formats.mkString("|")}]") + } + override def toString = "%s : %s".format(formats.mkString("|"), datetime) + + /* + case (true, false, false) => + + case (false, false, true) => + formats(i) = m.map { _ => "M" } + case (m :: d :: y :: tail), i) if hasY && hasM => + formats(i) = d.map { _ => "d" } + case (m :: d :: y :: tail, i) if hasY => + // by convention (in US): + formats(0) = m.replaceAll(".", "M") + formats(1) = d.replaceAll(".", "d") + + case m :: d :: y :: tail if !hasY => + formats(2) = m.map { _ => "M" } + case m :: d :: y :: tail if hasY && hasM => + formats(1) = d.map { _ => "d" } + case m :: d :: y :: tail if hasY => + // by convention (in US): + formats(0) = m.replaceAll(".", "M") + formats(1) = d.replaceAll(".", "d") + + case m :: d :: y if y.startsWith("y") :: tail => + formats(0) = m.replaceAll(".", "M") + formats(1) = d.replaceAll(".", "M") + */ + /* + def hasy = fmts.contains("y") + for ((str, idx) <- fields) { + val width = str.length + val valu = str.toInt + (valu, idx) match { + case (y, 0 | 2) if y > 31 || width == 4 => + fmts(idx) = "y" * width + + case (d, _) if d > 12 && width <= 2 => + fmts(idx) = "d" * width + + case (m, 0 | 1) if m < 31 && width <= 2 => + fmts(idx) = "M" * width + + case unk => + if (width >= 4) { + "Z" + } else { + sys.error(s"datetime[$datetime] unknown field: [$unk]") + } + } + } + + override def toString: String = { + if (fmts.size <= 3) { + val delim = if (hasTime) { ":" } else { "-" } + fmts.mkString(delim) + } else { + val (dateff, timeff) = fmts.splitAt(3) + val datefmt = dateff.mkString("-") + val timefmt = timeff.mkString(":") + s"$datefmt $timefmt" + } + } + */ + } +} diff 
--git a/jsrc/unameGreeting.sc b/jsrc/unameGreeting.sc index 247ebd1..bca131c 100644 --- a/jsrc/unameGreeting.sc +++ b/jsrc/unameGreeting.sc @@ -1,7 +1,7 @@ #!/usr/bin/env -S scala //> using scala "3.3.1" -//> using lib "org.vastblue::pallet::0.10.4" +//> using lib "org.vastblue::pallet::0.10.6" import vastblue.pallet.* diff --git a/src/main/scala-2.13/vastblue/Info.scala b/src/main/scala-2.13/vastblue/Info.scala deleted file mode 100644 index 00f9cbc..0000000 --- a/src/main/scala-2.13/vastblue/Info.scala +++ /dev/null @@ -1,7 +0,0 @@ -package vastblue - -object Info { - lazy val scalaRuntimeVersion: String = { - scala.util.Properties.versionNumberString - } -} diff --git a/src/main/scala-2.13/vastblue/file/EzPath.scala b/src/main/scala-2.13/vastblue/file/EzPath.scala deleted file mode 100644 index ab25c11..0000000 --- a/src/main/scala-2.13/vastblue/file/EzPath.scala +++ /dev/null @@ -1,132 +0,0 @@ -package vastblue.file - -//import vastblue.file.EzPath.notWindows -//import vastblue.pallet.ExtendString - -import java.nio.file.Path - -sealed trait SlashType -class Slash(s: String) - -object Slash { - def unx = '/' - def win = '\\' - - object Unx extends Slash(unx.toString) with SlashType - object Win extends Slash(win.toString) with SlashType -} -import Slash._ -import EzPath._ - -class EzPath(val initstring: String, val sl: Slash) { - val p: Path = { - def str: String = if (notWindows) initstring.replace('\\', '/') else initstring - Paths.get(str) - } - def ab: Path = p.toAbsolutePath.normalize - def abs: String = ab.slash(sl) - def posx: String = { - if (sl == Win) { - initstring - } else { - initstring.posx - } - } - def slash: String = { - if (sl == Win) { - initstring.replace('/', '\\') - } else { - initstring.replace('\\', '/') - } - } -} -object EzPath { - implicit class StExtend(s: String) { - def slash: String = s - } - // val winu = EzPath("c:\\opt", Unx) // valid - // val winw = EzPath("c:\\opt", Win) // valid - def apply(p: Path, sl: Slash) = { - val 
pstr: String = if (notWindows) p.toString.replace('\\', '/') else p.toString - sl match { - case Unx => new PathUnx(pstr) - case Win => new PathWin(pstr) - } - } - - def apply(s: String, sl: Slash): EzPath = { - def str: String = if (notWindows) s.replace('\\', '/') else s - - if (sl == Unx) { - new PathUnx(str) - } else { - new PathWin(str) - } - } - - def apply(s: String): EzPath = { - def str: String = if (notWindows) s.replace('\\', '/') else s - - if (notWindows) { - new PathUnx(str) - } else { - new PathWin(str) - } - } - - def defaultSlash = if (isWindows) Slash.Win else Slash.Unx - - def notWindows = java.io.File.separatorChar == '/' - - def isWindows = !notWindows - - def platformPrefix: String = Paths.get(".").toAbsolutePath.getRoot.toString match { - case "/" => "" - case s => s.take(2) - } - - def winlikePathstr(s: String): Boolean = { - s.contains(':') || s.contains('\\') - } - - def defaultSlash(s: String): Slash = { - if (winlikePathstr(s)) Slash.Win else Slash.Unx - } - - object PathUnx { - def apply(s: String): PathUnx = new PathUnx(s) - } - class PathUnx(s: String) extends EzPath(s, Slash.Unx) { - override def toString = abs - } - - object PathWin { - def apply(s: String): PathWin = new PathWin(s) - } - class PathWin(s: String) extends EzPath(s, Slash.Win) { - override def toString = abs - } - - implicit class PathExt(p: Path) { - def slash(sl: Slash): String = { - if (sl == Win) { - p.toString.replace('/', '\\') - } else { - p.toString.replace('\\', '/') - } - } - } - implicit class StrExt(s: String) { - def posx: String = { - s.replace('\\', '/') - } - - def slash(sl: Slash): String = { - if (sl == Win) { - s.replace('/', '\\') - } else { - s.posx - } - } - } -} diff --git a/src/main/scala-2.13/vastblue/time/TimeExtensions.scala b/src/main/scala-2.13/vastblue/time/TimeExtensions.scala deleted file mode 100644 index 31a99e7..0000000 --- a/src/main/scala-2.13/vastblue/time/TimeExtensions.scala +++ /dev/null @@ -1,116 +0,0 @@ -package vastblue.time 
- -import vastblue.time.TimeDate.* -import io.github.chronoscala.Imports.* -import io.github.chronoscala.* - -import java.time.DayOfWeek -import java.time.DayOfWeek.* -import java.time.temporal.TemporalAdjusters -import java.time.* // {ZoneId, ZonedDateTime} -import java.time.format.* -import scala.language.implicitConversions - -trait TimeExtensions { - implicit def date2option(date: DateTime): Option[DateTime] = Some(date) - - implicit def ldt2zdt(ldt: LocalDateTime): ZonedDateTime = { - ldt.atZone(UTC) - } - implicit def str2richStr(s: String): RichString = new RichString(s) - implicit def ta2zdt(ta: java.time.temporal.TemporalAccessor): ZonedDateTime = { - try { - ta match { - case ld: LocalDateTime => - ld.atZone(zoneid) - } - } catch { - case _: java.time.DateTimeException => - sys.error(s"cannot convert to DateTime from: ${ta.getClass.getName}") - } - } - implicit def dateTimeOrdering: Ordering[DateTime] = Ordering.fromLessThan(_ isBefore _) - implicit def int2richInt(i: Int): RichInt = new RichInt(i) - implicit def int2Period(i: Int): java.time.Period = java.time.Period.ofWeeks(i) - - implicit def sqlDate2LocalDateTime(sd: java.sql.Date): LocalDateTime = sd.toLocalDate.atStartOfDay() - implicit def sqlDate2LocalDate(sd: java.sql.Date): LocalDate = sd.toLocalDate - - import java.time.Duration - implicit class aInterval(val i: Interval) { - def toDuration = i.duration - } - - // format: off - implicit class aDayOfWeek(val d: java.time.DayOfWeek) { - def >=(other: java.time.DayOfWeek) = { d.compareTo(other) >= 0 } - def > (other: java.time.DayOfWeek) = { d.compareTo(other) > 0 } - def <=(other: java.time.DayOfWeek) = { d.compareTo(other) <= 0 } - def < (other: java.time.DayOfWeek) = { d.compareTo(other) < 0 } - } - // format: on - - implicit class aDuration(val pd: java.time.Duration) { - def getStandardSeconds: Long = pd.seconds - def getStandardMinutes: Long = getStandardSeconds / 60 - def getStandardHours: Long = getStandardMinutes / 60 - def 
getStandardDays: Long = getStandardHours / 24 - } - - implicit def between(date1: DateTime, date2: DateTime): Duration = { - Duration.between(date1, date2) - } - - implicit class aDateTime(val d: DateTime) extends Ordered[aDateTime] { - override def compare(that: aDateTime): Int = { - val (a, b) = (getMillis(), that.getMillis()) - if (a < b) -1 - else if (a > b) +1 - else 0 - } - def ymd: String = d.format(dateTimeFormatPattern(dateonlyFmt)) - - def ymdhms: String = d.format(dateTimeFormatPattern(datetimeFmt7)) - - def startsWith(str: String): Boolean = d.toString(ymdhms).startsWith(str) - - def toString(fmt: String): String = { - d.format(dateTimeFormatPattern(fmt)) - } - def getMillis(): Long = { - d.atZone(zoneid).toInstant().toEpochMilli() - } - def >(other: DateTime): Boolean = { - d.compareTo(other) > 0 - } - def >=(other: DateTime): Boolean = { - d.compareTo(other) >= 0 - } - def to(other: DateTime): Duration = { - between(d, other) - } - def +(p: java.time.Period) = d.plus(p) - def -(p: java.time.Period) = d.minus(p) - - def minute = d.getMinute - def second = d.getSecond - def hour = d.getHour - def day = d.getDayOfMonth - def month = d.getMonth - def year = d.getYear - - def setHour(h: Int): LocalDateTime = d.plusHours((d.getHour + h).toLong) - def setMinute(m: Int): LocalDateTime = d.plusMinutes((d.getMinute + m).toLong) - - def compare(that: DateTime): Int = d.getMillis() compare that.getMillis() - def dayOfYear = d.getDayOfYear - def getDayOfYear = d.getDayOfYear - def dayOfMonth = d.getDayOfMonth - def getDayOfMonth = d.getDayOfMonth - def dayOfWeek: DayOfWeek = d.getDayOfWeek - def getDayOfWeek: DayOfWeek = d.getDayOfWeek - - def withDayOfWeek(dow: java.time.DayOfWeek): DateTime = d.`with`(TemporalAdjusters.next(dow)) - def lastDayOfMonth: LocalDateTime = d.`with`(LastDayAdjuster) - } -} diff --git a/src/main/scala-3/vastblue/Info.scala b/src/main/scala/Info.scala similarity index 100% rename from src/main/scala-3/vastblue/Info.scala rename to 
src/main/scala/Info.scala diff --git a/src/main/scala/vastblue/demo/Demo.scala b/src/main/scala/vastblue/examples/Demo.scala similarity index 96% rename from src/main/scala/vastblue/demo/Demo.scala rename to src/main/scala/vastblue/examples/Demo.scala index f664885..2805c10 100644 --- a/src/main/scala/vastblue/demo/Demo.scala +++ b/src/main/scala/vastblue/examples/Demo.scala @@ -1,5 +1,5 @@ //#!/usr/bin/env -S scala @./atFile -package vastblue.demo +package vastblue.examples import vastblue.pallet.* import vastblue.MainArgs diff --git a/src/main/scala/vastblue/demo/Find.scala b/src/main/scala/vastblue/examples/Find.scala similarity index 99% rename from src/main/scala/vastblue/demo/Find.scala rename to src/main/scala/vastblue/examples/Find.scala index 837b7be..04eb8e1 100644 --- a/src/main/scala/vastblue/demo/Find.scala +++ b/src/main/scala/vastblue/examples/Find.scala @@ -1,5 +1,5 @@ //#!/usr/bin/env -S scala @${HOME}/.scala3cp -package vastblue.demo +package vastblue.examples // hash bang line error on OSX/Darwin due to non-gnu /usr/bin/env // portable way to set classpath: diff --git a/src/main/scala/vastblue/demo/GlobArg.scala b/src/main/scala/vastblue/examples/GlobArg.scala similarity index 94% rename from src/main/scala/vastblue/demo/GlobArg.scala rename to src/main/scala/vastblue/examples/GlobArg.scala index e2f6692..aacad22 100644 --- a/src/main/scala/vastblue/demo/GlobArg.scala +++ b/src/main/scala/vastblue/examples/GlobArg.scala @@ -1,5 +1,5 @@ //#!/usr/bin/env -S scala -cp target/scala-3.3.1/classes -package vastblue.demo +package vastblue.examples import vastblue.pallet.* diff --git a/src/main/scala/vastblue/demo/MainName.scala b/src/main/scala/vastblue/examples/MainName.scala similarity index 93% rename from src/main/scala/vastblue/demo/MainName.scala rename to src/main/scala/vastblue/examples/MainName.scala index 7baeeca..ba0e1b7 100644 --- a/src/main/scala/vastblue/demo/MainName.scala +++ b/src/main/scala/vastblue/examples/MainName.scala @@ -1,5 +1,5 
@@ //#!/usr/bin/env -S scala @./atFile -deprecation -package vastblue.demo +package vastblue.examples import vastblue.pallet.* diff --git a/src/main/scala-3/vastblue/file/EzPath.scala b/src/main/scala/vastblue/file/EzPath.scala similarity index 100% rename from src/main/scala-3/vastblue/file/EzPath.scala rename to src/main/scala/vastblue/file/EzPath.scala diff --git a/src/main/scala/vastblue/file/FastCsv.scala b/src/main/scala/vastblue/file/FastCsv.scala index 645d56e..3718b07 100644 --- a/src/main/scala/vastblue/file/FastCsv.scala +++ b/src/main/scala/vastblue/file/FastCsv.scala @@ -87,20 +87,6 @@ object FastCsv { ) } } - - /* - def readLines(p: Path): Seq[String] = { - try { - JFiles.readAllLines(p).asScala.toSeq - } catch { - case t: Throwable => - sys.error(s"${p.norm}") - } - } - def contentAsString(p: Path): String = { - readLines(p).mkString("\n") - } - */ } case class FastCsv(val reader: Reader, identifier: String, delimiter: String) { diff --git a/src/main/scala/vastblue/time/ChronoParse.scala b/src/main/scala/vastblue/time/ChronoParse.scala new file mode 100644 index 0000000..f50b9cb --- /dev/null +++ b/src/main/scala/vastblue/time/ChronoParse.scala @@ -0,0 +1,938 @@ +//#!/usr/bin/env -S scala -deprecation +package vastblue.time + +import vastblue.pallet.* +//import vastblue.time.TimeDate.* + +import java.time.LocalDateTime +import scala.runtime.RichInt + +object ChronoParse { + var hook = 0 + var monthFirst = true + lazy val testDataFile = "testdates.csv".path +// lazy val nowdate = now.toString("yyyy-MM-dd") + lazy val TimeZoneSplitter = "(.*:.*) ?([-+][0-9]{1,2}:00)$".r + lazy val BadDate: LocalDateTime = yyyyMMddHHmmssToDate(List(1900,01,01)) + lazy val now: LocalDateTime = LocalDateTime.now() + lazy val MonthNamesPattern = "(?i)(.*)(Jan[uary]*|Feb[ruary]*|Mar[ch]*|Apr[il]*|May|June?|July?|Aug[ust]*|Sep[tember]*|Oct[ober]*|Nov[ember]*|Dec[ember]*)(.*)".r + + + // by default, prefer US format, but swap month and day if unavoidable + // (e.g., 
24/12/2022 incompatible with US format, not with Int'l format + def usage(m: String=""): Nothing = { + _usage(m, Seq( + "[] ; one datetime string per line", + "[-test | -flds] ; verify testdate.csv conversions", + "[-df] ; prefer day-first format (non-US)", + "by default, op == \"-flds\"", + )) + } + var (op, verbose, infiles) = ("", false, Vector.empty[Path]) + + def main(args: Array[String]): Unit = { + parseArgs(args.toSeq) + try { + op match { + case "" | "-test" | "-flds" => + verifyFields(testDataFile) + case "-file" => + convertTimestamps(infiles) + case _ => + usage(s"op == $op") + } + } catch { + case t: Throwable => + showLimitedStack(t) + sys.exit(3) + } + } + + def verifyFields(p: Path): Unit = { + if (p.isFile) { + val rows = p.csvRows + printf("%d rows\n", rows.size) + hook += 1 + for ((row, i) <- rows.zipWithIndex) { + if (i > 0) { + // skip headings row, preserve file row numbers + val rawline = row.toList match { + case _ :: targetstr :: Nil => + targetstr + case targetstr :: Nil => + targetstr + case _ => + hook += 1 + "" + } + if (verbose) printf("%04d : %s\n", i, row.mkString("|")) + + val format = ChronoParse(rawline) + if (verbose) { + printf("rawlin: [%s]\n", rawline) + printf(" format: [%s]\n", format.toString) + } else { + printf("%-22s, \"%s\"\n", format.toString, rawline) + } + } + } + } + } + + def convertTimestamps(files: Seq[Path]): Seq[ChronoParse] = { + for { + p <- files + if p.isFile + (row, i) <- p.csvRows.zipWithIndex + if i > 0 // skip headings row + rawline = row.toList match { + case targetstr :: Nil => + targetstr // if 1 column, convert it + case _ :: targetstr :: Nil => + targetstr // if 2 columns, convert 2nd + case _ => + "" + } + if rawline.nonEmpty + dateflds = ChronoParse(rawline) + if dateflds.valid + } yield dateflds + } + + def parseArgs(args: Seq[String]): Unit = { + eachArg(args.toSeq, usage) { + case f if f.path.isFile => + assert(op.isEmpty, s"op[$op] but also specified file [$f]") + op = "-file" + infiles :+= 
f.path + case "-v" => + verbose = true + case "-df" => + monthFirst = false + case "-test" | "-flds" => + if (!testDataFile.isFile) { + usage(s"not found: ${testDataFile.posx}") + } else { + op = thisArg + } + case arg => + usage(s"unrecognized arg [$arg]") + } + } + + def toNum(str: String): Int = { + str match { + case n if n.matches("0\\d+") => + n.replaceAll("0+(.)", "$1").toInt + case n if (n.matches("\\d+")) => + n.toInt + case n if n.contains(".") => + Math.round(n.toDouble).toInt + case "-0" => 0 + case other => + sys.error(s"internal error A: toI($str)") + } +} + + // If month name present, convert to numeric equivalent. + // month-day order is also implied and must be captured. + // Return array of numbers plus month index (can be -1). + private[vastblue] def numerifyNames(_cleanFields: Array[String]): (Int, Seq[String]) = { + var cleanFields = _cleanFields + //val clean = datestr.replaceAll("(?i)(Sun[day]*|Mon[day]*|Tue[sday]*|Wed[nesday]*|Thu[rsday]*|Fri[day]*|Sat[urday]*),? 
*", "") + //val cleanFields: Array[String] = clean.split("[-/,\\s]+") + val clean = cleanFields.mkString(" ") + + val monthIndex = clean match { + case MonthNamesPattern(pre, monthName, post) => + val month: Int = TimeParser.monthAbbrev2Number(monthName) + //val numerified = s"$pre $month $post" + val midx = cleanFields.indexWhere( _.contains(monthName) ) +// val midx = cleanFields.indexWhere { (s: String) => s.matches("(?i).*[JFMASOND][aerpuco][nbrylgptvc][a-z]*.*") } + if (midx < 0) { + sys.error(s"internal error: failed to find index of month[$monthName] in [$clean]") + } + cleanFields(midx) = month.toString + midx + case _ => + -1 + } + (monthIndex, cleanFields.toIndexedSeq) + } + + def cleanPrep(rawdatetime: String): (Seq[String], String, String, Int, Int) = { + var monthIndex: Int = -1 + var yearIndex: Int = -1 + if (rawdatetime.endsWith(" 2020")) { + hook += 1 + } + val (datefields, timestring, timezone) = { + // toss weekday name, convert month name to number + val (cleandates: Array[String], cleantimes: Array[String], timezone: String) = { + val cleaned = rawdatetime. + replaceAll("([0-9])-", "$1/"). // remove all hyphens except minus signs + replaceAll("([a-zA-Z])([0-9])", "$1 $2"). // separate immediately adjacent numeric and alpha fields + replaceAll("([0-9])([a-zA-Z])", "$1 $2"). // ditto + replaceAll("([-])([AP]M)\\b", "$1 $2"). // separate hyphens from AM/PM fields (e.g., AM- or -AM) + replaceAll("(\\b[[AP]M])([-])", "$1 $2"). // ditto + replaceAll("([0-9])T([0-9])", "$1 $2"). // remove T separating date and time + // discard day-of-week + replaceAll("(?i)(Sun[day]*|Mon[day]*|Tue[sday]*|Wed[nesday]*|Thu[rsday]*|Fri[day]*|Sat[urday]*),? *", "") + + val splitRegex = if (cleaned.contains(":")) { + "[/,\\s]+" + } else { + "[-/,\\s]+" // also split on hyphens + } + val (dts, tms) = { + val ff = cleaned. + split(splitRegex). 
+ filter { + case "-AM" | "AM" => false + case _ => true + } + ff.partition { + case s if s.contains(":") => + false // HH mm or ss + case s if s.matches("[-+][0-9]{4}") => + false // time zone + case s if s.matches("[.][0-9]+") => + false // decimal time field + case s if timeZoneCodes.contains(s) => + false + case _ => + true // date + } + } + val (times, zones) = tms.partition { _.contains(":") } + (dts, times, zones.mkString(" ")) + } + + val (_monthIndex, cleanFields) = numerifyNames(cleandates ++ cleantimes) + monthIndex = _monthIndex + + // separate into time and date, then handle each separately + val (datefields, timefields) = { + cleanFields.indexWhere(_.contains(":")) match { + case -1 => // no time fields + (cleanFields, Nil) + case num => + // TODO: date fields (e.g., year) can sometimes appear after time fields + var (dates, times) = cleanFields.splitAt(num) + val widefields: Seq[String] = times.filter { (s: String) => !s.startsWith("0") && !s.contains(":") && s.length == 4 } + if (widefields.nonEmpty) { + val yi: Int = widefields.indexWhere(_.startsWith("2")) + if (yi < 0) { + hook += 1 // unexpected? + } + val yy = widefields(yi) + if (yy.length == 4) { + // move year from times to dates + val yi = times.indexOf(yy) + dates = Seq(yy) ++ dates + yearIndex = 0 + times = { + val (a, b) = times.splitAt(yi) + a ++ b.drop(1) + } + } + } + (dates, times) + } + } + val timestring: String = timefields.mkString(" ") + + (datefields, timestring, timezone) + } + (datefields, timestring, timezone, monthIndex, yearIndex) + } + + /* + * ChronoParse constructor. 
+ */ + def apply(_rawdatetime: String): ChronoParse = { + if (_rawdatetime.startsWith("10:")) { + hook += 1 + } + var valid = true + var confident = true + + val (datefields, timestring, timezone, _monthIndex, _yearIndex) = cleanPrep(_rawdatetime) + + var (monthIndex: Int, yearIndex: Int) = (_monthIndex, _yearIndex) + +// def timestring = s"$timestring $timezone" + val rawdatetime = s"${datefields.mkString(" ")} ${timestring} $timezone".trim + + var _datenumstrings: IndexedSeq[String] = Nil.toIndexedSeq + if (datefields.nonEmpty) { + setDatenums( + datefields.mkString(" ").replaceAll("\\D+", " ").trim.split(" +").toIndexedSeq + ) + } + + def setDatenums(newval: Seq[String]): Unit = { + if (newval.isEmpty) { + hook += 1 + } + val bad = newval.exists{ (s: String) => + s.trim.isEmpty || !s.matches("[0-9]+") + } + if (bad){ + hook += 1 + } + _datenumstrings = newval.toIndexedSeq + } + def datenumstrings = _datenumstrings + + def swapDayAndMonth(dayIndex: Int, monthIndex: Int, monthStr: String, numstrings: IndexedSeq[String]): IndexedSeq[String] = { + val maxIndex = numstrings.length - 1 + assert(dayIndex >= 0 && monthIndex >= 0 && dayIndex <= maxIndex && monthIndex <= maxIndex) + val day = numstrings(dayIndex) + var newnumstrings = numstrings.updated(monthIndex, day) + newnumstrings = newnumstrings.updated(dayIndex, monthStr) + newnumstrings + } + + val widenums = datenumstrings.filter { _.length >= 4 } + widenums.toList match { + case Nil => // no wide num fields + case year :: _ if year.length == 4 => + yearIndex = datenumstrings.indexOf(year) + if (yearIndex > 3) { + val (left, rite) = datenumstrings.splitAt(yearIndex) + val newnumstrings = Seq(year) ++ left ++ rite.drop(1) + setDatenums(newnumstrings.toIndexedSeq) + } + hook += 1 + case ymd :: _ => + hook += 1 // maybe 20240213 or similar + var (y, m, d) = ("", "", "") + if (ymd.startsWith("2") && ymd.length == 8) { + // assume yyyy/mm/dd + y = ymd.take(4) + m = ymd.drop(4).take(2) + d = ymd.drop(6) + } else if 
(ymd.drop(4).matches("2[0-9]{3}") ){ + if (monthFirst) { + // assume mm/dd/yyyy + m = ymd.take(2) + d = ymd.drop(2).take(2) + y = ymd.drop(4) + } else { + // assume dd/mm/yyyy + d = ymd.take(2) + m = ymd.drop(2).take(2) + y = ymd.drop(4) + } + } + val newymd = Seq(y, m, d) + val newnumstrings: Seq[String] = { + val head: String = datenumstrings.head + if (head == ymd) { + val rite: Seq[String] = datenumstrings.tail + val (mid: Seq[String], tail: Seq[String]) = rite.splitAt(1) + val hrmin: String = mid.mkString + if (hrmin.matches("[0-9]{3,4}")) { + val (hr: String, min: String) = hrmin.splitAt(hrmin.length-2) + val hour = if (hr.length == 1) { + s"0$hr" + } else { + hr + } + val lef: Seq[String] = newymd + val mid: Seq[String] = Seq(hour, min) + newymd ++ mid ++ tail + } else { + newymd ++ rite + } + } else { + val i = datenumstrings.indexOf(ymd) + val (left, rite) = datenumstrings.splitAt(i) + val newhrmin = rite.drop(1) + left ++ newymd ++ newhrmin + } + } + setDatenums(newnumstrings.toIndexedSeq) + } + if (monthIndex < 0) { + assert(yearIndex <= 2, s"year index > 2: $yearIndex") // TODO : wrap this as a return value + (yearIndex, monthFirst) match { + case (-1, _) => // not enough info + case (0, true) => // y-m-d + monthIndex = 1 + case (0, false) => // y-d-m + monthIndex = 2 + case (2, true) => // m-d-y + monthIndex = 0 + case (2, false) => // d-m-y + monthIndex = 1 + case (1, true) => // m-y-d // ambiguous! + confident = false + monthIndex = 0 + case (1, false) => // d-y-m // ambiguous! 
+ confident = false + monthIndex = 2 + case _ => + hook += 1 // TODO + } + } + def centuryPrefix(year: Int = now.getYear): String = { + century(year).toString.take(2) + } + def century(y: Int = now.getYear): Int = { + (y - y % 100) + } + (monthIndex, yearIndex) match { + case (_, -1) => + datenumstrings.take(3) match { + case Seq(m: String, d: String, y:String) if m.length <= 2 & d.length <= 2 && y.length <= 2 => + if (monthFirst) { + val fullyear = s"${centuryPrefix()}$y" + setDatenums(datenumstrings.updated(2, fullyear)) + } + case _ => + // TODO verify this cannot happen (year index not initialized yet, so previous case is complete) + hook += 1 + } + + case (-1, _) => + hook += 1 + case (0, 2) | (1, 0) => // m-d-y | y-m-d (month precedes day) + val month = toNum(datenumstrings(monthIndex)) + if (!monthFirst && month <= 12) { + val dayIndex = monthIndex + 1 + // swap month and day, if preferred and possible + val day = datenumstrings(dayIndex) + var newnums = datenumstrings.updated(monthIndex, day) + newnums = newnums.updated(dayIndex, month.toString) + setDatenums(newnums) + } + case (1, 2) => // d-m-y + val month = toNum(datenumstrings(monthIndex)) + if (monthFirst && month <= 12) { + // swap month and day, if preferred and possible + val dayIndex = monthIndex - 1 + val swapped = swapDayAndMonth(dayIndex, monthIndex, month.toString, datenumstrings) + setDatenums(swapped) + // swap month and day, if preferred and possible +// val day = datenumstrings(dayIndex) +// setDatenums(datenumstrings.updated(monthIndex, day)) +// setDatenums(datenumstrings.updated(dayIndex, month.toString)) + } + case (m, y) => // d-m-y + hook += 1 // TODO + } + if (monthIndex >= 0) { + val month = toNum(datenumstrings(monthIndex)) + if (!monthFirst && month <= 12) { + // swap month and day, if preferred and possible + val day = datenumstrings(monthIndex + 1) + var newnums = datenumstrings.updated(monthIndex, day) + newnums = newnums.updated(monthIndex+1, month.toString) + 
setDatenums(newnums) + } + } + //var nums: Array[Int] = datefields.map { (s: String) => toI(s) } + var nums: Seq[Int] = datenumstrings.map { (numstr: String) => + if (!numstr.matches("[0-9]+")) { + hook += 1 + } + toNum(numstr) + } + + val timeOnly: Boolean = datenumstrings.size <= 4 && rawdatetime.matches("[0-9]{2}:[0-9]{2}.*") + if ( !timeOnly ) { + def adjustYear(year: Int): Unit = { + nums = nums.take(2) ++ Seq(year) ++ nums.drop(3) + val newnums = nums.map { _.toString } + setDatenums(newnums) + } + val dateFields = nums.take(3) + dateFields match { + case Seq(a, b, c) if a > 31 || b > 31 || c > 31 => + hook += 1 // the typical case where 4-digit year is provided + case Seq(a, b) => + // the problem case; assume no year provided + adjustYear(now.getYear) // no year provided, use current year + case Seq(mOrD, dOrM, relyear) => + // the problem case; assume M/d/y or d/M/y format + val y = now.getYear + val century = y - y % 100 + adjustYear(century + relyear) + case _ => + hook += 1 // huh? 
+ } + } + + val fields: Seq[(String, Int)] = datenumstrings.zipWithIndex + var (yval, mval, dval) = (0, 0, 0) + val farr = fields.toArray + var formats: Array[String] = farr.map { (s: String, i: Int) => + if (i < 3 && !timeOnly) { + toNum(s) match { + case y if y > 31 || s.length == 4 => + yval = y + s.replaceAll(".", "y") + case d if d > 12 && s.length <= 2 => + dval = d + s.replaceAll(".", "d") + case _ => // can't resolve month without more context + s + } + } else { + i match { + case 3 => s.replaceAll(".", "H") + case 4 => s.replaceAll(".", "m") + case 5 => s.replaceAll(".", "s") + case 6 => s.replaceAll(".", "Z") + case _ => + s // not expecting any more numeric fields + } + } + } + def indexOf(s: String): Int = { + formats.indexWhere((fld: String) => + fld.startsWith(s) + ) + } + def numIndex: Int = { + formats.indexWhere((s: String) => s.matches("[0-9]+")) + } + def setFirstNum(s: String): Int = { + val i = numIndex + val numval = formats(i) + val numfmt = numval.replaceAll("[0-9]", s) + formats(i) = numfmt + toNum(numval) + } + // if two yyyy-MM-dd fields already fixed, the third is implied + formats.take(3).map { _.distinct }.sorted match { + case Array(_, "M", "y") => dval = setFirstNum("d") + case Array(_, "d", "y") => mval = setFirstNum("M") + case Array(_, "M", "d") => yval = setFirstNum("y") + case _arr => + hook += 1 // more than one numeric fields, so not ready to resolve + } + hook += 1 + def is(s: String, v: String): Boolean = s.startsWith(v) + + val yidx = indexOf("y") + val didx = indexOf("d") + val midx = indexOf("M") + + def hasY = yidx >= 0 + def hasM = midx >= 0 + def hasD = didx >= 0 + def needsY = yidx < 0 + def needsM = midx < 0 + def needsD = didx < 0 + + def replaceFirstNumericField(s: String): Unit = { + val i = numIndex + if (i < 0) { + hook += 1 // no numerics found + } else { + assert(i >= 0 && i < 3, s"internal error: $_rawdatetime [i: $i, s: $s]") + s match { + case "y" => + assert(yval == 0, s"yval: $yval") + yval = 
toNum(formats(i)) + case "M" => + assert(mval == 0, s"mval: $mval") + mval = toNum(formats(i)) + case "d" => + if (dval > 0) { + hook += 1 + } + assert(dval == 0, s"dval: $dval") + dval = toNum(formats(i)) + case _ => + sys.error(s"internal error: bad format indicator [$s]") + } + setFirstNum(s) + } + } + + val needs = Seq(needsY, needsM, needsD) + (needsY, needsM, needsD) match { + case (false, false, true) => + replaceFirstNumericField("d") + case (false, true, false) => + replaceFirstNumericField("M") + case (true, false, false) => + replaceFirstNumericField("y") + + case (false, true, true) => + // has year, needs month and day + yidx match { + case 1 => + // might as well support bizarre formats (M-y-d or d-M-y) + if (monthFirst) { + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } else { + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } + case 0 | 2 => + // y-M-d + if (monthFirst) { + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } else { + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } + + } + case (true, true, false) => + // has day, needs month and year + didx match { + case 0 => + // d-M-y + replaceFirstNumericField("M") + replaceFirstNumericField("y") + case 2 => + // y-M-d + replaceFirstNumericField("y") + replaceFirstNumericField("M") + case 1 => + // AMBIGUOUS ... 
+ if (monthFirst) { + // M-d-y + replaceFirstNumericField("d") + replaceFirstNumericField("M") + } else { + // d-M-y + replaceFirstNumericField("M") + replaceFirstNumericField("d") + } + } + case (false, false, false) => + hook += 1 // done + case (true, true, true) if timeOnly => + hook += 1 // done + case (yy, mm, dd) => + formats.toList match { + case a :: b :: Nil => + val (ta, tb) = (toNum(a), toNum(b)) + // interpret as missing day or missing year + // missing day if either field is > 31 + if (monthFirst && ta <= 12) { + mval = ta + dval = tb + } else { + mval = tb + dval = tb + } + if (mval > 31) { + // assume day is missing + yval = mval + mval = dval + dval = 1 // convention + } else if (dval > 31) { + // assume day is missing + yval = dval + dval = 1 // convention + } else { + if (mval > 12) { + // the above swap might make this superfluous + // swap month and day + val temp = mval + mval = dval + dval = temp + } + yval = now.getYear // supply missing year + } + // TODO: reorder based on legal field values, if appropriate + formats = Array("yyyy", "MM", "dd") + setDatenums(IndexedSeq(yval, mval, dval).map { _.toString }) + case _ => + if (datenumstrings.nonEmpty) { + sys.error(s"yy[$yy], mm[$mm], dd[$dd] datetime[$_rawdatetime], formats[${formats.mkString("|")}]") + } + } + } + if (datenumstrings.endsWith("2019") ){ + hook += 1 + } + + val bareformats = formats.map { _.distinct }.toList + nums = { + val tdnums = (datenumstrings ++ timestring.split("[+: ]+")) + tdnums.filter { _.trim.nonEmpty }.map { toNum(_) } + } + def ymd(iy: Int, im: Int, id: Int, tail: List[String]): LocalDateTime = { + if (iy <0 || im <0 || id <0) { + hook += 1 + } else if (nums.size < 3) { + hook += 1 + } + val standardOrder = List(nums(iy), nums(im), nums(id)) ++ nums.drop(3) + yyyyMMddHHmmssToDate(standardOrder) + } + val dateTime: LocalDateTime = bareformats match { + case "d" :: "M" :: "y" :: tail => ymd(2,1,0, tail) + case "M" :: "d" :: "y" :: tail => ymd(2,0,1, tail) + case 
"d" :: "y" :: "M" :: tail => ymd(1,2,0, tail) + case "M" :: "y" :: "d" :: tail => ymd(1,0,2, tail) + case "y" :: "d" :: "M" :: tail => ymd(0,2,1, tail) + case "y" :: "M" :: "d" :: tail => ymd(0,1,2, tail) + case other => + valid = false + BadDate + } + new ChronoParse(dateTime, _rawdatetime, timezone, formats.toSeq, valid) + } + + def yyyyMMddHHmmssToDate(so: List[Int]): LocalDateTime = { + so.take(7) match { + case yr :: mo :: dy :: hr :: mn :: sc :: nano :: Nil => + LocalDateTime.of(yr, mo, dy, hr, mn, sc, nano) + case yr :: mo :: dy :: hr :: mn :: sc :: Nil => + if (sc > 59 || mn > 59 || hr > 59) { + hook += 1 + } + LocalDateTime.of(yr, mo, dy, hr, mn, sc) + case yr :: mo :: dy :: hr :: mn :: Nil => + LocalDateTime.of(yr, mo, dy, hr, mn, 0) + case yr :: mo :: dy :: hr :: Nil => + if (hr > 23) { + hook += 1 + } + LocalDateTime.of(yr, mo, dy, hr, 0, 0) + case yr :: mo :: dy :: Nil => + if (mo > 12) { + hook += 1 + } + LocalDateTime.of(yr, mo, dy, 0, 0, 0) + case other => + sys.error(s"not enough date-time fields: [${so.mkString("|")}]") + } + } + lazy val timeZoneCodes = Set( + "ACDT", // Australian Central Daylight Saving Time UTC+10:30 + "ACST", // Australian Central Standard Time UTC+09:30 + "ACT", // Acre Time UTC−05 + "ACT", // ASEAN Common Time (proposed) UTC+08:00 + "ACWST", // Australian Central Western Standard Time (unofficial) UTC+08:45 + "ADT", // Atlantic Daylight Time UTC−03 + "AEDT", // Australian Eastern Daylight Saving Time UTC+11 + "AEST", // Australian Eastern Standard Time UTC+10 + "AET", // Australian Eastern Time UTC+10 / UTC+11 + "AEST", // Australian Eastern Time UTC+10 / UTC+11, + "AEDT", // Australian Eastern Time UTC+10 / UTC+11 + "AFT", // Afghanistan Time UTC+04:30 + "AKDT", // Alaska Daylight Time UTC−08 + "AKST", // Alaska Standard Time UTC−09 + "ALMT", // Alma-Ata Time[1] UTC+06 + "AMST", // Amazon Summer Time (Brazil)[2] UTC−03 + "AMT", // Amazon Time (Brazil)[3] UTC−04 + "AMT", // Armenia Time UTC+04 + "ANAT", // Anadyr Time[4] 
UTC+12 + "AQTT", // Aqtobe Time[5] UTC+05 + "ART", // Argentina Time UTC−03 + "AST", // Arabia Standard Time UTC+03 + "AST", // Atlantic Standard Time UTC−04 + "AWST", // Australian Western Standard Time UTC+08 + "AZOST", // Azores Summer Time UTC±00 + "AZOT", // Azores Standard Time UTC−01 + "AZT", // Azerbaijan Time UTC+04 + "BNT", // Brunei Time UTC+08 + "BIOT", // British Indian Ocean Time UTC+06 + "BIT", // Baker Island Time UTC−12 + "BOT", // Bolivia Time UTC−04 + "BRST", // Brasília Summer Time UTC−02 + "BRT", // Brasília Time UTC−03 + "BST", // Bangladesh Standard Time UTC+06 + "BST", // Bougainville Standard Time[6] UTC+11 + "BST", // British Summer Time (British Standard Time from Mar 1968 to Oct 1971) UTC+01 + "BTT", // Bhutan Time UTC+06 + "CAT", // Central Africa Time UTC+02 + "CCT", // Cocos Islands Time UTC+06:30 + "CDT", // Central Daylight Time (North America) UTC−05 + "CDT", // Cuba Daylight Time[7] UTC−04 + "CEST", // Central European Summer Time UTC+02 + "CET", // Central European Time UTC+01 + "CHADT", // Chatham Daylight Time UTC+13:45 + "CHAST", // Chatham Standard Time UTC+12:45 + "CHOT", // Choibalsan Standard Time UTC+08 + "CHOST", // Choibalsan Summer Time UTC+09 + "CHST", // Chamorro Standard Time UTC+10 + "CHUT", // Chuuk Time UTC+10 + "CIST", // Clipperton Island Standard Time UTC−08 + "CKT", // Cook Island Time UTC−10 + "CLST", // Chile Summer Time UTC−03 + "CLT", // Chile Standard Time UTC−04 + "COST", // Colombia Summer Time UTC−04 + "COT", // Colombia Time UTC−05 + "CST", // Central Standard Time (North America) UTC−06 + "CST", // China Standard Time UTC+08 + "CST", // Cuba Standard Time UTC−05 + "CT", + "CST", + "CDT", // Central Time UTC−06 / UTC−05 + "CVT", // Cape Verde Time UTC−01 + "CWST", // Central Western Standard Time (Australia) unofficial UTC+08:45 + "CXT", // Christmas Island Time UTC+07 + "DAVT", // Davis Time UTC+07 + "DDUT", // Dumont d'Urville Time UTC+10 + "DFT", // AIX-specific equivalent of Central European 
Time[NB 1] UTC+01 + "EASST", // Easter Island Summer Time UTC−05 + "EAST", // Easter Island Standard Time UTC−06 + "EAT", // East Africa Time UTC+03 + "ECT", // Eastern Caribbean Time (does not recognise DST) UTC−04 + "ECT", // Ecuador Time UTC−05 + "EDT", // Eastern Daylight Time (North America) UTC−04 + "EEST", // Eastern European Summer Time UTC+03 + "EET", // Eastern European Time UTC+02 + "EGST", // Eastern Greenland Summer Time UTC±00 + "EGT", // Eastern Greenland Time UTC−01 + "EST", // Eastern Standard Time (North America) UTC−05 + "ET", + "EST", + "EDT", // Eastern Time (North America) UTC−05 / UTC−04 + "FET", // Further-eastern European Time UTC+03 + "FJT", // Fiji Time UTC+12 + "FKST", // Falkland Islands Summer Time UTC−03 + "FKT", // Falkland Islands Time UTC−04 + "FNT", // Fernando de Noronha Time UTC−02 + "GALT", // Galápagos Time UTC−06 + "GAMT", // Gambier Islands Time UTC−09 + "GET", // Georgia Standard Time UTC+04 + "GFT", // French Guiana Time UTC−03 + "GILT", // Gilbert Island Time UTC+12 + "GIT", // Gambier Island Time UTC−09 + "GMT", // Greenwich Mean Time UTC±00 + "GST", // South Georgia and the South Sandwich Islands Time UTC−02 + "GST", // Gulf Standard Time UTC+04 + "GYT", // Guyana Time UTC−04 + "HDT", // Hawaii–Aleutian Daylight Time UTC−09 + "HAEC", // Heure Avancée d'Europe Centrale French-language name for CEST UTC+02 + "HST", // Hawaii–Aleutian Standard Time UTC−10 + "HKT", // Hong Kong Time UTC+08 + "HMT", // Heard and McDonald Islands Time UTC+05 + "HOVST", // Hovd Summer Time (not used from 2017-present) UTC+08 + "HOVT", // Hovd Time UTC+07 + "ICT", // Indochina Time UTC+07 + "IDLW", // International Date Line West time zone UTC−12 + "IDT", // Israel Daylight Time UTC+03 + "IOT", // Indian Ocean Time UTC+03 + "IRDT", // Iran Daylight Time UTC+04:30 + "IRKT", // Irkutsk Time UTC+08 + "IRST", // Iran Standard Time UTC+03:30 + "IST", // Indian Standard Time UTC+05:30 + "IST", // Irish Standard Time[8] UTC+01 + "IST", // Israel 
Standard Time UTC+02 + "JST", // Japan Standard Time UTC+09 + "KALT", // Kaliningrad Time UTC+02 + "KGT", // Kyrgyzstan Time UTC+06 + "KOST", // Kosrae Time UTC+11 + "KRAT", // Krasnoyarsk Time UTC+07 + "KST", // Korea Standard Time UTC+09 + "LHST", // Lord Howe Standard Time UTC+10:30 + "LHST", // Lord Howe Summer Time UTC+11 + "LINT", // Line Islands Time UTC+14 + "MAGT", // Magadan Time UTC+12 + "MART", // Marquesas Islands Time UTC−09:30 + "MAWT", // Mawson Station Time UTC+05 + "MDT", // Mountain Daylight Time (North America) UTC−06 + "MET", // Middle European Time (same zone as CET) UTC+01 + "MEST", // Middle European Summer Time (same zone as CEST) UTC+02 + "MHT", // Marshall Islands Time UTC+12 + "MIST", // Macquarie Island Station Time UTC+11 + "MIT", // Marquesas Islands Time UTC−09:30 + "MMT", // Myanmar Standard Time UTC+06:30 + "MSK", // Moscow Time UTC+03 + "MST", // Malaysia Standard Time UTC+08 + "MST", // Mountain Standard Time (North America) UTC−07 + "MUT", // Mauritius Time UTC+04 + "MVT", // Maldives Time UTC+05 + "MYT", // Malaysia Time UTC+08 + "NCT", // New Caledonia Time UTC+11 + "NDT", // Newfoundland Daylight Time UTC−02:30 + "NFT", // Norfolk Island Time UTC+11 + "NOVT", // Novosibirsk Time [9] UTC+07 + "NPT", // Nepal Time UTC+05:45 + "NST", // Newfoundland Standard Time UTC−03:30 + "NT", // Newfoundland Time UTC−03:30 + "NUT", // Niue Time UTC−11 + "NZDT", // New Zealand Daylight Time UTC+13 + "NZST", // New Zealand Standard Time UTC+12 + "OMST", // Omsk Time UTC+06 + "ORAT", // Oral Time UTC+05 + "PDT", // Pacific Daylight Time (North America) UTC−07 + "PET", // Peru Time UTC−05 + "PETT", // Kamchatka Time UTC+12 + "PGT", // Papua New Guinea Time UTC+10 + "PHOT", // Phoenix Island Time UTC+13 + "PHT", // Philippine Time UTC+08 + "PHST", // Philippine Standard Time UTC+08 + "PKT", // Pakistan Standard Time UTC+05 + "PMDT", // Saint Pierre and Miquelon Daylight Time UTC−02 + "PMST", // Saint Pierre and Miquelon Standard Time UTC−03 + 
"PONT", // Pohnpei Standard Time UTC+11 + "PST", // Pacific Standard Time (North America) UTC−08 + "PWT", // Palau Time[10] UTC+09 + "PYST", // Paraguay Summer Time[11] UTC−03 + "PYT", // Paraguay Time[12] UTC−04 + "RET", // Réunion Time UTC+04 + "ROTT", // Rothera Research Station Time UTC−03 + "SAKT", // Sakhalin Island Time UTC+11 + "SAMT", // Samara Time UTC+04 + "SAST", // South African Standard Time UTC+02 + "SBT", // Solomon Islands Time UTC+11 + "SCT", // Seychelles Time UTC+04 + "SDT", // Samoa Daylight Time UTC−10 + "SGT", // Singapore Time UTC+08 + "SLST", // Sri Lanka Standard Time UTC+05:30 + "SRET", // Srednekolymsk Time UTC+11 + "SRT", // Suriname Time UTC−03 + "SST", // Samoa Standard Time UTC−11 + "SST", // Singapore Standard Time UTC+08 + "SYOT", // Showa Station Time UTC+03 + "TAHT", // Tahiti Time UTC−10 + "THA", // Thailand Standard Time UTC+07 + "TFT", // French Southern and Antarctic Time[13] UTC+05 + "TJT", // Tajikistan Time UTC+05 + "TKT", // Tokelau Time UTC+13 + "TLT", // Timor Leste Time UTC+09 + "TMT", // Turkmenistan Time UTC+05 + "TRT", // Turkey Time UTC+03 + "TOT", // Tonga Time UTC+13 + "TST", // Taiwan Standard Time UTC+08 + "TVT", // Tuvalu Time UTC+12 + "ULAST", // Ulaanbaatar Summer Time UTC+09 + "ULAT", // Ulaanbaatar Standard Time UTC+08 + "UTC", // Coordinated Universal Time UTC±00 + "UYST", // Uruguay Summer Time UTC−02 + "UYT", // Uruguay Standard Time UTC−03 + "UZT", // Uzbekistan Time UTC+05 + "VET", // Venezuelan Standard Time UTC−04 + "VLAT", // Vladivostok Time UTC+10 + "VOLT", // Volgograd Time UTC+03 + "VOST", // Vostok Station Time UTC+06 + "VUT", // Vanuatu Time UTC+11 + "WAKT", // Wake Island Time UTC+12 + "WAST", // West Africa Summer Time UTC+02 + "WAT", // West Africa Time UTC+01 + "WEST", // Western European Summer Time UTC+01 + "WET", // Western European Time UTC±00 + "WIB", // Western Indonesian Time UTC+07 + "WIT", // Eastern Indonesian Time UTC+09 + "WITA", // Central Indonesia Time UTC+08 + "WGST", // 
West Greenland Summer Time[14] UTC−02 + "WGT", // West Greenland Time[15] UTC−03 + "WST", // Western Standard Time UTC+08 + "YAKT", // Yakutsk Time UTC+09 + "YEKT", // Yekaterinburg Time + ) +} + + +// TODO: use timezone info, including CST, etc +case class ChronoParse(dateTime: LocalDateTime, rawdatetime: String, timezone: String, formats: Seq[String], valid: Boolean) { + import vastblue.time.TimeDate.* + override def toString: String = dateTime.toString("yyyy-MM-dd HH:mm:ss") +} diff --git a/src/main/scala/vastblue/time/TimeDate.scala b/src/main/scala/vastblue/time/TimeDate.scala index bf22630..a2e36bf 100644 --- a/src/main/scala/vastblue/time/TimeDate.scala +++ b/src/main/scala/vastblue/time/TimeDate.scala @@ -1,9 +1,10 @@ package vastblue.time import vastblue.pallet.* -import vastblue.time.ParsDate +import vastblue.time.TimeParser +import vastblue.time.ChronoParse -import java.time.{ZoneId} +import java.time.ZoneId import java.time.format.* import io.github.chronoscala.Imports.* @@ -11,14 +12,14 @@ import java.time.temporal.{ChronoField, TemporalAdjuster, TemporalAdjusters} import scala.util.matching.Regex object TimeDate extends vastblue.time.TimeExtensions { - def zoneid = ZoneId.systemDefault - def zoneOffset = zoneid.getRules().getStandardOffset(now.toInstant()) + private[vastblue] def zoneid = ZoneId.systemDefault + private[vastblue] def zoneOffset = zoneid.getRules().getStandardOffset(now.toInstant()) type DateTimeZone = java.time.ZoneId type DateTime = LocalDateTime val DateTime = LocalDateTime - def parseLocalDate(_datestr: String, offset: Int = 0): DateTime = { + private[vastblue] def parseLocalDate(_datestr: String, offset: Int = 0): DateTime = { dateParser(_datestr, offset) // .toLocalDate } @@ -36,33 +37,39 @@ object TimeDate extends vastblue.time.TimeExtensions { lazy val standardTimestampFormat = datetimeFmt6 + lazy val datetimeFmt9 = "yyyy-MM-dd HH:mm:ss [-+][0-9]{4}" lazy val datetimeFmt8 = "yyyy-MM-dd HH:mm:ss-ss:S" lazy val datetimeFmt7 = 
"yyyy-MM-dd HH:mm:ss.S" lazy val datetimeFmt6 = "yyyy-MM-dd HH:mm:ss" // date-time-format lazy val datetimeFmt6B = "dd-MM-yyyy HH:mm:ss" // day first! lazy val datetimeFmt6C = "MM-dd-yyyy HH:mm:ss" // month first + lazy val datetimeFmt6D = "M-dd-yyyy HH:mm:ss" // month first lazy val datetimeFmt5 = "yyyy-MM-dd HH:mm" // 12-hour format lazy val datetimeFmt5b = "yyyy-MM-dd kk:mm" // 24-hour format lazy val dateonlyFmt = "yyyy-MM-dd" // date-only-format + lazy val dateonlyFmtB = "MM-dd-yyyy" // month-first date-only-format + lazy val datetimeFormatter9: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt9) lazy val datetimeFormatter8: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt8) lazy val datetimeFormatter7: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt7) lazy val datetimeFormatter6: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt6) lazy val datetimeFormatter6B: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt6B) lazy val datetimeFormatter6C: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt6C) + lazy val datetimeFormatter6D: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt6D) lazy val datetimeFormatter5: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt5) lazy val datetimeFormatter5b: DateTimeFormatter = dateTimeFormatPattern(datetimeFmt5b) lazy val dateonlyFormatter: DateTimeFormatter = dateTimeFormatPattern(dateonlyFmt) + lazy val dateonlyFormatterB: DateTimeFormatter = dateTimeFormatPattern(dateonlyFmtB) lazy val EasternTime: ZoneId = java.time.ZoneId.of("America/New_York") lazy val MountainTime: ZoneId = java.time.ZoneId.of("America/Denver") lazy val UTC: ZoneId = java.time.ZoneId.of("UTC") - def LastDayAdjuster: TemporalAdjuster = TemporalAdjusters.lastDayOfMonth() + private[vastblue] def LastDayAdjuster: TemporalAdjuster = TemporalAdjusters.lastDayOfMonth() // ============================== - def dateTimeFormatPattern(fmt: String, zone: ZoneId = ZoneId.systemDefault()): DateTimeFormatter = { + private[vastblue] def 
dateTimeFormatPattern(fmt: String, zone: ZoneId = ZoneId.systemDefault()): DateTimeFormatter = { val dtf1 = DateTimeFormatter.ofPattern(fmt).withZone(zone) val dtf = if (fmt.length <= "yyyy-mm-dd".length) { import java.time.temporal.ChronoField @@ -185,7 +192,7 @@ object TimeDate extends vastblue.time.TimeExtensions { ageInDays(new java.io.File(fname)) } - def parse(str: String, format: String): DateTime = { + private[vastblue] def parse(str: String, format: String): DateTime = { if (timeDebug) System.err.print("parse(str=[%s], format=[%s]\n".format(str, format)) if (format.length <= "yyyy-mm-dd".length) { DateTime.parse(str, dateTimeFormatPattern(format)) @@ -194,7 +201,7 @@ object TimeDate extends vastblue.time.TimeExtensions { } } - /** The new parser does not depend on ParsDate */ + /** The new parser does not depend on TimeParser */ def parseDateNew(_datestr: String, format: String = ""): DateTime = { // format: off val datestr = _datestr. @@ -231,9 +238,19 @@ object TimeDate extends vastblue.time.TimeExtensions { lazy val ThreeIntegerFields3 = """(\d{1,2})\D(\d{1,2})\D(\d{2,4})""".r lazy val ThreeIntegerFields2 = """(\d{2,2})\D(\d{1,2})\D(\d{1,2})""".r - // TODO: this doesn't depend on joda time anymore (and should not have been named this way) - def dateParser(_inputdatestr: String, offset: Int = 0): DateTime = { - val _datestr = _inputdatestr.trim.replaceAll("\"", "").replaceAll(" [-+][0-9]{4}$", "") + def dateParser(inpDateStr: String, offset: Int = 0): DateTime = { + val flds = vastblue.time.ChronoParse(inpDateStr) + flds.dateTime // might be BadDate! 
+ } + private[vastblue] def _dateParser(inpDateStr: String, offset: Int = 0): DateTime = { + if (inpDateStr.startsWith("31/05/2009")) { + hook += 1 + } + if (inpDateStr.contains("-07")) { + hook += 1 + } + val _datestr = inpDateStr.trim.replaceAll("\"", "").replaceAll(" [-+][0-9]{4}$", "") + val zonestr: String = inpDateStr.drop(_datestr.length) if (_datestr.isEmpty) { BadDate } else { @@ -275,7 +292,6 @@ object TimeDate extends vastblue.time.TimeExtensions { // next, treat yyyyMMdd (8 digits, no field separators) if (_datestr.matches("""2\d{7}""")) { new RichString(_datestr.replaceAll("(....)(..)(..)", "$1-$2-$3")).toDateTime - } else if (_datestr.matches("""\d{2}\D\d{2}\D\d{2}""")) { // MM-dd-yy val fixed = _datestr.split("\\D").toList match { @@ -289,17 +305,26 @@ object TimeDate extends vastblue.time.TimeExtensions { DateTime.parse(fixed, datetimeFormatter6) } else if (_datestr.matches("""2\d{3}\D\d{2}\D\d{2}\.\d{4}""")) { // yyyy-MM-dd.HHMM - val fixed = _datestr.replaceAll("""(....)\D(..)\D(..)\.(\d\d)(\d\d)""", "$1-$2-$3 $4:$5:00") DateTime.parse(fixed, datetimeFormatter6) } else { val datestr = _datestr.replaceAll("/", "-") try { - parseDateString(datestr.replaceAll(" [-+][0-9]{4}$", "")) + val fixed = _datestr. + replaceAll(" [-+][0-9]{4}$", ""). + replaceAll("([0-9])([A-Z])", "$1 $2"). 
+ replaceAll("([a-z])([0-9])", "$1 $2") + parseDateString(fixed) } catch { + case r: RuntimeException if r.getMessage.toLowerCase.contains("bad date format") => + if (TimeParser.debug) System.err.printf("e[%s]\n", r.getMessage) + BadDate + case p: DateTimeParseException => + if (TimeParser.debug) System.err.printf("e[%s]\n", p.getMessage) + BadDate case e: Exception => - if (ParsDate.debug) System.err.printf("e[%s]\n", e.getMessage) - val mdate: ParsDate = ParsDate.parseDate(datestr).getOrElse(ParsDate.BadParsDate) + if (TimeParser.debug) System.err.printf("e[%s]\n", e.getMessage) + val mdate: TimeParser = TimeParser.parseDate(datestr).getOrElse(TimeParser.BadParsDate) // val timestamp = new DateTime(mdate.getEpoch) val standardFormat = mdate.toString(standardTimestampFormat) val timestamp = standardFormat.toDateTime @@ -332,7 +357,7 @@ object TimeDate extends vastblue.time.TimeExtensions { def getDaysElapsed(datestr1: String, datestr2: String): Long = { getDaysElapsed(dateParser(datestr1), dateParser(datestr2)) } - def selectZonedFormat(_datestr: String): java.time.format.DateTimeFormatter = { + private[vastblue] def selectZonedFormat(_datestr: String): java.time.format.DateTimeFormatter = { val datestr = _datestr.replaceAll("/", "-") val numfields = datestr.split("\\D+") numfields.length <= 3 match { @@ -340,7 +365,7 @@ object TimeDate extends vastblue.time.TimeExtensions { case false => datetimeFormatter6 } } - def ti(s: String): Int = { + private[vastblue] def ti(s: String): Int = { s match { case n if n.matches("0\\d+") => n.replaceAll("0+(.)", "$1").toInt @@ -349,28 +374,50 @@ object TimeDate extends vastblue.time.TimeExtensions { } } def numerifyNames(datestr: String) = { - val noweekdayName = datestr.replaceAll("(Sun[day]*|Mon[day]*|Tue[sday]*|Wed[nesday]*|Thu[rsday]*|Fri[day]*|Sat[urday]*),? *", "") + val noweekdayName = datestr.replaceAll("(?i)(Sun[day]*|Mon[day]*|Tue[sday]*|Wed[nesday]*|Thu[rsday]*|Fri[day]*|Sat[urday]*),? 
*", "") +// val nomonthName = datestr.replaceAll("(?i)(Jan[ury]*|Feb[ruay]*|Mar[ch]*|Apr[il]*|May|Jun[e]*|Jul[y]*|Aug[st]*|Sep[tmbr]*|Oct[ober]*|Nov[embr]*|Dec[mbr]*),? *", "") +// if (noweekdayName != datestr || nomonthName != datestr){ +// hook += 1 +// } noweekdayName match { case str if str.matches("(?i).*[JFMASOND][aerpuco][nbrylgptvc][a-z]*.*") => - var ff = str.split("[,\\s]+") - if (ff(0).matches("\\d+")) { - // swap 1st and 2nd fields (e.g., convert "01 Jan" to "Jan 01") - val tmp = ff(0) - ff(0) = ff(1) - ff(1) = tmp - } - val month = ParsDate.monthAbbrev2Number(ff.head.take(3)) - ff = ff.drop(1) - // format: off - val (day, year, timestr, tz) = ff.toList match { - case d :: y :: Nil => (d.toInt, y.toInt, "", "") - case d :: y :: ts :: tz :: Nil if ts.contains(":") => (d.toInt, y.toInt, " "+ts, " "+tz) - case d :: ts :: y :: tail if ts.contains(":") => (d.toInt, y.toInt, " "+ts, "") - case d :: y :: ts :: tail => (d.toInt, y.toInt, " "+ts, "") - case other => sys.error(s"bad date [$other]") + var ff = str.replaceFirst("([a-zA-Z])([0-9])", "$1 $2").split("[-/,\\s]+") + val monthIndex = ff.indexWhere {(s: String) => s.matches("(?i).*[JFMASOND][aerpuco][nbrylgptvc][a-z]*.*")} + if (monthIndex >= 0){ + val monthName = ff(monthIndex) + val month: Int = TimeParser.monthAbbrev2Number(ff(monthIndex)) + val nwn = noweekdayName.replaceAll(monthName, "%02d ".format(month)) + nwn + } else { + // format: off + if (ff(0).matches("\\d+")) { + // swap 1st and 2nd fields (e.g., convert "01 Jan" to "Jan 01") + val tmp = ff(0) + ff(0) = ff(1) + ff(1) = tmp + } + val mstr = ff.head.take(3) + if (!mstr.toLowerCase.matches("[a-z]{3}")) { + hook += 1 + } + val month = TimeParser.monthAbbrev2Number(mstr) + ff = ff.drop(1) + // format: off + val (day, year, timestr, tz) = ff.toList match { + case d :: y :: Nil => + (d.toInt, y.toInt, "", "") + case d :: y :: ts :: tz :: Nil if ts.contains(":") => + (d.toInt, y.toInt, " "+ts, " "+tz) + case d :: ts :: y :: tail if ts.contains(":") 
=> + (d.toInt, y.toInt, " "+ts, "") + case d :: y :: ts :: tail => + (d.toInt, y.toInt, " "+ts, "") + case other => + sys.error(s"bad date [$other]") + } + // format: on + "%4d-%02d-%02d%s%s".format(year, month, day, timestr, tz) } - // format: on - "%4d-%02d-%02d%s%s".format(year, month, day, timestr, tz) case str => str } @@ -378,18 +425,22 @@ object TimeDate extends vastblue.time.TimeExtensions { lazy val mmddyyyyPattern: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})""".r lazy val mmddyyyyTimePattern: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})(\D\d\d:\d\d(:\d\d)?)""".r - lazy val mmddyyyyTimePattern2: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D(\d\d): (\d\d)""".r - lazy val mmddyyyyTimePattern3: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D(\d\d): (\d\d): (\d\d)""".r - lazy val mmddyyyyTimePattern3tz: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D(\d\d): (\d\d): (\d\d)\D(-?[0-9]{4})""".r + lazy val mmddyyyyTimePattern2: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D(\d\d):(\d\d)""".r + lazy val mmddyyyyTimePattern3: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D(\d\d):(\d\d):(\d\d)""".r + lazy val mmddyyyyTimePattern3tz: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D(\d\d):(\d\d):(\d\d)\D(-?[0-9]{4})""".r lazy val yyyymmddPattern: Regex = """(\d{4})\D(\d{1,2})\D(\d{1,2})""".r lazy val yyyymmddPatternWithTime: Regex = """(\d{4})\D(\d{1,2})\D(\d{1,2})(\D.+)""".r - lazy val yyyymmddPatternWithTime2: Regex = """(\d{4})\D(\d{1,2})\D(\d{1,2}) +(\d{2}): (\d{2})""".r - lazy val yyyymmddPatternWithTime3: Regex = """(\d{4})\D(\d{1,2})\D(\d{1,2})\D(\d{2}): (\d{2}): (\d{2})""".r + lazy val yyyymmddPatternWithTime2: Regex = """(\d{4})\D(\d{1,2})\D(\d{1,2})\D+(\d{2}):(\d{2})""".r + lazy val yyyymmddPatternWithTime3: Regex = """(\d{4})\D(\d{1,2})\D(\d{1,2})\D+(\d{2}):(\d{2}):(\d{2})""".r + lazy val mmddyyyyPatternWithTime3: Regex = """(\d{1,2})\D(\d{1,2})\D(\d{4})\D+(\d{2}):(\d{2}):(\d{2})""".r lazy val validYearPattern = """(1|2)\d{3}""" // only consider years between 1000 and 2999 // format: off - def 
parseDateString(_datestr: String): LocalDateTime = { + private[vastblue] def parseDateString(_datestr: String): LocalDateTime = { + if (_datestr.startsWith("31")) { + hook += 1 + } var datestr = _datestr. replaceAll("/", "-"). replaceAll("#", ""). @@ -406,10 +457,21 @@ object TimeDate extends vastblue.time.TimeExtensions { "%s-%02d-%02d %02d:%02d".format(y, ti(m), h, min) case mmddyyyyTimePattern3(m, d, y, h, min, s) if m.toInt <= 12 && y.matches(validYearPattern) => - "%s-%02d-%02d %02d:%02d:02d".format(y, ti(m), h, min, s) + "%s-%02d-%02d %02d:%02d:%02d".format(y, ti(m), ti(d), ti(h), ti(min), ti(s)) // %02d needs Int arguments, hence ti() on every captured field case mmddyyyyTimePattern3tz(m, d, y, h, min, s, tz) if y.matches(validYearPattern) => - "%s-%02d-%02d %02d:%02d:02d %s".format(y, ti(m), h, min, s) + "%s-%02d-%02d %02d:%02d:%02d".format(y, ti(m), ti(d), ti(h), ti(min), ti(s)) // zone suffix dropped: the field-count dispatch below cannot parse it + + case mmddyyyyPatternWithTime3(dm, md, y, h, min, s) if y.matches(validYearPattern) => + val Seq(tyr, tmd, tdm, th, tmin, ts) = Seq(y, md, dm, h, min, s).map ( ti(_) ) + val dstr = if (tdm > 12) { + // dm is day, md is month + "%04d-%02d-%02d %02d:%02d:%02d".format(tyr, tmd, tdm, th, tmin, ts) + } else { + // md is day, dm is month + "%04d-%02d-%02d %02d:%02d:%02d".format(tyr, tdm, tmd, th, tmin, ts) + } + dstr case yyyymmddPattern(y, m, d) if y.matches(validYearPattern) => "%s-%02d-%02d".format(y, ti(m), ti(d)) @@ -435,45 +497,56 @@ object TimeDate extends vastblue.time.TimeExtensions { val withNums = numerifyNames(other) withNums } - - val numfields = datestr.split("\\D+").map { _.trim }.filter { _.nonEmpty }.map { _.toInt} + val numstrings = datestr.split("\\D+").map { _.trim }.filter { _.nonEmpty } + val numfields = numstrings.map { _.toInt} + val numWidths = numstrings.map { _.length } numfields.length match { - case 1 => - val dstr = if (datestr.startsWith("2")) { - // e.g., 20220330 - datestr.replaceAll("(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3") - } else if (datestr.drop(4).startsWith("2")) { - // e.g., 03302022 -
datestr.replaceAll("(\\d{2})(\\d{2})(\\d{4})", "$3-$1-$2") - } else { - sys.error(s"bad date format [$datestr]") - } - val fmtr = datetimeFormatter6 - DateTime.parse(s"${dstr} 00:00:00", fmtr) - case 3 => - val fmtr = datetimeFormatter6 - DateTime.parse(s"${datestr} 00:00:00", fmtr) - case 5 => - if (numfields(3) <= 12) { - DateTime.parse(datestr, datetimeFormatter5) - } else { - DateTime.parse(datestr, datetimeFormatter5b) - } - case 6 => - if (numfields(0) < 1000) { - if (numfields(0) > 12) { - DateTime.parse(datestr, datetimeFormatter6B) + case 1 => + val dstr = if (datestr.startsWith("2")) { + // e.g., 20220330 + datestr.replaceAll("(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3") + } else if (datestr.drop(4).startsWith("2")) { + // e.g., 03302022 + datestr.replaceAll("(\\d{2})(\\d{2})(\\d{4})", "$3-$1-$2") } else { - DateTime.parse(datestr, datetimeFormatter6C) + sys.error(s"bad date format [$datestr]") } - } else { + val fmtr = datetimeFormatter6 + DateTime.parse(s"${dstr} 00:00:00", fmtr) + case 3 => + datestr = datestr.replaceAll("\\D+", "-") + val fmtr = if (numWidths.mkString == "224") { + datetimeFormatter6B + } else { + datetimeFormatter6 + } + DateTime.parse(s"$datestr 00:00:00", fmtr) + case 5 => + if (numfields(3) <= 12) { + DateTime.parse(datestr, datetimeFormatter5) + } else { + DateTime.parse(datestr, datetimeFormatter5b) + } + case 6 => + if (numfields(2) > 1000) { + // partial ambiguity elimination + if (numfields(0) > 12) { + DateTime.parse(datestr, datetimeFormatter6B) + } else { + if (numWidths(0) > 1) { + DateTime.parse(datestr, datetimeFormatter6C) + } else { + DateTime.parse(datestr, datetimeFormatter6D) + } + } + } else { + DateTime.parse(datestr, datetimeFormatter6) + } + case 7 => + DateTime.parse(datestr, datetimeFormatter7) + case _ => + // System.err.printf("%d datetime fields: [%s] [%s]\n".format(numfields.size, numfields.mkString("|"), datestr)) DateTime.parse(datestr, datetimeFormatter6) - } - case 7 => - DateTime.parse(datestr, 
datetimeFormatter7) - case _ => - // System.err.printf("%d datetime fields: [%s] [%s]\n".format(numfields.size, numfields.mkString("|"), datestr)) - DateTime.parse(datestr, datetimeFormatter6) } } // format: on diff --git a/src/main/scala-3/vastblue/time/TimeExtensions.scala b/src/main/scala/vastblue/time/TimeExtensions.scala similarity index 100% rename from src/main/scala-3/vastblue/time/TimeExtensions.scala rename to src/main/scala/vastblue/time/TimeExtensions.scala diff --git a/src/main/scala/vastblue/time/ParsDate.scala b/src/main/scala/vastblue/time/TimeParser.scala similarity index 87% rename from src/main/scala/vastblue/time/ParsDate.scala rename to src/main/scala/vastblue/time/TimeParser.scala index ee71929..2d87161 100644 --- a/src/main/scala/vastblue/time/ParsDate.scala +++ b/src/main/scala/vastblue/time/TimeParser.scala @@ -2,7 +2,7 @@ package vastblue.time /** * This is useful for converting between a wide variety - * of Date and Time Strings and the ParsDate class. + * of Date and Time Strings and the TimeParser class. 
*/ import vastblue.pallet.* import java.io.{File => JFile} @@ -16,7 +16,7 @@ import scala.collection.immutable.* import scala.util.matching.Regex import scala.util.control.Breaks.* -object ParsDate { +object TimeParser { var verbose = ".verbose".path.isFile var debug: Boolean = ".debug".path.isFile var yearFirstFlag = true @@ -86,7 +86,7 @@ object ParsDate { def setFormat(fmt: DateFormat): Unit = { outfmt = fmt } def setFormat(fmt: String): Unit = { outfmt = new SimpleDateFormat(fmt) } - def extractDateFromText(rawline: String): Option[ParsDate] = { + def extractDateFromText(rawline: String): Option[TimeParser] = { // debug: test before toLowerCase val text = rawline.replaceAll("""[^-a-zA-Z:/_0-9\s]+""", " ").replaceAll("""\s+""", " ").trim if (debug) { @@ -105,7 +105,7 @@ object ParsDate { val result = if (lc.contains("@")) { // ignore email if (verbose) printf("ignore email[%s]\n", text) - None // Some(ParsDate("2019:01:01")) + None // Some(TimeParser("2019:01:01")) } else if (lc.matches(""".*\bapprov.*""")) { if (verbose) printf("ignore approval of minutes [%s]\n", text) None @@ -129,7 +129,7 @@ object ParsDate { case YYMMddDensePattern(yy, mm, dd) if okYMD(yy, mm, dd) => Some(normalizedMdate(yy, mm, dd)) case _ => - None // Some(ParsDate("2019:02:02")) + None // Some(TimeParser("2019:02:02")) } } result @@ -154,11 +154,11 @@ object ParsDate { val valid = (num >= 1 && num <= 12) (num, valid) } - def normalizedMdate(yy: String, mm: String, dd: String): ParsDate = { + def normalizedMdate(yy: String, mm: String, dd: String): TimeParser = { val (y, m, d) = numericFields(yy, mm, dd) val normalized = "%04d/%02d/%02d".format(y, m, d) if (verbose) printf("normalized: [%s]\n", normalized) - ParsDate(normalized) + TimeParser(normalized) } lazy val YYMMddDensePattern: Regex = """.*(\b2[01]\d{2})(\d{2})(\d{2})\b.*""".r lazy val LcMonthPattern: Regex = (s"(?i)${MonthPattern.toString}").r @@ -202,16 +202,16 @@ object ParsDate { "November", "December", ) - lazy val 
weekdayNames: List[String] = List( - "Sunday", - "Monday", - "Tuesday", - "Wednesday", - "Thursday", - "Friday", - "Saturday", - ) - lazy val monthAbbreviationsLowerCase: List[String] = monthNames.map { _.toLowerCase.substring(0, 3) } +// lazy val weekdayNames: List[String] = List( +// "Sunday", +// "Monday", +// "Tuesday", +// "Wednesday", +// "Thursday", +// "Friday", +// "Saturday", +// ) +// lazy val monthAbbreviationsLowerCase: List[String] = monthNames.map { _.toLowerCase.substring(0, 3) } def indexedLetters(idx: Int, list: List[String]): String = { new String({ var uniqChars = List[Char]() @@ -230,19 +230,19 @@ object ParsDate { s"[$cc0][$cc1][$cc2][\\.\\w]*" } - def apply(): ParsDate = { - ParsDate(new Date) + def apply(): TimeParser = { + TimeParser(new Date) } - def apply(time: Long): ParsDate = { - new ParsDate(time) + def apply(time: Long): TimeParser = { + new TimeParser(time) } - def apply(date: Date): ParsDate = { - ParsDate(date.getTime) + def apply(date: Date): TimeParser = { + TimeParser(date.getTime) } - def apply(tupleDate: ((Int, Int, Int), (Int, Int, Int))): ParsDate = { + def apply(tupleDate: ((Int, Int, Int), (Int, Int, Int))): TimeParser = { val (date, time) = tupleDate val (yy, mm, day) = date val (hr, mn, sec) = time @@ -251,17 +251,17 @@ object ParsDate { lazy val BadParsDate = apply(-1L) - def apply(datestr: String): ParsDate = { + def apply(datestr: String): TimeParser = { parseDate(datestr).getOrElse(BadParsDate) } - def apply(yy: Int, mm: Int, dd: Int): ParsDate = { + def apply(yy: Int, mm: Int, dd: Int): TimeParser = { // Calendar month is zero-based val cal = java.util.Calendar.getInstance cal.set(yy, mm - 1, dd) apply(cal.getTime) } - def apply(date: Any): ParsDate = { + def apply(date: Any): TimeParser = { date match { case tt: Long => apply(tt) case tt: Date => apply(tt) @@ -290,7 +290,6 @@ object ParsDate { s"$number" } - lazy val MonthNamePattern: Regex = 
"""(?i)(.*)\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\b(.*)""".r def monthAbbrev2Number(name: String): Int = { name.toLowerCase.substring(0, 3) match { @@ -306,6 +305,9 @@ object ParsDate { case "oct" => 10 case "nov" => 11 case "dec" => 12 + case _ => + hook += 1 + -1 } } def monthName2Number(rawname: String): Int = { @@ -318,6 +320,7 @@ object ParsDate { result } + lazy val MonthNamePattern: Regex = """(?i)(.*)\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\b(.*)""".r lazy val QuadrixBackIssuesFilenameFormat: Regex = """(?i)([jfmasond][aepuco][nbrylgptvc])\D?(\d{1,2})\D(\d{1,4})""".r def toNumericFormat(dateStrRaw: String): String = { // "Wed Apr 08 18:17:08 2009" @@ -363,7 +366,17 @@ object ParsDate { "" // junk } } - def reorderYearFirst(ff: Array[String]): Array[String] = { + def reorderYearFirst(_ff: Array[String]): Array[String] = { + var ff = _ff + _ff.indexWhere((s: String) => s.length >= 4 && !s.contains(":") && !s.startsWith("0")) match { + case -1 => + // mostly + case yidx => + val yr = _ff(yidx) + val residue = _ff.take(yidx) ++ ff.drop(yidx+1) + ff = (Seq(yr) ++ residue).toArray + } + if (ff(2).length == 4) { // reorder first 3 from mm/dd/yyyy to yyyy/mm/dd def zpad(s: String): String = { @@ -425,8 +438,8 @@ object ParsDate { (dateStr, zone) } - def parseDate(date: Date): ParsDate = ParsDate(date) - def parseDate(date: ParsDate): ParsDate = ParsDate(date.getTime) + def parseDate(date: Date): TimeParser = TimeParser(date) + def parseDate(date: TimeParser): TimeParser = TimeParser(date.getTime) def prepDateString(str: String): (Boolean, String, String) = { if (str.contains(":") && str.matches(".* 2[0-9]{3}")) { @@ -486,7 +499,7 @@ object ParsDate { def isDate(text: String): Boolean = { try { if (text.matches(""".*\d\d.*""")) { - ParsDate(text) + TimeParser(text) true } else { // a date string requires at least 2 consecutive digits (the year) @@ -501,10 +514,10 @@ object ParsDate { lazy val legalCharacters: Set[Char] = 
"0123456789-:/ abcdefghijklmnopqrstuvwxyz.,+()".toSet // time zones can be in parentheses - def tryFormat(dateStr: String, fmt: SimpleDateFormat): Option[ParsDate] = { + def tryFormat(dateStr: String, fmt: SimpleDateFormat): Option[TimeParser] = { try { val dt = fmt.parse(dateStr) - val pdate = ParsDate(dt) + val pdate = TimeParser(dt) Some(pdate) } catch { case ee: Exception => @@ -515,7 +528,7 @@ object ParsDate { /** * Parse date String. */ - def parseDate(rawdate: String): Option[ParsDate] = { + def parseDate(rawdate: String): Option[TimeParser] = { if (rawdate.startsWith("08/04/")) { hook += 1 } @@ -529,11 +542,11 @@ object ParsDate { // standardize format to use hyphenated y-m-d rather than y/m/d val (yearFirst, dateStr, zone) = prepDateString(rawdate) - var parsOpt: Option[ParsDate] = None + var parsOpt: Option[TimeParser] = None val dateFormats = relevantFormats(dateStr, yearFirst) dateFormats.find { testfmt => - tryFormat(dateStr, testfmt).foreach { (pd: ParsDate) => + tryFormat(dateStr, testfmt).foreach { (pd: TimeParser) => parsOpt = Some(pd) // save most recent successful format (try it on first attempt next time) currentFormat = testfmt @@ -586,17 +599,17 @@ object ParsDate { * Guess date format. * TODO: currently unable to parse "January 12, 1972" !!!! 
*/ - def guessFormat(date: String): ParsDate = { + def guessFormat(date: String): TimeParser = { val dateStr = date val sf = selfFormat(date) val fmt = simpleFormat(sf) try { val dt = fmt.parse(date) - ParsDate(dt) + TimeParser(dt) } catch { case _: Exception => if (debug) eprintf("failed self-format: [%s] : [%s]\n", date, fmt) - var mdate: ParsDate = null + var mdate: TimeParser = null // year is easy var onthefly = date.replaceFirst("""\d\d\d\d""", "yyyy") @@ -640,7 +653,7 @@ object ParsDate { } try { val testFormat = new SimpleDateFormat(onthefly) - mdate = ParsDate(testFormat.parse(dateStr)) + mdate = TimeParser(testFormat.parse(dateStr)) currentFormat = testFormat // successful yearFirstFlag = onthefly.startsWith("yyyy") if (!newFormats.contains(onthefly)) { @@ -663,7 +676,7 @@ object ParsDate { val list = if (punctuationMap.contains(punct)) { punctuationMap(punct).sortBy { sdf => -sdf.toPattern.length } // longest patterns first } else { - if (ParsDate.debug || ParsDate.verbose) { + if (TimeParser.debug || TimeParser.verbose) { eprintf("no date format for punct[%s], literalDate[%s]\n", punct, literalDate) } List[SimpleDateFormat]() @@ -748,7 +761,7 @@ object ParsDate { def simpleFormat(fmt: String): SimpleDateFormat = new SimpleDateFormat(fmt, Locale.US) /// ============================================================== former object Main - def parse(line: String, zeroTime: Boolean = false): ParsDate = { + def parse(line: String, zeroTime: Boolean = false): TimeParser = { val simplified = line.replaceAll("""[\s\(\),]+""", " ").trim simplified match { @@ -757,14 +770,14 @@ object ParsDate { val tm = if (zeroTime) "00:00:00" else time val stdfmt = "%4d/%02d/%02d %s".format(yyyy, mm, dd, tm) // eprintf("dystr:[%s], moName:[%s], yr:[%s], time:[%s], tz:[%s], stdfmt[%s]".format(dystr, moName, yr, time, tz, stdfmt)) - ParsDate(stdfmt) + TimeParser(stdfmt) case DateRegex_02(dummy @ _, dystr, moName, yr, time, tz @ _) => val (yyyy, mm, dd) = getNumbers(yr, moName, 
dystr) val tm = if (zeroTime) "00:00:00" else time val stdfmt = "%4d/%02d/%02d %s".format(yyyy, mm, dd, tm) // eprintf("dystr:[%s], moName:[%s], yr:[%s], time:[%s], tz:[%s], stdfmt[%s] (%s)".format(dystr, moName, yr, time, tz, stdfmt, dummy)) - ParsDate(stdfmt) + TimeParser(stdfmt) case other => sys.error(s"unparseable date:[$other]") @@ -816,35 +829,35 @@ object ParsDate { def ymdDate: String => DateTime = quikDate // alias } -class ParsDate(msec: Long) extends Ordered[ParsDate] { - import ParsDate.* +class TimeParser(msec: Long) extends Ordered[TimeParser] { + import TimeParser.* var zone: String = "" - var outfmt: DateFormat = ParsDate.outfmt // inherit the current default + var outfmt: DateFormat = TimeParser.outfmt // inherit the current default // return self, to permit this usage: date.dateOnly.toString - def dateAndTime: ParsDate = { - outfmt = dateTimeFormat - this - } +// def dateAndTime: TimeParser = { +// outfmt = dateTimeFormat +// this +// } // TODO: this sets global mode for default printing of date format - def dateOnly: ParsDate = { - outfmt = dateOnlyFormat - this - } +// def dateOnly: TimeParser = { +// outfmt = dateOnlyFormat +// this +// } private val cal = java.util.Calendar.getInstance cal.setTimeInMillis(msec) private val date = cal.getTime - val stringValue: String = dateTimeFormat.format(date).replaceAll("""\s+00:00:00""", "") +// val stringValue: String = dateTimeFormat.format(date).replaceAll("""\s+00:00:00""", "") - def toTuple: ((Int, Int, Int), (Int, Int, Int)) = { - val date = (getYear, getMonth, getDay) - val time = (getHour, getMinute, getSecond) - (date, time) - } +// def toTuple: ((Int, Int, Int), (Int, Int, Int)) = { +// val date = (getYear, getMonth, getDay) +// val time = (getHour, getMinute, getSecond) +// (date, time) +// } def getTime: Long = date.getTime @@ -858,20 +871,20 @@ class ParsDate(msec: Long) extends Ordered[ParsDate] { tcal.setTimeInMillis(getEpoch) tcal } -// def compareTo(that: ParsDate): Int = { +// def 
compareTo(that: TimeParser): Int = { // if (this < that) -1 // else if (this > that) 1 // else 0 // } -// def compare(x: ParsDate, y: ParsDate) = x compareTo y - def compare(that: ParsDate): Int = this compareTo that +// def compare(x: TimeParser, y: TimeParser) = x compareTo y + def compare(that: TimeParser): Int = this compareTo that - def isLeapYear: Boolean = gcal.isLeapYear(getYear) +// def isLeapYear: Boolean = gcal.isLeapYear(getYear) -// def < (other: ParsDate): Boolean = { getTime < other.getTime } -// def <= (other: ParsDate): Boolean = { getTime <= other.getTime } -// def > (other: ParsDate): Boolean = { getTime > other.getTime } -// def >= (other: ParsDate): Boolean = { getTime >= other.getTime } +// def < (other: TimeParser): Boolean = { getTime < other.getTime } +// def <= (other: TimeParser): Boolean = { getTime <= other.getTime } +// def > (other: TimeParser): Boolean = { getTime > other.getTime } +// def >= (other: TimeParser): Boolean = { getTime >= other.getTime } override def toString: String = { outfmt.format(date).replaceAll("""\s+00:00:00""", "") @@ -882,26 +895,26 @@ class ParsDate(msec: Long) extends Ordered[ParsDate] { df.format(date) } - def nextDay: ParsDate = addDays(1) + def nextDay: TimeParser = addDays(1) - def addMilliseconds(milliseconds: Int): ParsDate = ParsDate(getEpoch + milliseconds) + def addMilliseconds(milliseconds: Int): TimeParser = TimeParser(getEpoch + milliseconds) - def addSeconds(seconds: Int): ParsDate = addMilliseconds(seconds * 1000) - def addMinutes(minutes: Int): ParsDate = addSeconds(minutes * 60) - def addHours(hours: Int): ParsDate = addMinutes(hours * 60) + def addSeconds(seconds: Int): TimeParser = addMilliseconds(seconds * 1000) + def addMinutes(minutes: Int): TimeParser = addSeconds(minutes * 60) +// def addHours(hours: Int): TimeParser = addMinutes(hours * 60) - def between(a: ParsDate, b: ParsDate): Boolean = { + def between(a: TimeParser, b: TimeParser): Boolean = { assert(a <= b) this >= a && this <= 
b } - def addDays(days: Int): ParsDate = { + def addDays(days: Int): TimeParser = { val tcal = copyCalendar() tcal.add(Calendar.DAY_OF_YEAR, days) - ParsDate(tcal.getTime) + TimeParser(tcal.getTime) } // time elapsed since previousTime - def elapsedMilliSeconds(previousTime: ParsDate): Long = { + def elapsedMilliSeconds(previousTime: TimeParser): Long = { val t0: Long = previousTime.getTime val t1: Long = this.getTime if (t0 > t1) { @@ -910,25 +923,25 @@ class ParsDate(msec: Long) extends Ordered[ParsDate] { t1 - t0 } } - def elapsedSeconds(previousTime: ParsDate): Long = { + def elapsedSeconds(previousTime: TimeParser): Long = { elapsedMilliSeconds(previousTime) / 1000 } - def elapsedMinutes(previousTime: ParsDate): BigDecimal = { + def elapsedMinutes(previousTime: TimeParser): BigDecimal = { BigDecimal(elapsedSeconds(previousTime) / 60.0) } - def elapsedHours(previousTime: ParsDate): BigDecimal = { + def elapsedHours(previousTime: TimeParser): BigDecimal = { elapsedMinutes(previousTime) / 60.0 } - def elapsedDays(previousTime: ParsDate): BigDecimal = { + def elapsedDays(previousTime: TimeParser): BigDecimal = { elapsedHours(previousTime) / 24.0 } // duration methods that return approximate answers lazy val averageMonthSize: Double = (31 + 28.25 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31) / 12.0 - def elapsedMonths(previousTime: ParsDate): BigDecimal = { + def elapsedMonths(previousTime: TimeParser): BigDecimal = { elapsedDays(previousTime) / averageMonthSize } - def elapsedYears(previousTime: ParsDate): BigDecimal = { + def elapsedYears(previousTime: TimeParser): BigDecimal = { elapsedDays(previousTime) / 365.25 } @@ -974,7 +987,7 @@ object LongIso { def main(args: Array[String]): Unit = { try { for (arg <- args) { - printf("%s\n", ParsDate(arg)) + printf("%s\n", TimeParser(arg)) } } catch { case ee: Exception => diff --git a/src/test/scala/vastblue/file/CsvTests.scala b/src/test/scala/vastblue/file/CsvTests.scala index e2bd02d..a65423d 100644 --- 
a/src/test/scala/vastblue/file/CsvTests.scala +++ b/src/test/scala/vastblue/file/CsvTests.scala @@ -13,7 +13,7 @@ class CsvTests extends AnyFunSpec with Matchers with BeforeAndAfter { val fnamestr = s"${TMP}/youMayDeleteThisDebrisCsvParser.csv" printf("csvTestFile[%s]\n", fnamestr) val path = Paths.get(fnamestr) - if (path.parentFile.isDirectory) { + if (path.parentPath.isDirectory) { path.withWriter() { w => // format: off w.print(s""" diff --git a/src/test/scala/vastblue/file/PathSpec.scala b/src/test/scala/vastblue/file/FilePathSpec.scala similarity index 93% rename from src/test/scala/vastblue/file/PathSpec.scala rename to src/test/scala/vastblue/file/FilePathSpec.scala index 504237e..e3141ef 100644 --- a/src/test/scala/vastblue/file/PathSpec.scala +++ b/src/test/scala/vastblue/file/FilePathSpec.scala @@ -8,7 +8,7 @@ import org.scalatest.* import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.should.Matchers -class PathSpec extends AnyFunSpec with Matchers with BeforeAndAfter { +class FilePathSpec extends AnyFunSpec with Matchers with BeforeAndAfter { lazy val verbose = Option(System.getenv("VERBOSE_TESTS")).nonEmpty var hook: Int = 0 @@ -242,6 +242,46 @@ class PathSpec extends AnyFunSpec with Matchers with BeforeAndAfter { } describe("Path") { + describe("# bare filename") { + it ("bare path segments are valid files") { + val s1 = Paths.get("s1") + val stdpath = s1.stdpath + assert(stdpath.startsWith("/")) + } + it ("bare filenames always have parent files") { + val s1 = Paths.get("s1") + val p1: Path = s1.getParentPath + val par = s1.toAbsolutePath.parent + assert(par != null) + } + } + describe("# getParentPath extension method") { + val windowsPaths: List[String] = if (isWindows) { + List( + "C:", + "C:/", + ) + } else { + Nil + } + val testPaths: List[String] = windowsPaths + ::: List( + ".", + "src", + "/", + "/bin", + ) + + for (pathstr <- testPaths.distinct) { + it(s"does not return null on $pathstr") { + val p = Paths.get(pathstr) + val 
par = p.getParentPath + eprintf("par [%s]\n", par.posx) + assert(par != null) + } + } + } + describe("# round trip consistency") { for (fname <- distinctKeys) { val f1: Path = Paths.get(fname) diff --git a/src/test/scala/vastblue/file/PathnameTest.scala b/src/test/scala/vastblue/file/PathnameTest.scala index c6cb8fe..0c04d8f 100644 --- a/src/test/scala/vastblue/file/PathnameTest.scala +++ b/src/test/scala/vastblue/file/PathnameTest.scala @@ -59,7 +59,7 @@ class PathnameTest extends AnyFunSpec with Matchers with BeforeAndAfter { for (testfilename <- testfilenames) { it(s"should correctly handle filename [$testfilename] ") { val testfile = vastblue.pallet.Paths.get(testfilename) - val testPossible = testfile.parentFile match { + val testPossible = testfile.parentPath match { case dir if dir.isDirectory => true case _ => diff --git a/src/test/scala/vastblue/time/ParsDateTests.scala b/src/test/scala/vastblue/time/ChronoParseTests.scala similarity index 95% rename from src/test/scala/vastblue/time/ParsDateTests.scala rename to src/test/scala/vastblue/time/ChronoParseTests.scala index a91a73a..f788330 100644 --- a/src/test/scala/vastblue/time/ParsDateTests.scala +++ b/src/test/scala/vastblue/time/ChronoParseTests.scala @@ -1,7 +1,7 @@ package vastblue.time -import vastblue.pallet.* -import vastblue.time.ParsDate +//import vastblue.pallet.* +import vastblue.time.TimeParser import vastblue.time.TimeDate.{parseDateTime => parseDate} import vastblue.time.TimeDate.* @@ -12,13 +12,13 @@ import TestDates.* import TestDates.hook -class ParsDateTests extends AnyFunSpec with Matchers { - // TODO: ParsDate(datestring: String, monthDayOrder: String = "MD") +class ChronoParseTests extends AnyFunSpec with Matchers { + // TODO: TimeParser(datestring: String, monthDayOrder: String = "MD") describe("parseDateTime") { for ((teststr, expected) <- TestDates.testDates) { it(s"should properly parse input timestamp ${teststr}") { - var pdDate = parseDate(teststr) + val pdDate = 
parseDate(teststr) val pds = pdDate.toString("yyyy/MM/dd") if (pds != expected) { hook += 1 @@ -28,14 +28,14 @@ class ParsDateTests extends AnyFunSpec with Matchers { } } - describe("ParsDate") { - for ((teststr, expected) <- TestDates.testStamps) { + describe("TimeParser") { + for ((teststr, expected) <- TestDates.dateStrings) { if (!badDates.contains(teststr)) { it(s"should properly parse input date string ${teststr}") { if (teststr.startsWith("08/04/2009")) { hook += 1 } - var pdDate = ParsDate(teststr) + val pdDate = TimeParser(teststr) val pds = pdDate.toString("yyyy/MM/dd") if (pds != expected) { hook += 1 @@ -47,7 +47,7 @@ class ParsDateTests extends AnyFunSpec with Matchers { for (teststr <- TestDates.testDates2) { if (!badDates.contains(teststr)) { it(s"should properly parse input timestamp ${teststr}") { - val pdDate: ParsDate = ParsDate(teststr) + val pdDate: TimeParser = TimeParser(teststr) val Array(ys, ms, ds) = pdDate.toString("yyyy/MM/dd").split("/") val y: Int = toInt(ys) @@ -88,13 +88,13 @@ object TestDates { ("31/05/2009 08:59:59 -0000", "2009/05/31"), // MM/dd/yyyy hh:mm:ss -0000 ("31/05/2009 02:20:13 -0700", "2009/05/31"), // MM/dd/yyyy hh:mm:ss -0700 ("2/11/2009 16:34:32 -0800", "2009/02/11"), // M/dd/yyyy HH:mm:ss -0800 - // ("04/08 18:17:08 2009", "2009/04/08"), // MM/dd HH:mm:ss yyyy + ("04/08 18:17:08 2009", "2009/04/08"), // MM/dd HH:mm:ss yyyy ("05/06/1993", "1993/05/06"), // MM/dd/yyyy ("2009/03/24 21:48:25.0", "2009/03/24"), // yyyy/MM:dd HH:mm:ss.S ("2009/03/30 22:10:03", "2009/03/30"), // yyyy/MM/dd HH:mm:ss ) - lazy val testStamps = List( + lazy val dateStrings = List( ("Fri Jan 10 2014 2:34:17 PM EST", "2014/01/10"), ("04/13/1992 11:59 PM", "1992/04/13"), ("04/13/1992 12:01 PM", "1992/04/13"), @@ -165,7 +165,7 @@ object TestDates { ("2009/03/31 22:48:00", "2009/03/31"), ("2009/03/31 22:49:15", "2009/03/31"), ("2009/03/31 23:13:10", "2009/03/31"), - //("04/08 18:17:08 2009", "2009/04/08"), + ("04/08 18:17:08 2009", "2009/04/08"), 
("4/10/2009 6:52:34 PM", "2009/04/10"), ("4/10/2009 12:00:00 AM", "2009/04/10"), ("2009/04/20 04:36:03", "2009/04/20"),