Skip to content

Commit

Permalink
Merge with 8358b3eb6e5b5237ade42438836b869e6141a776
Browse files Browse the repository at this point in the history
  • Loading branch information
jzell committed Apr 2, 2013
2 parents b57e9ee + 3cd8946 commit ceaba2e
Show file tree
Hide file tree
Showing 75 changed files with 3,072 additions and 11 deletions.
95 changes: 95 additions & 0 deletions desc/annotator/StanfordPOSTaggerWrapper.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- UIMA analysis engine descriptor for the StanfordPOSTaggerWrapper annotator. -->
<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
<frameworkImplementation>org.apache.uima.java</frameworkImplementation>
<primitive>true</primitive>
<!-- Fully qualified name of the Java class that implements this annotator. -->
<annotatorImplementationName>de.unihd.dbs.uima.annotator.stanfordtagger.StanfordPOSTaggerWrapper</annotatorImplementationName>
<analysisEngineMetaData>
<name>StanfordPOSTaggerWrapper</name>
<description/>
<version>1.0</version>
<vendor>UniHD DBS</vendor>
<!-- Declared parameters: a mandatory model path, an optional configuration path,
     and three optional Boolean switches (tokens, sentences, part of speech). -->
<configurationParameters searchStrategy="language_fallback">
<configurationParameter>
<name>model_path</name>
<description>Path to a model for use with the Stanford POS Tagger</description>
<type>String</type>
<multiValued>false</multiValued>
<mandatory>true</mandatory>
</configurationParameter>
<configurationParameter>
<name>config_path</name>
<description>Optional path to a configuration for use with the Stanford POS Tagger</description>
<type>String</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
<configurationParameter>
<name>annotate_tokens</name>
<description>Whether or not to annotate tokens</description>
<type>Boolean</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
<configurationParameter>
<name>annotate_sentences</name>
<description>Whether or not to annotate sentences.</description>
<type>Boolean</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
<configurationParameter>
<name>annotate_partofspeech</name>
<description>Whether or not to annotate part of speech (POS) information</description>
<type>Boolean</type>
<multiValued>false</multiValued>
<mandatory>false</mandatory>
</configurationParameter>
</configurationParameters>
<!-- Values shipped with this descriptor: all three annotate_* switches are set to
     true and model_path is set; no value is given for config_path. -->
<configurationParameterSettings>
<nameValuePair>
<name>annotate_tokens</name>
<value>
<boolean>true</boolean>
</value>
</nameValuePair>
<nameValuePair>
<name>annotate_sentences</name>
<value>
<boolean>true</boolean>
</value>
</nameValuePair>
<nameValuePair>
<name>annotate_partofspeech</name>
<value>
<boolean>true</boolean>
</value>
</nameValuePair>
<nameValuePair>
<name>model_path</name>
<value>
<!-- NOTE(review): absolute, installation-specific path to an English model;
     presumably has to be adapted to the local Stanford POS Tagger install. -->
<string>/opt/stanford-postagger-full/models/english-bidirectional-distsim.tagger</string>
</value>
</nameValuePair>
</configurationParameterSettings>
<typeSystemDescription>
<imports>
<!-- The type system is imported by a relative file location, not by name. -->
<import location="../type/HeidelTime_TypeSystem.xml"/>
</imports>
</typeSystemDescription>
<typePriorities/>
<fsIndexCollection/>
<!-- No input types, output types, or supported languages are declared here. -->
<capabilities>
<capability>
<inputs/>
<outputs/>
<languagesSupported/>
</capability>
</capabilities>
<operationalProperties>
<modifiesCas>true</modifiesCas>
<multipleDeploymentAllowed>true</multipleDeploymentAllowed>
<outputsNewCASes>false</outputsNewCASes>
</operationalProperties>
</analysisEngineMetaData>
<resourceManagerConfiguration/>
</analysisEngineDescription>
2 changes: 2 additions & 0 deletions metadata/setenv
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,7 @@ export CLASSPATH=$DKPRO_HOME/bin:$CLASSPATH
export CLASSPATH=$DKPRO_HOME/lib:$CLASSPATH
### setting the environment for jvntextpro (required for the wrapper)
export CLASSPATH=$JVNTEXTPRO_HOME:$CLASSPATH
### setting the environment for the stanford pos tagger (required for the wrapper)
export CLASSPATH=$STANFORDTAGGER:$CLASSPATH


Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// author: Jannik Strötgen
// email: [email protected]
// date: 2011-09-12
// This file contains "approximate words" and their normalized expressions
// for dates/times according to TIMEX3 format.
// For example, the normalized value of "about" is "APPROX"
// FORMAT: "approximate-word","normalized-approximate-word"
// about
"حوالي","APPROX"
"نحو","APPROX"
"تقريبا[ً]?","APPROX"
"حول","APPROX"
"حتى","EQUAL_OR_LESS"
"على ال[اأ]قل","EQUAL_OR_MORE"
"[اأ]كثر من","MORE_THAN"
"[اأ]قل من","LESS_THAN"
"[اأ]طول من","MORE_THAN"
"[اأ]قصر من","LESS_THAN"
"في غضون","APPROX"
"قرابة","APPROX"
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// author: Jannik Strötgen
// email: [email protected]
// date: 2011-09-12
// This file contains "approximate words" and their normalized expressions
// for dates/times according to TIMEX3 format.
// For example, the normalized value of "about" is "APPROX"
// FORMAT: "approximate-word","normalized-approximate-word"
"حوالي","APPROX"
"نحو","APPROX"
"تقريبا[ً]?","APPROX"
"حول","APPROX"
"حتى","EQUAL_OR_LESS"
"على ال[اأ]قل","EQUAL_OR_MORE"
"[اأ]كثر من","MORE_THAN"
"[اأ]قل من","LESS_THAN"
"[اأ]طول من","MORE_THAN"
"[اأ]قصر من","LESS_THAN"
"في غضون","APPROX"
"قرابة","APPROX"

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// author: Jannik Strötgen
// email: [email protected]
// date: 2011-06-10
// This file contains "duration numbers" and their normalized expressions
// according to TIMEX3 format.
// For example, the normalized value of "one" is "1"
// FORMAT: "duration-number","normalized-duration-number"
"\u0660","0"
"\u0661","1"
"\u0662","2"
"\u0663","3"
"\u0664","4"
"\u0665","5"
"\u0666","6"
"\u0667","7"
"\u0668","8"
"\u0669","9"
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
// author: Jannik Strötgen
// email: [email protected]
// date: 2011-06-10
// This file contains "duration numbers" and their normalized expressions
// according to TIMEX3 format.
// For example, the normalized value of "one" is "1"
// FORMAT: "duration-number","normalized-duration-number"
"واحد[هة]?","01"
"[اإ]ثن[ت]?[اي]ن","02"
"ثلاث[هة]?","03"
"[اأ]ربع[هة]?","04"
"خمس[هة]?","05"
"ست[هة]?","06"
"سبع[هة]?","07"
"ثماني[هة]?","08"
"تسع[هة]?","09"
"عشر[هة]?","10"
"[أاإ]حد[ى]?[\s]?عشر[هة]?","11"
"[اإ]ثن[ت]?[اي][\s]?عشر[هة]?","12"
"ثلاث[هة]?[\s]?عشر[هة]?","13"
"[أا]ربع[هة]?[\s]?عشر[هة]?","14"
"خمس[هة]?[\s]?عشر[هة]?","15"
"ست[هة]?[\s]?عشر[هة]?","16"
"سبع[هة]?[\s]?عشر[هة]?","17"
"ثماني[هة]?[\s]?عشر[هة]?","18"
"تسع[هة]?[\s]?عشر[هة]?","19"
"عشر[وي]ن","20"
"[اإ]حدى[\s]?و[\s]?عشر[وي]ن","21"
"واحد[\s]?و[\s]?عشر[وي]ن","21"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?عشر[وي]ن","22"
"ثلاث[هة]?[\s]?و[\s]?عشر[وي]ن","23"
"[أا]ربع[هة]?[\s]?و[\s]?عشر[وي]ن","24"
"خمس[هة]?[\s]?و[\s]?عشر[وي]ن","25"
"ست[هة]?[\s]?و[\s]?عشر[وي]ن","26"
"سبع[هة]?[\s]?و[\s]?عشر[وي]ن","27"
"ثماني[هة]?[\s]?و[\s]?عشر[وي]ن","28"
"تسع[هة]?[\s]?و[\s]?عشر[وي]ن","29"
"ثلاث[وي]ن","30"
"[اإ]حدى[\s]?و[\s]?ثلاث[وي]ن","31"
"واحد[\s]?و[\s]?ثلاث[وي]ن","31"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?ثلاث[وي]ن","32"
"ثلاث[هة]?[\s]?و[\s]?ثلاث[وي]ن","33"
"[أا]ربع[هة]?[\s]?و[\s]?ثلاث[وي]ن","34"
"خمس[هة]?[\s]?و[\s]?ثلاث[وي]ن","35"
"ست[هة]?[\s]?و[\s]?ثلاث[وي]ن","36"
"سبع[هة]?[\s]?و[\s]?ثلاث[وي]ن","37"
"ثماني[هة]?[\s]?و[\s]?ثلاث[وي]ن","38"
"تسع[هة]?[\s]?و[\s]?ثلاث[وي]ن","39"
"[اأ]ربع[وي]ن","40"
"[اإ]حدى[\s]?و[\s]?[اأ]ربع[وي]ن","41"
"واحد[\s]?و[\s]?[اأ]ربع[وي]ن","41"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?[اأ]ربع[وي]ن","42"
"ثلاث[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","43"
"[أا]ربع[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","44"
"خمس[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","45"
"ست[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","46"
"سبع[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","47"
"ثماني[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","48"
"تسع[هة]?[\s]?و[\s]?[اأ]ربع[وي]ن","49"
"خمس[وي]ن","50"
"[اإ]حدى[\s]?و[\s]?خمس[وي]ن","51"
"واحد[\s]?و[\s]?خمس[وي]ن","51"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?خمس[وي]ن","52"
"ثلاث[هة]?[\s]?و[\s]?خمس[وي]ن","53"
"[أا]ربع[هة]?[\s]?و[\s]?خمس[وي]ن","54"
"خمس[هة]?[\s]?و[\s]?خمس[وي]ن","55"
"ست[هة]?[\s]?و[\s]?خمس[وي]ن","56"
"سبع[هة]?[\s]?و[\s]?خمس[وي]ن","57"
"ثماني[هة]?[\s]?و[\s]?خمس[وي]ن","58"
"تسع[هة]?[\s]?و[\s]?خمس[وي]ن","59"
"ست[وي]ن","60"
"[اإ]حدى[\s]?و[\s]?ست[وي]ن","61"
"واحد[\s]?و[\s]?ست[وي]ن","61"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?ست[وي]ن","62"
"ثلاث[هة]?[\s]?و[\s]?ست[وي]ن","63"
"[أا]ربع[هة]?[\s]?و[\s]?ست[وي]ن","64"
"خمس[هة]?[\s]?و[\s]?ست[وي]ن","65"
"ست[هة]?[\s]?و[\s]?ست[وي]ن","66"
"سبع[هة]?[\s]?و[\s]?ست[وي]ن","67"
"ثماني[هة]?[\s]?و[\s]?ست[وي]ن","68"
"تسع[هة]?[\s]?و[\s]?ست[وي]ن","69"
"سبع[وي]ن","70"
"[اإ]حدى[\s]?و[\s]?سبع[وي]ن","71"
"واحد[\s]?و[\s]?سبع[وي]ن","71"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?سبع[وي]ن","72"
"ثلاث[هة]?[\s]?و[\s]?سبع[وي]ن","73"
"[أا]ربع[هة]?[\s]?و[\s]?سبع[وي]ن","74"
"خمس[هة]?[\s]?و[\s]?سبع[وي]ن","75"
"ست[هة]?[\s]?و[\s]?سبع[وي]ن","76"
"سبع[هة]?[\s]?و[\s]?سبع[وي]ن","77"
"ثماني[هة]?[\s]?و[\s]?سبع[وي]ن","78"
"تسع[هة]?[\s]?و[\s]?سبع[وي]ن","79"
"ثمان[وي]ن","80"
"[اإ]حدى[\s]?و[\s]?ثمان[وي]ن","81"
"واحد[\s]?و[\s]?ثمان[وي]ن","81"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?ثمان[وي]ن","82"
"ثلاث[هة]?[\s]?و[\s]?ثمان[وي]ن","83"
"[أا]ربع[هة]?[\s]?و[\s]?ثمان[وي]ن","84"
"خمس[هة]?[\s]?و[\s]?ثمان[وي]ن","85"
"ست[هة]?[\s]?و[\s]?ثمان[وي]ن","86"
"سبع[هة]?[\s]?و[\s]?ثمان[وي]ن","87"
"ثماني[هة]?[\s]?و[\s]?ثمان[وي]ن","88"
"تسع[هة]?[\s]?و[\s]?ثمان[وي]ن","89"
"تسع[وي]ن","90"
"[اإ]حدى[\s]?و[\s]?تسع[وي]ن","91"
"واحد[\s]?و[\s]?تسع[وي]ن","91"
"[اإ]ثن[ت]?[اي][ن]?[\s]?و[\s]?تسع[وي]ن","92"
"ثلاث[هة]?[\s]?و[\s]?تسع[وي]ن","93"
"[أا]ربع[هة]?[\s]?و[\s]?تسع[وي]ن","94"
"خمس[هة]?[\s]?و[\s]?تسع[وي]ن","95"
"ست[هة]?[\s]?و[\s]?تسع[وي]ن","96"
"سبع[هة]?[\s]?و[\s]?تسع[وي]ن","97"
"ثماني[هة]?[\s]?و[\s]?تسع[وي]ن","98"
"تسع[هة]?[\s]?و[\s]?تسع[وي]ن","99"
"م[ا]?ئ[هة]","1"
"م[ا]?ئت[اي][ن]?","2"
"ثلاث[هة]?[\s]?م[ا]?ئ[هة]","3"
"[اأ]ربع[هة]?[\s]?م[ا]?ئ[هة]","4"
"خمس[هة]?[\s]?م[ا]?ئ[هة]","5"
"ست[هة]?[\s]?م[ا]?ئ[هة]","6"
"سبع[هة]?[\s]?م[ا]?ئ[هة]","7"
"ثماني[هة]?[\s]?م[ا]?ئ[هة]","8"
"تسع[هة]?[\s]?م[ا]?ئ[هة]","9"
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// author: Jannik Strötgen
// email: [email protected]
// date: 2011-06-10
// This file contains "date words" and their normalized expressions
// according to TIMEX3 format.
// For example, the normalized value of "tomorrow" is "UNDEF-next-day"
// FORMAT: "date-word","normalized-date-word"
"[اأ]ول [اأ]مس","UNDEF-this-day-MINUS-2"
"[اأ]لبارح[هة]","UNDEF-last-day"
"(ب)?[اأ](لأ)?(لا)?مس","UNDEF-last-day"
"[اأ]ليوم","UNDEF-this-day"
"[اأ][ل]?غد","UNDEF-next-day"
"غدا","UNDEF-next-day"
"[اأ]ل[اآ]ن","PRESENT_REF"
"في اللحظ[هة]","PRESENT_REF"
"في [اأ]قرب وقت","FUTURE_REF"
"قريبا[ً]?","FUTURE_REF"
"حديثا[ً]?","PRESENT_REF"
"حاليا[ً]?","PRESENT_REF"
"سابقا[ً]?","PAST_REF"
"في الماضي","PAST_REF"
"في السابق","PAST_REF"
"مؤخرا[ً]?","PAST_REF"
"آنذاك","PAST_REF"
"في الراهن","PRESENT_REF"
"الوقت الراهن","PRESENT_REF"
"الوقت الماضي","PAST_REF"
"الوقت السابق","PAST_REF"
"الوقت الحالي","PRESENT_REF"
"هذه ال[اأ]يام","PRESENT_REF"
"ال[اأ]يام ال[أا]خير[هة]","PAST_REF"

Loading

0 comments on commit ceaba2e

Please sign in to comment.