forked from marklogic-community/commons
-
Notifications
You must be signed in to change notification settings - Fork 0
/
query-xml.xqy
210 lines (202 loc) · 5.69 KB
/
query-xml.xqy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
(:~
: Mark Logic Search String to XML Utility
:
: Copyright 2008 Ryan Grimm
:
: Licensed under the Apache License, Version 2.0 (the "License");
: you may not use this file except in compliance with the License.
: You may obtain a copy of the License at
:
: http://www.apache.org/licenses/LICENSE-2.0
:
: Unless required by applicable law or agreed to in writing, software
: distributed under the License is distributed on an "AS IS" BASIS,
: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
: See the License for the specific language governing permissions and
: limitations under the License.
:
: @author Ryan Grimm ([email protected])
: @version 0.5
:
:)
module "http://marklogic.com/commons/query-xml"
declare namespace stox = "http://marklogic.com/commons/query-xml"
default function namespace = "http://www.w3.org/2003/05/xpath-functions"
(:~
: Takes a search string as the input and returns a xml document that can be
: used in constructing a cts:search()
:
: @param $search the raw search string. Whitespace inside double-quoted phrases
: is preserved, so a quoted phrase is parsed as a single term.
:
: @param $fields list of fields that you would like to be parsed. For example,
: if you were google you would set the fields to something like:
: ("link", "site", "filetype", "allintitle", "allintext", "allinurl", "allinanchor")
:
: @param $operators list of operators that you would like to look for at the
: beginning of each search term. If you would like to support negation and
: thesaurus lookups you could set the operators to:
: ("-", "~")
: Note: The operators can be any character but can only be one character long.
:
: @param $modes list of modes that you would like to have parsed out. This is handy
: when you want to enable searches that use words like "OR" or "AND" or "NOT".
: The two terms that are on either side of the mode will get a mode attribute
: assigned to them. The downside is that in some cases you might not know how to
: group the modes when constructing your query.
:
: @param $pops list of post-operators that you would like to look for at the
: end of each search term. A match is reported in a postop attribute.
: Note: Like the operators, each post-operator is compared against a single character.
:
: @return A xml document that simplifies constructing a query
:
:)
define function stox:searchToXml(
    $search as xs:string,
    $fields as xs:string*,
    $operators as xs:string*,
    $modes as xs:string*,
    $pops as xs:string*
) as element(search)
{
    stox:_collapse(
        (: Splitting on the double quote puts quoted text at the even positions.
           Whitespace in those pieces is swapped for the placeholder "!+!" so the
           whitespace tokenizer below keeps each phrase in one piece. :)
        let $newsearch := string-join(
            if (count(tokenize($search, '"')) > 2)
            then
                for $i at $count in tokenize($search, '"')
                return
                    if ($count mod 2 = 0)
                    then replace($i, "\s+", "!+!")
                    else $i
            else $search, '')
        let $terms := tokenize($newsearch, "\s+")
        for $term at $count in $terms
        let $tokens := tokenize($term, ":")
        (: mode words themselves are not emitted; they only show up as mode
           attributes on their neighbours :)
        where not($term = $modes)
        return
            if (count($tokens) > 1)
            then (
                (: The candidate field name is only computed here, where
                   $tokens[1] is known to exist. An empty term (for example from
                   leading or trailing whitespace) has no tokens at all and must
                   not be handed to stox:_stripOps. :)
                let $rawToken := stox:_stripOps($tokens[1], $operators, $pops)
                return
                    if ($fields[. = $rawToken])
                    then <term>{ (
                        stox:_getMode($modes, $terms, $count)
                        ,
                        stox:_getOp($tokens[1], $operators)
                        ,
                        stox:_getPop($tokens[2], $pops)
                        ,
                        attribute { "field" } { $rawToken }
                        ,
                        (: everything after the field name is the value; restore
                           the whitespace that was protected above :)
                        replace(string-join($tokens[2 to count($tokens)], ":"), "!\+!", " ")
                    ) }</term>
                    else <term>{ (
                        stox:_getMode($modes, $terms, $count)
                        ,
                        stox:_getOp($tokens[1], $operators)
                        ,
                        stox:_getPop($tokens[1], $pops)
                        ,
                        (: not a known field, so the colons are kept as part of the text.
                           NOTE(review): stox:_stripOps is applied twice here, which
                           removes up to two leading operators and two trailing
                           post-operators; confirm that this is intended. :)
                        stox:_stripOps(replace(stox:_stripOps(string-join($tokens, ":"), $operators, $pops), "!\+!", " "), $operators, $pops) )
                    }</term>
            )
            else if ($tokens[1])
            then <term>{ (
                stox:_getMode($modes, $terms, $count)
                ,
                stox:_getOp($tokens[1], $operators)
                ,
                stox:_getPop($tokens[1], $pops)
                ,
                replace(stox:_stripOps($tokens[1], $operators, $pops), "!\+!", " ")
            ) }</term>
            (: empty term: nothing to emit :)
            else ()
    )
}
(:~
: Returns a 'mode' attribute if the term directly before or directly after the
: given position is one of the specified modes. When both neighbours are modes
: the one that follows the position is used.
:
: @param $modes list of mode words to look for
:
: @param $terms all of the terms of the search, in order
:
: @param $index position in $terms of the term being examined
:
: @return a 'mode' attribute holding the neighbouring mode word, or the empty
: sequence if neither neighbour is a mode
:)
define function stox:_getMode(
    $modes as xs:string*,
    $terms as xs:string*,
    $index as xs:integer
) as attribute()?
{
    let $before := $terms[$index - 1]
    let $after := $terms[$index + 1]
    return
        (: the following neighbour is checked first so that it takes precedence :)
        if ($after = $modes)
        then attribute mode { $after }
        else if ($before = $modes)
        then attribute mode { $before }
        else ()
}
(:~
: Builds an 'op' attribute when the first character of the given term is one
: of the specified operators
:
: @param $term the search term to get the operator from
:
: @param $ops list of operators that you would like to look for in the term
:
: @return an 'op' attribute holding the first character of the term if that
: character is one of the specified operators, otherwise the empty sequence
:)
define function stox:_getOp(
    $term as xs:string,
    $ops as xs:string*
) as attribute()*
{
    (: substring yields exactly one string, so this loop runs once :)
    for $lead in substring($term, 1, 1)
    where $lead = $ops
    return attribute op { $lead }
}
(:~
: Builds a 'postop' attribute when the last character of the given term is one
: of the specified post-operators
:
: @param $term the search term to get the post-operator from
:
: @param $pops list of post-operators that you would like to look for in the term
:
: @return a 'postop' attribute holding the last character of the term if that
: character is one of the specified post-operators, otherwise the empty sequence
:)
define function stox:_getPop(
    $term as xs:string,
    $pops as xs:string*
) as attribute()*
{
    (: substring yields exactly one string, so this loop runs once :)
    for $tail in substring($term, string-length($term), 1)
    where $tail = $pops
    return attribute postop { $tail }
}
(:~
: Removes the leading operator and the trailing post-operator from the term,
: each only if it is present
:
: @param $term the search term to strip
:
: @param $ops list of operators to look for as the first character of the term
:
: @param $pops list of post-operators to look for as the last character of the
: term. May be empty or hold several values, matching what stox:searchToXml
: accepts and passes through.
:
: @return the term with a matching leading operator and/or trailing
: post-operator removed
:)
define function stox:_stripOps(
    $term as xs:string,
    $ops as xs:string*,
    $pops as xs:string*
) as xs:string
{
    let $op := substring($term, 1, 1)
    let $pop := substring($term, string-length($term), 1)
    (: drop the first character when it is an operator :)
    let $stripedOp :=
        if ($op = $ops)
        then substring($term, 2)
        else $term
    return
        (: drop the last character when it is a post-operator :)
        if ($pop = $pops)
        then substring($stripedOp, 1, string-length($stripedOp) - 1)
        else $stripedOp
}
(:~
: Wraps the given terms in a search element. A term that carries a field
: attribute but has no text of its own is merged with the term that follows it:
: the merged term keeps the attributes of the first term and takes the string
: value of the second. The attributes of the second term are not copied.
:
: @param $terms the term elements to wrap
:
: @return a search element containing the resulting terms
:)
define function stox:_collapse(
$terms as element(term)*
) as element(search)
{
<search>{
(: $pos is a manual cursor into $terms. It is moved with xdmp:set, so the loop
   below depends on side effects.
   NOTE(review): this relies on the xdmp:set calls being evaluated in the order
   they are written, even though $set is never read; confirm. :)
let $pos := 0
for $term in $terms
(: advance the cursor once per iteration, then rebind $term to the element under
   the cursor, which shadows the loop variable :)
let $set := xdmp:set($pos, $pos + 1)
let $term := $terms[$pos]
return
if(exists($term/@field) and string-length($term) = 0)
(: field with no value: borrow the text of the next term and move the cursor one
   extra step so that the next term is not emitted a second time :)
then <term>{ (
$term/@*,
string($terms[$pos + 1]),
xdmp:set($pos, $pos + 1)
) }</term>
(: after a merge the cursor runs ahead of the loop, so in the last iterations it
   can point past the end of $terms; the lookup is then empty and nothing is emitted :)
else $terms[$pos]
}</search>
}