2 * ============LICENSE_START=======================================================
3 * Copyright (C) 2024 Ericsson
4 * Modifications Copyright (C) 2024 OpenInfra Foundation Europe
5 * ================================================================================
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 * SPDX-License-Identifier: Apache-2.0
19 * ============LICENSE_END=========================================================
21 package org.oran.smo.yangtools.parser.model.util;
public abstract class PatternHelper {

    /**
     * Does a basic translation of a YANG REGEX (which is of XML schema flavour) to a Java REGEX flavour.
     * Does not handle category escape \p{X} or block escape \p{Is}
     * <p>
     * See annex F of https://www.w3.org/TR/2004/REC-xmlschema-2-20041028/
     *
     * @param input the pattern string in XSD REGEX syntax; must not be null
     * @return the pattern string with every literal '$' and '^' escaped for use with java.util.regex
     * @throws NullPointerException if input is null
     */
    public static String toJavaPatternString(final String input) {

        /*
         * Notable differences between XSD REGEX syntax and Java REGEX syntax:
         *
         * The '^' and '$' are not used as head/tail anchors, so are interpreted as literals. But the ^ is
         * used as negation in character classes.
         *
         * In XSD REGEX, meta-characters are either . \ ? * + { } ( ) [ ]
         *
         * All of these must be escaped with the backslash \ to use these as literals. The following must
         * also be escaped to arrive at a literal:
         *
         * \n \r \t \\ \| \- \^
         *
         * Note that . refers to any character BUT NOT \n \r. Java's default '.' excludes further line
         * terminators as well; the '.' is passed through untranslated here.
         */

        final String dollarsEscaped = cleanDollar(input);
        final String result = cleanRoof(dollarsEscaped);

        /*
         * We are not handling all of the category escape \p{X} or block escape \p{Is} - this is quite complex
         * and so far these have never been seen in Yang 'pattern' statement.
         */

        return result;
    }

    /**
     * Escapes every literal '$' so that Java does not interpret it as a line-end anchor.
     *
     * @param input the pattern string in XSD REGEX syntax; must not be null
     * @return the input with each unescaped '$' replaced by '\$'; the same instance if it contains no '$'
     * @throws NullPointerException if input is null
     */
    protected static String cleanDollar(final String input) {

        if (input.indexOf('$') < 0) {
            return input;
        }

        /*
         * The $ character has no special meaning in XSD REGEX syntax. It is a literal. It should never be
         * encountered in escaped form in YANG.
         *
         * In Java, it denotes line-end - so we need to escape any $ character that we find to make it a literal.
         *
         * We track backslash escapes while scanning: a $ that directly follows an unescaped backslash is
         * already a literal for Java and must not get a second backslash (that would yield a literal
         * backslash followed by a line-end anchor). A $ that follows an escaped backslash (\\) is still
         * a plain literal and does get escaped.
         */

        final int len = input.length();
        final StringBuilder sb = new StringBuilder(len + 8);
        boolean escaped = false;

        for (int i = 0; i < len; ++i) {
            final char c = input.charAt(i);
            if (escaped) {
                // Second half of an escape pair - copy verbatim.
                sb.append(c);
                escaped = false;
            } else if (c == '\\') {
                sb.append(c);
                escaped = true;
            } else if (c == '$') {
                sb.append("\\$");
            } else {
                sb.append(c);
            }
        }

        return sb.toString();
    }

    /**
     * Escapes every literal '^' so that Java does not interpret it as a line-start anchor. A '^' that
     * directly follows an unescaped '[' is the negation of a character class and is left untouched.
     *
     * @param input the pattern string in XSD REGEX syntax; must not be null
     * @return the input with each literal '^' replaced by '\^'; the same instance if it contains no '^'
     * @throws NullPointerException if input is null
     */
    protected static String cleanRoof(final String input) {

        if (input.indexOf('^') < 0) {
            return input;
        }

        /*
         * The ^ character has special meaning in XSD REGEX syntax only inside character classes, for example:
         *
         * [^a-z]
         *
         * In all other cases, it is a literal.
         *
         * In Java, it is also used inside character classes, but is also used to denote line-start. So if we
         * encounter the ^ character we escape it (unless at the start of a character class).
         *
         * Backslash escapes are tracked while scanning, for two reasons: an already-escaped roof (\^) must
         * not be escaped a second time, and an escaped bracket (\[) does not open a character class, so
         * a ^ following it is a literal that needs escaping.
         */

        final int len = input.length();
        final StringBuilder sb = new StringBuilder(len + 8);
        boolean escaped = false;
        boolean atClassStart = false;   // true if the previous character was an unescaped '['

        for (int i = 0; i < len; ++i) {
            final char c = input.charAt(i);
            if (escaped) {
                // Second half of an escape pair - copy verbatim.
                sb.append(c);
                escaped = false;
                atClassStart = false;
            } else if (c == '\\') {
                sb.append(c);
                escaped = true;
                atClassStart = false;
            } else if (c == '^' && !atClassStart) {
                sb.append("\\^");
            } else {
                sb.append(c);
                atClassStart = (c == '[');
            }
        }

        return sb.toString();
    }
}